source/encoder/entropy.cpp

   1 /*****************************************************************************
   2 * Copyright (C) 2013 x265 project
   3 *
   4 * Authors: Steve Borho <steve@borho.org>
   5 *
   6 * This program is free software; you can redistribute it and/or modify
   7 * it under the terms of the GNU General Public License as published by
   8 * the Free Software Foundation; either version 2 of the License, or
   9 * (at your option) any later version.
  10 *
  11 * This program is distributed in the hope that it will be useful,
  12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 * GNU General Public License for more details.
  15 *
  16 * You should have received a copy of the GNU General Public License
  17 * along with this program; if not, write to the Free Software
  18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
  19 *
  20 * This program is also available under a commercial proprietary license.
  21 * For more information, contact us at license @ x265.com.
  22 *****************************************************************************/
  23
  24 #include "common.h"
  25 #include "framedata.h"
  26 #include "scalinglist.h"
  27 #include "quant.h"
  28 #include "contexts.h"
  29 #include "picyuv.h"
  30
  31 #include "sao.h"
  32 #include "entropy.h"
  33
  /* Binarization constants used by the syntax coders in this file. */
  #define CU_DQP_TU_CMAX 5 /* upper bound on the number of truncated-unary bins */
  #define CU_DQP_EG_k    0 /* order of the exp-golomb code */
  #define START_VALUE    8 /* initial predictor when a scaling list is coded in DPCM mode */

  /* Eight-entry offset table. NOTE(review): presumably indexed by partition
   * size; its consumers are not visible in this part of the file -- confirm. */
  static const uint32_t g_puOffset[8] =
  {
      0, 8, 4, 4, 2, 10, 1, 5
  };
  39
  40 namespace x265 {
  41
  42 Entropy::Entropy()
  43 {
  44     markValid();
  45     m_fracBits = 0;
  46     X265_CHECK(sizeof(m_contextState) >= sizeof(m_contextState[0]) * MAX_OFF_CTX_MOD, "context state table is too small\n");
  47 }
  48
  49 void Entropy::codeVPS(const VPS& vps)
  50 {
  51     WRITE_CODE(0,       4, "vps_video_parameter_set_id");
  52     WRITE_CODE(3,       2, "vps_reserved_three_2bits");
  53     WRITE_CODE(0,       6, "vps_reserved_zero_6bits");
  54     WRITE_CODE(0,       3, "vps_max_sub_layers_minus1");
  55     WRITE_FLAG(1,          "vps_temporal_id_nesting_flag");
  56     WRITE_CODE(0xffff, 16, "vps_reserved_ffff_16bits");
  57
  58     codeProfileTier(vps.ptl);
  59
  60     WRITE_FLAG(true, "vps_sub_layer_ordering_info_present_flag");
  61     WRITE_UVLC(vps.maxDecPicBuffering - 1, "vps_max_dec_pic_buffering_minus1[i]");
  62     WRITE_UVLC(vps.numReorderPics,         "vps_num_reorder_pics[i]");
  63
  64     WRITE_UVLC(0,    "vps_max_latency_increase_plus1[i]");
  65     WRITE_CODE(0, 6, "vps_max_nuh_reserved_zero_layer_id");
  66     WRITE_UVLC(0,    "vps_max_op_sets_minus1");
  67     WRITE_FLAG(0,    "vps_timing_info_present_flag"); /* we signal timing info in SPS-VUI */
  68     WRITE_FLAG(0,    "vps_extension_flag");
  69 }
  70
  71 void Entropy::codeSPS(const SPS& sps, const ScalingList& scalingList, const ProfileTierLevel& ptl)
  72 {
  73     WRITE_CODE(0, 4, "sps_video_parameter_set_id");
  74     WRITE_CODE(0, 3, "sps_max_sub_layers_minus1");
  75     WRITE_FLAG(1,    "sps_temporal_id_nesting_flag");
  76
  77     codeProfileTier(ptl);
  78
  79     WRITE_UVLC(0, "sps_seq_parameter_set_id");
  80     WRITE_UVLC(sps.chromaFormatIdc, "chroma_format_idc");
  81
  82     if (sps.chromaFormatIdc == X265_CSP_I444)
  83         WRITE_FLAG(0,                        "separate_colour_plane_flag");
  84
  85     WRITE_UVLC(sps.picWidthInLumaSamples,   "pic_width_in_luma_samples");
  86     WRITE_UVLC(sps.picHeightInLumaSamples,  "pic_height_in_luma_samples");
  87
  88     const Window& conf = sps.conformanceWindow;
  89     WRITE_FLAG(conf.bEnabled, "conformance_window_flag");
  90     if (conf.bEnabled)
  91     {
  92         int hShift = CHROMA_H_SHIFT(sps.chromaFormatIdc), vShift = CHROMA_V_SHIFT(sps.chromaFormatIdc);
  93         WRITE_UVLC(conf.leftOffset   >> hShift, "conf_win_left_offset");
  94         WRITE_UVLC(conf.rightOffset  >> hShift, "conf_win_right_offset");
  95         WRITE_UVLC(conf.topOffset    >> vShift, "conf_win_top_offset");
  96         WRITE_UVLC(conf.bottomOffset >> vShift, "conf_win_bottom_offset");
  97     }
  98
  99     WRITE_UVLC(X265_DEPTH - 8,   "bit_depth_luma_minus8");
 100     WRITE_UVLC(X265_DEPTH - 8,   "bit_depth_chroma_minus8");
 101     WRITE_UVLC(BITS_FOR_POC - 4, "log2_max_pic_order_cnt_lsb_minus4");
 102     WRITE_FLAG(true,             "sps_sub_layer_ordering_info_present_flag");
 103
 104     WRITE_UVLC(sps.maxDecPicBuffering - 1, "sps_max_dec_pic_buffering_minus1[i]");
 105     WRITE_UVLC(sps.numReorderPics,         "sps_num_reorder_pics[i]");
 106     WRITE_UVLC(0,                          "sps_max_latency_increase_plus1[i]");
 107
 108     WRITE_UVLC(sps.log2MinCodingBlockSize - 3,    "log2_min_coding_block_size_minus3");
 109     WRITE_UVLC(sps.log2DiffMaxMinCodingBlockSize, "log2_diff_max_min_coding_block_size");
 110     WRITE_UVLC(sps.quadtreeTULog2MinSize - 2,     "log2_min_transform_block_size_minus2");
 111     WRITE_UVLC(sps.quadtreeTULog2MaxSize - sps.quadtreeTULog2MinSize, "log2_diff_max_min_transform_block_size");
 112     WRITE_UVLC(sps.quadtreeTUMaxDepthInter - 1,   "max_transform_hierarchy_depth_inter");
 113     WRITE_UVLC(sps.quadtreeTUMaxDepthIntra - 1,   "max_transform_hierarchy_depth_intra");
 114     WRITE_FLAG(scalingList.m_bEnabled,            "scaling_list_enabled_flag");
 115     if (scalingList.m_bEnabled)
 116     {
 117         WRITE_FLAG(scalingList.m_bDataPresent,    "sps_scaling_list_data_present_flag");
 118         if (scalingList.m_bDataPresent)
 119             codeScalingList(scalingList);
 120     }
 121     WRITE_FLAG(sps.bUseAMP, "amp_enabled_flag");
 122     WRITE_FLAG(sps.bUseSAO, "sample_adaptive_offset_enabled_flag");
 123
 124     WRITE_FLAG(0, "pcm_enabled_flag");
 125     WRITE_UVLC(0, "num_short_term_ref_pic_sets");
 126     WRITE_FLAG(0, "long_term_ref_pics_present_flag");
 127
 128     WRITE_FLAG(sps.bTemporalMVPEnabled, "sps_temporal_mvp_enable_flag");
 129     WRITE_FLAG(sps.bUseStrongIntraSmoothing, "sps_strong_intra_smoothing_enable_flag");
 130
 131     WRITE_FLAG(1, "vui_parameters_present_flag");
 132     codeVUI(sps.vuiParameters);
 133
 134     WRITE_FLAG(0, "sps_extension_flag");
 135 }
 136
 137 void Entropy::codePPS(const PPS& pps)
 138 {
 139     WRITE_UVLC(0,                          "pps_pic_parameter_set_id");
 140     WRITE_UVLC(0,                          "pps_seq_parameter_set_id");
 141     WRITE_FLAG(0,                          "dependent_slice_segments_enabled_flag");
 142     WRITE_FLAG(0,                          "output_flag_present_flag");
 143     WRITE_CODE(0, 3,                       "num_extra_slice_header_bits");
 144     WRITE_FLAG(pps.bSignHideEnabled,       "sign_data_hiding_flag");
 145     WRITE_FLAG(0,                          "cabac_init_present_flag");
 146     WRITE_UVLC(0,                          "num_ref_idx_l0_default_active_minus1");
 147     WRITE_UVLC(0,                          "num_ref_idx_l1_default_active_minus1");
 148
 149     WRITE_SVLC(0, "init_qp_minus26");
 150     WRITE_FLAG(pps.bConstrainedIntraPred, "constrained_intra_pred_flag");
 151     WRITE_FLAG(pps.bTransformSkipEnabled, "transform_skip_enabled_flag");
 152
 153     WRITE_FLAG(pps.bUseDQP,                "cu_qp_delta_enabled_flag");
 154     if (pps.bUseDQP)
 155         WRITE_UVLC(pps.maxCuDQPDepth,      "diff_cu_qp_delta_depth");
 156
 157     WRITE_SVLC(pps.chromaQpOffset[0],      "pps_cb_qp_offset");
 158     WRITE_SVLC(pps.chromaQpOffset[1],      "pps_cr_qp_offset");
 159     WRITE_FLAG(0,                          "pps_slice_chroma_qp_offsets_present_flag");
 160
 161     WRITE_FLAG(pps.bUseWeightPred,            "weighted_pred_flag");
 162     WRITE_FLAG(pps.bUseWeightedBiPred,        "weighted_bipred_flag");
 163     WRITE_FLAG(pps.bTransquantBypassEnabled,  "transquant_bypass_enable_flag");
 164     WRITE_FLAG(0,                             "tiles_enabled_flag");
 165     WRITE_FLAG(pps.bEntropyCodingSyncEnabled, "entropy_coding_sync_enabled_flag");
 166     WRITE_FLAG(1,                             "loop_filter_across_slices_enabled_flag");
 167
 168     WRITE_FLAG(pps.bDeblockingFilterControlPresent, "deblocking_filter_control_present_flag");
 169     if (pps.bDeblockingFilterControlPresent)
 170     {
 171         WRITE_FLAG(0,                               "deblocking_filter_override_enabled_flag");
 172         WRITE_FLAG(pps.bPicDisableDeblockingFilter, "pps_disable_deblocking_filter_flag");
 173         if (!pps.bPicDisableDeblockingFilter)
 174         {
 175             WRITE_SVLC(pps.deblockingFilterBetaOffsetDiv2, "pps_beta_offset_div2");
 176             WRITE_SVLC(pps.deblockingFilterTcOffsetDiv2,   "pps_tc_offset_div2");
 177         }
 178     }
 179
 180     WRITE_FLAG(0, "pps_scaling_list_data_present_flag");
 181     WRITE_FLAG(0, "lists_modification_present_flag");
 182     WRITE_UVLC(0, "log2_parallel_merge_level_minus2");
 183     WRITE_FLAG(0, "slice_segment_header_extension_present_flag");
 184     WRITE_FLAG(0, "pps_extension_flag");
 185 }
 186
 187 void Entropy::codeProfileTier(const ProfileTierLevel& ptl)
 188 {
 189     WRITE_CODE(0, 2,                "XXX_profile_space[]");
 190     WRITE_FLAG(ptl.tierFlag,        "XXX_tier_flag[]");
 191     WRITE_CODE(ptl.profileIdc, 5,   "XXX_profile_idc[]");
 192     for (int j = 0; j < 32; j++)
 193         WRITE_FLAG(ptl.profileCompatibilityFlag[j], "XXX_profile_compatibility_flag[][j]");
 194
 195     WRITE_FLAG(ptl.progressiveSourceFlag,   "general_progressive_source_flag");
 196     WRITE_FLAG(ptl.interlacedSourceFlag,    "general_interlaced_source_flag");
 197     WRITE_FLAG(ptl.nonPackedConstraintFlag, "general_non_packed_constraint_flag");
 198     WRITE_FLAG(ptl.frameOnlyConstraintFlag, "general_frame_only_constraint_flag");
 199
 200     if (ptl.profileIdc == Profile::MAINREXT || ptl.profileIdc == Profile::HIGHTHROUGHPUTREXT)
 201     {
 202         uint32_t bitDepthConstraint = ptl.bitDepthConstraint;
 203         int csp = ptl.chromaFormatConstraint;
 204         WRITE_FLAG(bitDepthConstraint<=12, "general_max_12bit_constraint_flag");
 205         WRITE_FLAG(bitDepthConstraint<=10, "general_max_10bit_constraint_flag");
 206         WRITE_FLAG(bitDepthConstraint<= 8 && csp != X265_CSP_I422 , "general_max_8bit_constraint_flag");
 207         WRITE_FLAG(csp == X265_CSP_I422 || csp == X265_CSP_I420 || csp == X265_CSP_I400, "general_max_422chroma_constraint_flag");
 208         WRITE_FLAG(csp == X265_CSP_I420 || csp == X265_CSP_I400,                         "general_max_420chroma_constraint_flag");
 209         WRITE_FLAG(csp == X265_CSP_I400,                                                 "general_max_monochrome_constraint_flag");
 210         WRITE_FLAG(ptl.intraConstraintFlag,        "general_intra_constraint_flag");
 211         WRITE_FLAG(0,                              "general_one_picture_only_constraint_flag");
 212         WRITE_FLAG(ptl.lowerBitRateConstraintFlag, "general_lower_bit_rate_constraint_flag");
 213         WRITE_CODE(0 , 16, "XXX_reserved_zero_35bits[0..15]");
 214         WRITE_CODE(0 , 16, "XXX_reserved_zero_35bits[16..31]");
 215         WRITE_CODE(0 ,  3, "XXX_reserved_zero_35bits[32..34]");
 216     }
 217     else
 218     {
 219         WRITE_CODE(0, 16, "XXX_reserved_zero_44bits[0..15]");
 220         WRITE_CODE(0, 16, "XXX_reserved_zero_44bits[16..31]");
 221         WRITE_CODE(0, 12, "XXX_reserved_zero_44bits[32..43]");
 222     }
 223
 224     WRITE_CODE(ptl.levelIdc, 8, "general_level_idc");
 225 }
 226
 227 void Entropy::codeVUI(const VUI& vui)
 228 {
 229     WRITE_FLAG(vui.aspectRatioInfoPresentFlag,  "aspect_ratio_info_present_flag");
 230     if (vui.aspectRatioInfoPresentFlag)
 231     {
 232         WRITE_CODE(vui.aspectRatioIdc, 8,       "aspect_ratio_idc");
 233         if (vui.aspectRatioIdc == 255)
 234         {
 235             WRITE_CODE(vui.sarWidth, 16,        "sar_width");
 236             WRITE_CODE(vui.sarHeight, 16,       "sar_height");
 237         }
 238     }
 239
 240     WRITE_FLAG(vui.overscanInfoPresentFlag,     "overscan_info_present_flag");
 241     if (vui.overscanInfoPresentFlag)
 242         WRITE_FLAG(vui.overscanAppropriateFlag, "overscan_appropriate_flag");
 243
 244     WRITE_FLAG(vui.videoSignalTypePresentFlag,  "video_signal_type_present_flag");
 245     if (vui.videoSignalTypePresentFlag)
 246     {
 247         WRITE_CODE(vui.videoFormat, 3,          "video_format");
 248         WRITE_FLAG(vui.videoFullRangeFlag,      "video_full_range_flag");
 249         WRITE_FLAG(vui.colourDescriptionPresentFlag, "colour_description_present_flag");
 250         if (vui.colourDescriptionPresentFlag)
 251         {
 252             WRITE_CODE(vui.colourPrimaries, 8,         "colour_primaries");
 253             WRITE_CODE(vui.transferCharacteristics, 8, "transfer_characteristics");
 254             WRITE_CODE(vui.matrixCoefficients, 8,      "matrix_coefficients");
 255         }
 256     }
 257
 258     WRITE_FLAG(vui.chromaLocInfoPresentFlag,           "chroma_loc_info_present_flag");
 259     if (vui.chromaLocInfoPresentFlag)
 260     {
 261         WRITE_UVLC(vui.chromaSampleLocTypeTopField,    "chroma_sample_loc_type_top_field");
 262         WRITE_UVLC(vui.chromaSampleLocTypeBottomField, "chroma_sample_loc_type_bottom_field");
 263     }
 264
 265     WRITE_FLAG(0,                                     "neutral_chroma_indication_flag");
 266     WRITE_FLAG(vui.fieldSeqFlag,                      "field_seq_flag");
 267     WRITE_FLAG(vui.frameFieldInfoPresentFlag,         "frame_field_info_present_flag");
 268
 269     WRITE_FLAG(vui.defaultDisplayWindow.bEnabled,    "default_display_window_flag");
 270     if (vui.defaultDisplayWindow.bEnabled)
 271     {
 272         WRITE_UVLC(vui.defaultDisplayWindow.leftOffset,   "def_disp_win_left_offset");
 273         WRITE_UVLC(vui.defaultDisplayWindow.rightOffset,  "def_disp_win_right_offset");
 274         WRITE_UVLC(vui.defaultDisplayWindow.topOffset,    "def_disp_win_top_offset");
 275         WRITE_UVLC(vui.defaultDisplayWindow.bottomOffset, "def_disp_win_bottom_offset");
 276     }
 277
 278     WRITE_FLAG(1,                                 "vui_timing_info_present_flag");
 279     WRITE_CODE(vui.timingInfo.numUnitsInTick, 32, "vui_num_units_in_tick");
 280     WRITE_CODE(vui.timingInfo.timeScale, 32,      "vui_time_scale");
 281     WRITE_FLAG(0,                                 "vui_poc_proportional_to_timing_flag");
 282
 283     WRITE_FLAG(vui.hrdParametersPresentFlag,  "vui_hrd_parameters_present_flag");
 284     if (vui.hrdParametersPresentFlag)
 285         codeHrdParameters(vui.hrdParameters);
 286
 287     WRITE_FLAG(0, "bitstream_restriction_flag");
 288 }
 289
 290 void Entropy::codeScalingList(const ScalingList& scalingList)
 291 {
 292     for (int sizeId = 0; sizeId < ScalingList::NUM_SIZES; sizeId++)
 293     {
 294         for (int listId = 0; listId < ScalingList::NUM_LISTS; listId++)
 295         {
 296             int predList = scalingList.checkPredMode(sizeId, listId);
 297             WRITE_FLAG(predList < 0, "scaling_list_pred_mode_flag");
 298             if (predList >= 0)
 299                 WRITE_UVLC(listId - predList, "scaling_list_pred_matrix_id_delta");
 300             else // DPCM Mode
 301                 codeScalingList(scalingList, sizeId, listId);
 302         }
 303     }
 304 }
 305
 306 void Entropy::codeScalingList(const ScalingList& scalingList, uint32_t sizeId, uint32_t listId)
 307 {
 308     int coefNum = X265_MIN(ScalingList::MAX_MATRIX_COEF_NUM, (int)ScalingList::s_numCoefPerSize[sizeId]);
 309     const uint16_t* scan = (sizeId == 0 ? g_scan4x4[SCAN_DIAG] : g_scan8x8diag);
 310     int nextCoef = START_VALUE;
 311     int32_t *src = scalingList.m_scalingListCoef[sizeId][listId];
 312     int data;
 313
 314     if (sizeId > BLOCK_8x8)
 315     {
 316         WRITE_SVLC(scalingList.m_scalingListDC[sizeId][listId] - 8, "scaling_list_dc_coef_minus8");
 317         nextCoef = scalingList.m_scalingListDC[sizeId][listId];
 318     }
 319     for (int i = 0; i < coefNum; i++)
 320     {
 321         data = src[scan[i]] - nextCoef;
 322         nextCoef = src[scan[i]];
 323         if (data > 127)
 324             data = data - 256;
 325         if (data < -128)
 326             data = data + 256;
 327
 328         WRITE_SVLC(data,  "scaling_list_delta_coef");
 329     }
 330 }
 331
 332 void Entropy::codeHrdParameters(const HRDInfo& hrd)
 333 {
 334     WRITE_FLAG(1, "nal_hrd_parameters_present_flag");
 335     WRITE_FLAG(0, "vcl_hrd_parameters_present_flag");
 336     WRITE_FLAG(0, "sub_pic_hrd_params_present_flag");
 337
 338     WRITE_CODE(hrd.bitRateScale, 4, "bit_rate_scale");
 339     WRITE_CODE(hrd.cpbSizeScale, 4, "cpb_size_scale");
 340
 341     WRITE_CODE(hrd.initialCpbRemovalDelayLength - 1, 5, "initial_cpb_removal_delay_length_minus1");
 342     WRITE_CODE(hrd.cpbRemovalDelayLength - 1,        5, "au_cpb_removal_delay_length_minus1");
 343     WRITE_CODE(hrd.dpbOutputDelayLength - 1,         5, "dpb_output_delay_length_minus1");
 344
 345     WRITE_FLAG(1, "fixed_pic_rate_general_flag");
 346     WRITE_UVLC(0, "elemental_duration_in_tc_minus1");
 347     WRITE_UVLC(0, "cpb_cnt_minus1");
 348
 349     WRITE_UVLC(hrd.bitRateValue - 1, "bit_rate_value_minus1");
 350     WRITE_UVLC(hrd.cpbSizeValue - 1, "cpb_size_value_minus1");
 351     WRITE_FLAG(hrd.cbrFlag, "cbr_flag");
 352 }
 353
 354 void Entropy::codeAUD(const Slice& slice)
 355 {
 356     int picType;
 357
 358     switch (slice.m_sliceType)
 359     {
 360     case I_SLICE:
 361         picType = 0;
 362         break;
 363     case P_SLICE:
 364         picType = 1;
 365         break;
 366     case B_SLICE:
 367         picType = 2;
 368         break;
 369     default:
 370         picType = 7;
 371         break;
 372     }
 373
 374     WRITE_CODE(picType, 3, "pic_type");
 375 }
 376
 377 void Entropy::codeSliceHeader(const Slice& slice, FrameData& encData)
 378 {
 379     WRITE_FLAG(1, "first_slice_segment_in_pic_flag");
 380     if (slice.getRapPicFlag())
 381         WRITE_FLAG(0, "no_output_of_prior_pics_flag");
 382
 383     WRITE_UVLC(0, "slice_pic_parameter_set_id");
 384
 385     /* x265 does not use dependent slices, so always write all this data */
 386
 387     WRITE_UVLC(slice.m_sliceType, "slice_type");
 388
 389     if (!slice.getIdrPicFlag())
 390     {
 391         int picOrderCntLSB = (slice.m_poc - slice.m_lastIDR + (1 << BITS_FOR_POC)) % (1 << BITS_FOR_POC);
 392         WRITE_CODE(picOrderCntLSB, BITS_FOR_POC, "pic_order_cnt_lsb");
 393
 394 #if _DEBUG || CHECKED_BUILD
 395         // check for bitstream restriction stating that:
 396         // If the current picture is a BLA or CRA picture, the value of NumPocTotalCurr shall be equal to 0.
 397         // Ideally this process should not be repeated for each slice in a picture
 398         if (slice.isIRAP())
 399             for (int picIdx = 0; picIdx < slice.m_rps.numberOfPictures; picIdx++)
 400             {
 401                 X265_CHECK(!slice.m_rps.bUsed[picIdx], "pic unused failure\n");
 402             }
 403 #endif
 404
 405         WRITE_FLAG(0, "short_term_ref_pic_set_sps_flag");
 406         codeShortTermRefPicSet(slice.m_rps);
 407
 408         if (slice.m_sps->bTemporalMVPEnabled)
 409             WRITE_FLAG(1, "slice_temporal_mvp_enable_flag");
 410     }
 411     const SAOParam *saoParam = encData.m_saoParam;
 412     if (slice.m_sps->bUseSAO)
 413     {
 414         WRITE_FLAG(saoParam->bSaoFlag[0], "slice_sao_luma_flag");
 415         WRITE_FLAG(saoParam->bSaoFlag[1], "slice_sao_chroma_flag");
 416     }
 417
 418     // check if numRefIdx match the defaults (1, hard-coded in PPS). If not, override
 419     // TODO: this might be a place to optimize a few bits per slice, by using param->refs for L0 default
 420
 421     if (!slice.isIntra())
 422     {
 423         bool overrideFlag = (slice.m_numRefIdx[0] != 1 || (slice.isInterB() && slice.m_numRefIdx[1] != 1));
 424         WRITE_FLAG(overrideFlag, "num_ref_idx_active_override_flag");
 425         if (overrideFlag)
 426         {
 427             WRITE_UVLC(slice.m_numRefIdx[0] - 1, "num_ref_idx_l0_active_minus1");
 428             if (slice.isInterB())
 429                 WRITE_UVLC(slice.m_numRefIdx[1] - 1, "num_ref_idx_l1_active_minus1");
 430             else
 431             {
 432                 X265_CHECK(slice.m_numRefIdx[1] == 0, "expected no L1 references for P slice\n");
 433             }
 434         }
 435     }
 436     else
 437     {
 438         X265_CHECK(!slice.m_numRefIdx[0] && !slice.m_numRefIdx[1], "expected no references for I slice\n");
 439     }
 440
 441     if (slice.isInterB())
 442         WRITE_FLAG(0, "mvd_l1_zero_flag");
 443
 444     if (slice.m_sps->bTemporalMVPEnabled)
 445     {
 446         if (slice.m_sliceType == B_SLICE)
 447             WRITE_FLAG(slice.m_colFromL0Flag, "collocated_from_l0_flag");
 448
 449         if (slice.m_sliceType != I_SLICE &&
 450             ((slice.m_colFromL0Flag && slice.m_numRefIdx[0] > 1) ||
 451             (!slice.m_colFromL0Flag && slice.m_numRefIdx[1] > 1)))
 452         {
 453             WRITE_UVLC(slice.m_colRefIdx, "collocated_ref_idx");
 454         }
 455     }
 456     if ((slice.m_pps->bUseWeightPred && slice.m_sliceType == P_SLICE) || (slice.m_pps->bUseWeightedBiPred && slice.m_sliceType == B_SLICE))
 457         codePredWeightTable(slice);
 458
 459     X265_CHECK(slice.m_maxNumMergeCand <= MRG_MAX_NUM_CANDS, "too many merge candidates\n");
 460     if (!slice.isIntra())
 461         WRITE_UVLC(MRG_MAX_NUM_CANDS - slice.m_maxNumMergeCand, "five_minus_max_num_merge_cand");
 462
 463     int code = slice.m_sliceQp - 26;
 464     WRITE_SVLC(code, "slice_qp_delta");
 465
 466     bool isSAOEnabled = slice.m_sps->bUseSAO ? saoParam->bSaoFlag[0] || saoParam->bSaoFlag[1] : false;
 467     bool isDBFEnabled = !slice.m_pps->bPicDisableDeblockingFilter;
 468
 469     if (isSAOEnabled || isDBFEnabled)
 470         WRITE_FLAG(slice.m_sLFaseFlag, "slice_loop_filter_across_slices_enabled_flag");
 471 }
 472
 473 /** write wavefront substreams sizes for the slice header */
 474 void Entropy::codeSliceHeaderWPPEntryPoints(const Slice& slice, const uint32_t *substreamSizes, uint32_t maxOffset)
 475 {
 476     uint32_t offsetLen = 1;
 477     while (maxOffset >= (1U << offsetLen))
 478     {
 479         offsetLen++;
 480         X265_CHECK(offsetLen < 32, "offsetLen is too large\n");
 481     }
 482
 483     uint32_t numRows = slice.m_sps->numCuInHeight - 1;
 484     WRITE_UVLC(numRows, "num_entry_point_offsets");
 485     if (numRows > 0)
 486         WRITE_UVLC(offsetLen - 1, "offset_len_minus1");
 487
 488     for (uint32_t i = 0; i < numRows; i++)
 489         WRITE_CODE(substreamSizes[i] - 1, offsetLen, "entry_point_offset_minus1");
 490 }
 491
 492 void Entropy::codeShortTermRefPicSet(const RPS& rps)
 493 {
 494     WRITE_UVLC(rps.numberOfNegativePictures, "num_negative_pics");
 495     WRITE_UVLC(rps.numberOfPositivePictures, "num_positive_pics");
 496     int prev = 0;
 497     for (int j = 0; j < rps.numberOfNegativePictures; j++)
 498     {
 499         WRITE_UVLC(prev - rps.deltaPOC[j] - 1, "delta_poc_s0_minus1");
 500         prev = rps.deltaPOC[j];
 501         WRITE_FLAG(rps.bUsed[j], "used_by_curr_pic_s0_flag");
 502     }
 503
 504     prev = 0;
 505     for (int j = rps.numberOfNegativePictures; j < rps.numberOfNegativePictures + rps.numberOfPositivePictures; j++)
 506     {
 507         WRITE_UVLC(rps.deltaPOC[j] - prev - 1, "delta_poc_s1_minus1");
 508         prev = rps.deltaPOC[j];
 509         WRITE_FLAG(rps.bUsed[j], "used_by_curr_pic_s1_flag");
 510     }
 511 }
 512
 513 void Entropy::encodeCTU(const CUData& ctu, const CUGeom& cuGeom)
 514 {
 515     bool bEncodeDQP = ctu.m_slice->m_pps->bUseDQP;
 516     encodeCU(ctu, cuGeom, 0, 0, bEncodeDQP);
 517 }
 518
 519 /* encode a CU block recursively */
 520 void Entropy::encodeCU(const CUData& ctu, const CUGeom& cuGeom, uint32_t absPartIdx, uint32_t depth, bool& bEncodeDQP)
 521 {
 522     const Slice* slice = ctu.m_slice;
 523
 524     if (depth <= slice->m_pps->maxCuDQPDepth && slice->m_pps->bUseDQP)
 525         bEncodeDQP = true;
 526
 527     int cuSplitFlag = !(cuGeom.flags & CUGeom::LEAF);
 528     int cuUnsplitFlag = !(cuGeom.flags & CUGeom::SPLIT_MANDATORY);
 529
 530     if (!cuUnsplitFlag)
 531     {
 532         uint32_t qNumParts = cuGeom.numPartitions >> 2;
 533         for (uint32_t qIdx = 0; qIdx < 4; ++qIdx, absPartIdx += qNumParts)
 534         {
 535             const CUGeom& childGeom = *(&cuGeom + cuGeom.childOffset + qIdx);
 536             if (childGeom.flags & CUGeom::PRESENT)
 537                 encodeCU(ctu, childGeom, absPartIdx, depth + 1, bEncodeDQP);
 538         }
 539         return;
 540     }
 541
 542     // We need to split, so don't try these modes.
 543     if (cuSplitFlag)
 544         codeSplitFlag(ctu, absPartIdx, depth);
 545
 546     if (depth < ctu.m_cuDepth[absPartIdx] && depth < g_maxCUDepth)
 547     {
 548         uint32_t qNumParts = cuGeom.numPartitions >> 2;
 549         for (uint32_t qIdx = 0; qIdx < 4; ++qIdx, absPartIdx += qNumParts)
 550         {
 551             const CUGeom& childGeom = *(&cuGeom + cuGeom.childOffset + qIdx);
 552             encodeCU(ctu, childGeom, absPartIdx, depth + 1, bEncodeDQP);
 553         }
 554         return;
 555     }
 556
 557     if (slice->m_pps->bTransquantBypassEnabled)
 558         codeCUTransquantBypassFlag(ctu.m_tqBypass[absPartIdx]);
 559
 560     if (!slice->isIntra())
 561     {
 562         codeSkipFlag(ctu, absPartIdx);
 563         if (ctu.isSkipped(absPartIdx))
 564         {
 565             codeMergeIndex(ctu, absPartIdx);
 566             finishCU(ctu, absPartIdx, depth);
 567             return;
 568         }
 569         codePredMode(ctu.m_predMode[absPartIdx]);
 570     }
 571
 572     codePartSize(ctu, absPartIdx, depth);
 573
 574     // prediction Info ( Intra : direction mode, Inter : Mv, reference idx )
 575     codePredInfo(ctu, absPartIdx);
 576
 577     uint32_t tuDepthRange[2];
 578     if (ctu.isIntra(absPartIdx))
 579         ctu.getIntraTUQtDepthRange(tuDepthRange, absPartIdx);
 580     else
 581         ctu.getInterTUQtDepthRange(tuDepthRange, absPartIdx);
 582
 583     // Encode Coefficients, allow codeCoeff() to modify bEncodeDQP
 584     codeCoeff(ctu, absPartIdx, bEncodeDQP, tuDepthRange);
 585
 586     // --- write terminating bit ---
 587     finishCU(ctu, absPartIdx, depth);
 588 }
 589
 590 /* Return bit count of signaling inter mode */
 591 uint32_t Entropy::bitsInterMode(const CUData& cu, uint32_t absPartIdx, uint32_t depth) const
 592 {
 593     uint32_t bits;
 594     bits = bitsCodeBin(0, m_contextState[OFF_SKIP_FLAG_CTX + cu.getCtxSkipFlag(absPartIdx)]); /* not skip */
 595     bits += bitsCodeBin(0, m_contextState[OFF_PRED_MODE_CTX]); /* inter */
 596     PartSize partSize = (PartSize)cu.m_partSize[absPartIdx];
 597     switch (partSize)
 598     {
 599     case SIZE_2Nx2N:
 600         bits += bitsCodeBin(1, m_contextState[OFF_PART_SIZE_CTX]);
 601         break;
 602
 603     case SIZE_2NxN:
 604     case SIZE_2NxnU:
 605     case SIZE_2NxnD:
 606         bits += bitsCodeBin(0, m_contextState[OFF_PART_SIZE_CTX + 0]);
 607         bits += bitsCodeBin(1, m_contextState[OFF_PART_SIZE_CTX + 1]);
 608         if (cu.m_slice->m_sps->maxAMPDepth > depth)
 609         {
 610             bits += bitsCodeBin((partSize == SIZE_2NxN) ? 1 : 0, m_contextState[OFF_PART_SIZE_CTX + 3]);
 611             if (partSize != SIZE_2NxN)
 612                 bits++; // encodeBinEP((partSize == SIZE_2NxnU ? 0 : 1));
 613         }
 614         break;
 615
 616     case SIZE_Nx2N:
 617     case SIZE_nLx2N:
 618     case SIZE_nRx2N:
 619         bits += bitsCodeBin(0, m_contextState[OFF_PART_SIZE_CTX + 0]);
 620         bits += bitsCodeBin(0, m_contextState[OFF_PART_SIZE_CTX + 1]);
 621         if (depth == g_maxCUDepth && !(cu.m_log2CUSize[absPartIdx] == 3))
 622             bits += bitsCodeBin(1, m_contextState[OFF_PART_SIZE_CTX + 2]);
 623         if (cu.m_slice->m_sps->maxAMPDepth > depth)
 624         {
 625             bits += bitsCodeBin((partSize == SIZE_Nx2N) ? 1 : 0, m_contextState[OFF_PART_SIZE_CTX + 3]);
 626             if (partSize != SIZE_Nx2N)
 627                 bits++; // encodeBinEP((partSize == SIZE_nLx2N ? 0 : 1));
 628         }
 629         break;
 630     default:
 631         X265_CHECK(0, "invalid CU partition\n");
 632         break;
 633     }
 634
 635     return bits;
 636 }
 637
 638 /* finish encoding a cu and handle end-of-slice conditions */
 639 void Entropy::finishCU(const CUData& ctu, uint32_t absPartIdx, uint32_t depth)
 640 {
 641     const Slice* slice = ctu.m_slice;
 642     uint32_t realEndAddress = slice->m_endCUAddr;
 643     uint32_t cuAddr = ctu.getSCUAddr() + absPartIdx;
 644     X265_CHECK(realEndAddress == slice->realEndAddress(slice->m_endCUAddr), "real end address expected\n");
 645
 646     uint32_t granularityMask = g_maxCUSize - 1;
 647     uint32_t cuSize = 1 << ctu.m_log2CUSize[absPartIdx];
 648     uint32_t rpelx = ctu.m_cuPelX + g_zscanToPelX[absPartIdx] + cuSize;
 649     uint32_t bpely = ctu.m_cuPelY + g_zscanToPelY[absPartIdx] + cuSize;
 650     bool granularityBoundary = (((rpelx & granularityMask) == 0 || (rpelx == slice->m_sps->picWidthInLumaSamples )) &&
 651                                 ((bpely & granularityMask) == 0 || (bpely == slice->m_sps->picHeightInLumaSamples)));
 652
 653     if (granularityBoundary)
 654     {
 655         // Encode slice finish
 656         bool bTerminateSlice = false;
 657         if (cuAddr + (NUM_CU_PARTITIONS >> (depth << 1)) == realEndAddress)
 658             bTerminateSlice = true;
 659
 660         // The 1-terminating bit is added to all streams, so don't add it here when it's 1.
 661         if (!bTerminateSlice)
 662             encodeBinTrm(0);
 663
 664         if (!m_bitIf)
 665             resetBits(); // TODO: most likely unnecessary
 666     }
 667 }
 668
 669 void Entropy::encodeTransform(const CUData& cu, uint32_t absPartIdx, uint32_t tuDepth, uint32_t log2TrSize,
 670                               bool& bCodeDQP, const uint32_t depthRange[2])
 671 {
 672     const bool subdiv = cu.m_tuDepth[absPartIdx] > tuDepth;
 673
 674     /* in each of these conditions, the subdiv flag is implied and not signaled,
 675      * so we have checks to make sure the implied value matches our intentions */
 676     if (cu.isIntra(absPartIdx) && cu.m_partSize[absPartIdx] != SIZE_2Nx2N && !tuDepth)
 677     {
 678         X265_CHECK(subdiv, "intra NxN requires TU depth below CU depth\n");
 679     }
 680     else if (cu.isInter(absPartIdx) && cu.m_partSize[absPartIdx] != SIZE_2Nx2N && !tuDepth &&
 681              cu.m_slice->m_sps->quadtreeTUMaxDepthInter == 1)
 682     {
 683         X265_CHECK(subdiv, "inter TU must be smaller than CU when not 2Nx2N part size: log2TrSize %d, depthRange[0] %d\n", log2TrSize, depthRange[0]);
 684     }
 685     else if (log2TrSize > depthRange[1])
 686     {
 687         X265_CHECK(subdiv, "TU is larger than the max allowed, it should have been split\n");
 688     }
 689     else if (log2TrSize == cu.m_slice->m_sps->quadtreeTULog2MinSize || log2TrSize == depthRange[0])
 690     {
 691         X265_CHECK(!subdiv, "min sized TU cannot be subdivided\n");
 692     }
 693     else
 694     {
 695         X265_CHECK(log2TrSize > depthRange[0], "transform size failure\n");
 696         codeTransformSubdivFlag(subdiv, 5 - log2TrSize);
 697     }
 698
 699     uint32_t hChromaShift = cu.m_hChromaShift;
 700     uint32_t vChromaShift = cu.m_vChromaShift;
 701     bool bSmallChroma = (log2TrSize - hChromaShift < 2);
 702     if (!tuDepth || !bSmallChroma)
 703     {
 704         if (!tuDepth || cu.getCbf(absPartIdx, TEXT_CHROMA_U, tuDepth - 1))
 705             codeQtCbfChroma(cu, absPartIdx, TEXT_CHROMA_U, tuDepth, !subdiv);
 706         if (!tuDepth || cu.getCbf(absPartIdx, TEXT_CHROMA_V, tuDepth - 1))
 707             codeQtCbfChroma(cu, absPartIdx, TEXT_CHROMA_V, tuDepth, !subdiv);
 708     }
 709     else
 710     {
 711         X265_CHECK(cu.getCbf(absPartIdx, TEXT_CHROMA_U, tuDepth) == cu.getCbf(absPartIdx, TEXT_CHROMA_U, tuDepth - 1), "chroma xform size match failure\n");
 712         X265_CHECK(cu.getCbf(absPartIdx, TEXT_CHROMA_V, tuDepth) == cu.getCbf(absPartIdx, TEXT_CHROMA_V, tuDepth - 1), "chroma xform size match failure\n");
 713     }
 714
 715     if (subdiv)
 716     {
 717         --log2TrSize;
 718         ++tuDepth;
 719
 720         uint32_t qNumParts = 1 << (log2TrSize - LOG2_UNIT_SIZE) * 2;
 721
 722         encodeTransform(cu, absPartIdx + 0 * qNumParts, tuDepth, log2TrSize, bCodeDQP, depthRange);
 723         encodeTransform(cu, absPartIdx + 1 * qNumParts, tuDepth, log2TrSize, bCodeDQP, depthRange);
 724         encodeTransform(cu, absPartIdx + 2 * qNumParts, tuDepth, log2TrSize, bCodeDQP, depthRange);
 725         encodeTransform(cu, absPartIdx + 3 * qNumParts, tuDepth, log2TrSize, bCodeDQP, depthRange);
 726         return;
 727     }
 728
 729     uint32_t absPartIdxC = bSmallChroma ? absPartIdx & 0xFC : absPartIdx;
 730
 731     if (cu.isInter(absPartIdxC) && !tuDepth && !cu.getCbf(absPartIdxC, TEXT_CHROMA_U, 0) && !cu.getCbf(absPartIdxC, TEXT_CHROMA_V, 0))
 732     {
 733         X265_CHECK(cu.getCbf(absPartIdxC, TEXT_LUMA, 0), "CBF should have been set\n");
 734     }
 735     else
 736         codeQtCbfLuma(cu, absPartIdx, tuDepth);
 737
 738     uint32_t cbfY = cu.getCbf(absPartIdx, TEXT_LUMA, tuDepth);
 739     uint32_t cbfU = cu.getCbf(absPartIdxC, TEXT_CHROMA_U, tuDepth);
 740     uint32_t cbfV = cu.getCbf(absPartIdxC, TEXT_CHROMA_V, tuDepth);
 741     if (!(cbfY || cbfU || cbfV))
 742         return;
 743
 744     // dQP: only for CTU once
 745     if (cu.m_slice->m_pps->bUseDQP && bCodeDQP)
 746     {
 747         uint32_t log2CUSize = cu.m_log2CUSize[absPartIdx];
 748         uint32_t absPartIdxLT = absPartIdx & (0xFF << (log2CUSize - LOG2_UNIT_SIZE) * 2);
 749         codeDeltaQP(cu, absPartIdxLT);
 750         bCodeDQP = false;
 751     }
 752
 753     if (cbfY)
 754     {
 755         uint32_t coeffOffset = absPartIdx << (LOG2_UNIT_SIZE * 2);
 756         codeCoeffNxN(cu, cu.m_trCoeff[0] + coeffOffset, absPartIdx, log2TrSize, TEXT_LUMA);
 757         if (!(cbfU || cbfV))
 758             return;
 759     }
 760
 761     if (bSmallChroma)
 762     {
 763         if ((absPartIdx & 3) != 3)
 764             return;
 765
 766         const uint32_t log2TrSizeC = 2;
 767         const bool splitIntoSubTUs = (cu.m_chromaFormat == X265_CSP_I422);
 768         const uint32_t curPartNum = 4;
 769         uint32_t coeffOffsetC  = absPartIdxC << (LOG2_UNIT_SIZE * 2 - (hChromaShift + vChromaShift));
 770         for (uint32_t chromaId = TEXT_CHROMA_U; chromaId <= TEXT_CHROMA_V; chromaId++)
 771         {
 772             TURecurse tuIterator(splitIntoSubTUs ? VERTICAL_SPLIT : DONT_SPLIT, curPartNum, absPartIdxC);
 773             const coeff_t* coeffChroma = cu.m_trCoeff[chromaId];
 774             do
 775             {
 776                 if (cu.getCbf(tuIterator.absPartIdxTURelCU, (TextType)chromaId, tuDepth + splitIntoSubTUs))
 777                 {
 778                     uint32_t subTUOffset = tuIterator.section << (log2TrSizeC * 2);
 779                     codeCoeffNxN(cu, coeffChroma + coeffOffsetC + subTUOffset, tuIterator.absPartIdxTURelCU, log2TrSizeC, (TextType)chromaId);
 780                 }
 781             }
 782             while (tuIterator.isNextSection());
 783         }
 784     }
 785     else
 786     {
 787         uint32_t log2TrSizeC = log2TrSize - hChromaShift;
 788         const bool splitIntoSubTUs = (cu.m_chromaFormat == X265_CSP_I422);
 789         uint32_t curPartNum = 1 << (log2TrSize - LOG2_UNIT_SIZE) * 2;
 790         uint32_t coeffOffsetC  = absPartIdxC << (LOG2_UNIT_SIZE * 2 - (hChromaShift + vChromaShift));
 791         for (uint32_t chromaId = TEXT_CHROMA_U; chromaId <= TEXT_CHROMA_V; chromaId++)
 792         {
 793             TURecurse tuIterator(splitIntoSubTUs ? VERTICAL_SPLIT : DONT_SPLIT, curPartNum, absPartIdxC);
 794             const coeff_t* coeffChroma = cu.m_trCoeff[chromaId];
 795             do
 796             {
 797                 if (cu.getCbf(tuIterator.absPartIdxTURelCU, (TextType)chromaId, tuDepth + splitIntoSubTUs))
 798                 {
 799                     uint32_t subTUOffset = tuIterator.section << (log2TrSizeC * 2);
 800                     codeCoeffNxN(cu, coeffChroma + coeffOffsetC + subTUOffset, tuIterator.absPartIdxTURelCU, log2TrSizeC, (TextType)chromaId);
 801                 }
 802             }
 803             while (tuIterator.isNextSection());
 804         }
 805     }
 806 }
 807
 808 void Entropy::codePredInfo(const CUData& cu, uint32_t absPartIdx)
 809 {
 810     if (cu.isIntra(absPartIdx)) // If it is intra mode, encode intra prediction mode.
 811     {
 812         codeIntraDirLumaAng(cu, absPartIdx, true);
 813         if (cu.m_chromaFormat != X265_CSP_I400)
 814         {
 815             uint32_t chromaDirMode[NUM_CHROMA_MODE];
 816             cu.getAllowedChromaDir(absPartIdx, chromaDirMode);
 817
 818             codeIntraDirChroma(cu, absPartIdx, chromaDirMode);
 819
 820             if (cu.m_chromaFormat == X265_CSP_I444 && cu.m_partSize[absPartIdx] != SIZE_2Nx2N)
 821             {
 822                 uint32_t qNumParts = 1 << (cu.m_log2CUSize[absPartIdx] - 1 - LOG2_UNIT_SIZE) * 2;
 823                 for (uint32_t qIdx = 1; qIdx < 4; ++qIdx)
 824                 {
 825                     absPartIdx += qNumParts;
 826                     cu.getAllowedChromaDir(absPartIdx, chromaDirMode);
 827                     codeIntraDirChroma(cu, absPartIdx, chromaDirMode);
 828                 }
 829             }
 830         }
 831     }
 832     else // if it is inter mode, encode motion vector and reference index
 833         codePUWise(cu, absPartIdx);
 834 }
 835
 836 /** encode motion information for every PU block */
 837 void Entropy::codePUWise(const CUData& cu, uint32_t absPartIdx)
 838 {
 839     PartSize partSize = (PartSize)cu.m_partSize[absPartIdx];
 840     uint32_t numPU = (partSize == SIZE_2Nx2N ? 1 : (partSize == SIZE_NxN ? 4 : 2));
 841     uint32_t depth = cu.m_cuDepth[absPartIdx];
 842     uint32_t puOffset = (g_puOffset[uint32_t(partSize)] << (g_maxFullDepth - depth) * 2) >> 4;
 843
 844     for (uint32_t puIdx = 0, subPartIdx = absPartIdx; puIdx < numPU; puIdx++, subPartIdx += puOffset)
 845     {
 846         codeMergeFlag(cu, subPartIdx);
 847         if (cu.m_mergeFlag[subPartIdx])
 848             codeMergeIndex(cu, subPartIdx);
 849         else
 850         {
 851             if (cu.m_slice->isInterB())
 852                 codeInterDir(cu, subPartIdx);
 853
 854             uint32_t interDir = cu.m_interDir[subPartIdx];
 855             for (uint32_t list = 0; list < 2; list++)
 856             {
 857                 if (interDir & (1 << list))
 858                 {
 859                     X265_CHECK(cu.m_slice->m_numRefIdx[list] > 0, "numRefs should have been > 0\n");
 860
 861                     codeRefFrmIdxPU(cu, subPartIdx, list);
 862                     codeMvd(cu, subPartIdx, list);
 863                     codeMVPIdx(cu.m_mvpIdx[list][subPartIdx]);
 864                 }
 865             }
 866         }
 867     }
 868 }
 869
 870 /** encode reference frame index for a PU block */
 871 void Entropy::codeRefFrmIdxPU(const CUData& cu, uint32_t absPartIdx, int list)
 872 {
 873     X265_CHECK(!cu.isIntra(absPartIdx), "intra block not expected\n");
 874
 875     if (cu.m_slice->m_numRefIdx[list] > 1)
 876         codeRefFrmIdx(cu, absPartIdx, list);
 877 }
 878
 879 void Entropy::codeCoeff(const CUData& cu, uint32_t absPartIdx, bool& bCodeDQP, const uint32_t depthRange[2])
 880 {
 881     if (!cu.isIntra(absPartIdx))
 882     {
 883         if (!(cu.m_mergeFlag[absPartIdx] && cu.m_partSize[absPartIdx] == SIZE_2Nx2N))
 884             codeQtRootCbf(cu.getQtRootCbf(absPartIdx));
 885         if (!cu.getQtRootCbf(absPartIdx))
 886             return;
 887     }
 888
 889     uint32_t log2CUSize = cu.m_log2CUSize[absPartIdx];
 890     encodeTransform(cu, absPartIdx, 0, log2CUSize, bCodeDQP, depthRange);
 891 }
 892
 893 void Entropy::codeSaoOffset(const SaoCtuParam& ctuParam, int plane)
 894 {
 895     int typeIdx = ctuParam.typeIdx;
 896
 897     if (plane != 2)
 898     {
 899         encodeBin(typeIdx >= 0, m_contextState[OFF_SAO_TYPE_IDX_CTX]);
 900         if (typeIdx >= 0)
 901             encodeBinEP(typeIdx < SAO_BO ? 1 : 0);
 902     }
 903
 904     if (typeIdx >= 0)
 905     {
 906         enum { OFFSET_THRESH = 1 << X265_MIN(X265_DEPTH - 5, 5) };
 907         if (typeIdx == SAO_BO)
 908         {
 909             for (int i = 0; i < SAO_BO_LEN; i++)
 910                 codeSaoMaxUvlc(abs(ctuParam.offset[i]), OFFSET_THRESH - 1);
 911
 912             for (int i = 0; i < SAO_BO_LEN; i++)
 913                 if (ctuParam.offset[i] != 0)
 914                     encodeBinEP(ctuParam.offset[i] < 0);
 915
 916             encodeBinsEP(ctuParam.bandPos, 5);
 917         }
 918         else // if (typeIdx < SAO_BO)
 919         {
 920             codeSaoMaxUvlc(ctuParam.offset[0], OFFSET_THRESH - 1);
 921             codeSaoMaxUvlc(ctuParam.offset[1], OFFSET_THRESH - 1);
 922             codeSaoMaxUvlc(-ctuParam.offset[2], OFFSET_THRESH - 1);
 923             codeSaoMaxUvlc(-ctuParam.offset[3], OFFSET_THRESH - 1);
 924             if (plane != 2)
 925                 encodeBinsEP((uint32_t)(typeIdx), 2);
 926         }
 927     }
 928 }
 929
 930 /** initialize context model with respect to QP and initialization value */
 931 uint8_t sbacInit(int qp, int initValue)
 932 {
 933     qp = Clip3(0, 51, qp);
 934
 935     int  slope      = (initValue >> 4) * 5 - 45;
 936     int  offset     = ((initValue & 15) << 3) - 16;
 937     int  initState  =  X265_MIN(X265_MAX(1, (((slope * qp) >> 4) + offset)), 126);
 938     uint32_t mpState = (initState >= 64);
 939     uint32_t state = ((mpState ? (initState - 64) : (63 - initState)) << 1) + mpState;
 940
 941     return (uint8_t)state;
 942 }
 943
 944 static void initBuffer(uint8_t* contextModel, SliceType sliceType, int qp, uint8_t* ctxModel, int size)
 945 {
 946     ctxModel += sliceType * size;
 947
 948     for (int n = 0; n < size; n++)
 949         contextModel[n] = sbacInit(qp, ctxModel[n]);
 950 }
 951
 952 void Entropy::resetEntropy(const Slice& slice)
 953 {
 954     int  qp              = slice.m_sliceQp;
 955     SliceType sliceType  = slice.m_sliceType;
 956
 957     initBuffer(&m_contextState[OFF_SPLIT_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_SPLIT_FLAG, NUM_SPLIT_FLAG_CTX);
 958     initBuffer(&m_contextState[OFF_SKIP_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_SKIP_FLAG, NUM_SKIP_FLAG_CTX);
 959     initBuffer(&m_contextState[OFF_MERGE_FLAG_EXT_CTX], sliceType, qp, (uint8_t*)INIT_MERGE_FLAG_EXT, NUM_MERGE_FLAG_EXT_CTX);
 960     initBuffer(&m_contextState[OFF_MERGE_IDX_EXT_CTX], sliceType, qp, (uint8_t*)INIT_MERGE_IDX_EXT, NUM_MERGE_IDX_EXT_CTX);
 961     initBuffer(&m_contextState[OFF_PART_SIZE_CTX], sliceType, qp, (uint8_t*)INIT_PART_SIZE, NUM_PART_SIZE_CTX);
 962     initBuffer(&m_contextState[OFF_PRED_MODE_CTX], sliceType, qp, (uint8_t*)INIT_PRED_MODE, NUM_PRED_MODE_CTX);
 963     initBuffer(&m_contextState[OFF_ADI_CTX], sliceType, qp, (uint8_t*)INIT_INTRA_PRED_MODE, NUM_ADI_CTX);
 964     initBuffer(&m_contextState[OFF_CHROMA_PRED_CTX], sliceType, qp, (uint8_t*)INIT_CHROMA_PRED_MODE, NUM_CHROMA_PRED_CTX);
 965     initBuffer(&m_contextState[OFF_DELTA_QP_CTX], sliceType, qp, (uint8_t*)INIT_DQP, NUM_DELTA_QP_CTX);
 966     initBuffer(&m_contextState[OFF_INTER_DIR_CTX], sliceType, qp, (uint8_t*)INIT_INTER_DIR, NUM_INTER_DIR_CTX);
 967     initBuffer(&m_contextState[OFF_REF_NO_CTX], sliceType, qp, (uint8_t*)INIT_REF_PIC, NUM_REF_NO_CTX);
 968     initBuffer(&m_contextState[OFF_MV_RES_CTX], sliceType, qp, (uint8_t*)INIT_MVD, NUM_MV_RES_CTX);
 969     initBuffer(&m_contextState[OFF_QT_CBF_CTX], sliceType, qp, (uint8_t*)INIT_QT_CBF, NUM_QT_CBF_CTX);
 970     initBuffer(&m_contextState[OFF_TRANS_SUBDIV_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_TRANS_SUBDIV_FLAG, NUM_TRANS_SUBDIV_FLAG_CTX);
 971     initBuffer(&m_contextState[OFF_QT_ROOT_CBF_CTX], sliceType, qp, (uint8_t*)INIT_QT_ROOT_CBF, NUM_QT_ROOT_CBF_CTX);
 972     initBuffer(&m_contextState[OFF_SIG_CG_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_SIG_CG_FLAG, 2 * NUM_SIG_CG_FLAG_CTX);
 973     initBuffer(&m_contextState[OFF_SIG_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_SIG_FLAG, NUM_SIG_FLAG_CTX);
 974     initBuffer(&m_contextState[OFF_CTX_LAST_FLAG_X], sliceType, qp, (uint8_t*)INIT_LAST, NUM_CTX_LAST_FLAG_XY);
 975     initBuffer(&m_contextState[OFF_CTX_LAST_FLAG_Y], sliceType, qp, (uint8_t*)INIT_LAST, NUM_CTX_LAST_FLAG_XY);
 976     initBuffer(&m_contextState[OFF_ONE_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_ONE_FLAG, NUM_ONE_FLAG_CTX);
 977     initBuffer(&m_contextState[OFF_ABS_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_ABS_FLAG, NUM_ABS_FLAG_CTX);
 978     initBuffer(&m_contextState[OFF_MVP_IDX_CTX], sliceType, qp, (uint8_t*)INIT_MVP_IDX, NUM_MVP_IDX_CTX);
 979     initBuffer(&m_contextState[OFF_SAO_MERGE_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_SAO_MERGE_FLAG, NUM_SAO_MERGE_FLAG_CTX);
 980     initBuffer(&m_contextState[OFF_SAO_TYPE_IDX_CTX], sliceType, qp, (uint8_t*)INIT_SAO_TYPE_IDX, NUM_SAO_TYPE_IDX_CTX);
 981     initBuffer(&m_contextState[OFF_TRANSFORMSKIP_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_TRANSFORMSKIP_FLAG, 2 * NUM_TRANSFORMSKIP_FLAG_CTX);
 982     initBuffer(&m_contextState[OFF_TQUANT_BYPASS_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_CU_TRANSQUANT_BYPASS_FLAG, NUM_TQUANT_BYPASS_FLAG_CTX);
 983     // new structure
 984
 985     start();
 986 }
 987
 988 /* code explicit wp tables */
 989 void Entropy::codePredWeightTable(const Slice& slice)
 990 {
 991     const WeightParam *wp;
 992     bool            bChroma      = true; // 4:0:0 not yet supported
 993     bool            bDenomCoded  = false;
 994     int             numRefDirs   = slice.m_sliceType == B_SLICE ? 2 : 1;
 995     uint32_t        totalSignalledWeightFlags = 0;
 996
 997     if ((slice.m_sliceType == P_SLICE && slice.m_pps->bUseWeightPred) ||
 998         (slice.m_sliceType == B_SLICE && slice.m_pps->bUseWeightedBiPred))
 999     {
1000         for (int list = 0; list < numRefDirs; list++)
1001         {
1002             for (int ref = 0; ref < slice.m_numRefIdx[list]; ref++)
1003             {
1004                 wp = slice.m_weightPredTable[list][ref];
1005                 if (!bDenomCoded)
1006                 {
1007                     WRITE_UVLC(wp[0].log2WeightDenom, "luma_log2_weight_denom");
1008
1009                     if (bChroma)
1010                     {
1011                         int deltaDenom = wp[1].log2WeightDenom - wp[0].log2WeightDenom;
1012                         WRITE_SVLC(deltaDenom, "delta_chroma_log2_weight_denom");
1013                     }
1014                     bDenomCoded = true;
1015                 }
1016                 WRITE_FLAG(wp[0].bPresentFlag, "luma_weight_lX_flag");
1017                 totalSignalledWeightFlags += wp[0].bPresentFlag;
1018             }
1019
1020             if (bChroma)
1021             {
1022                 for (int ref = 0; ref < slice.m_numRefIdx[list]; ref++)
1023                 {
1024                     wp = slice.m_weightPredTable[list][ref];
1025                     WRITE_FLAG(wp[1].bPresentFlag, "chroma_weight_lX_flag");
1026                     totalSignalledWeightFlags += 2 * wp[1].bPresentFlag;
1027                 }
1028             }
1029
1030             for (int ref = 0; ref < slice.m_numRefIdx[list]; ref++)
1031             {
1032                 wp = slice.m_weightPredTable[list][ref];
1033                 if (wp[0].bPresentFlag)
1034                 {
1035                     int deltaWeight = (wp[0].inputWeight - (1 << wp[0].log2WeightDenom));
1036                     WRITE_SVLC(deltaWeight, "delta_luma_weight_lX");
1037                     WRITE_SVLC(wp[0].inputOffset, "luma_offset_lX");
1038                 }
1039
1040                 if (bChroma)
1041                 {
1042                     if (wp[1].bPresentFlag)
1043                     {
1044                         for (int plane = 1; plane < 3; plane++)
1045                         {
1046                             int deltaWeight = (wp[plane].inputWeight - (1 << wp[1].log2WeightDenom));
1047                             WRITE_SVLC(deltaWeight, "delta_chroma_weight_lX");
1048
1049                             int pred = (128 - ((128 * wp[plane].inputWeight) >> (wp[plane].log2WeightDenom)));
1050                             int deltaChroma = (wp[plane].inputOffset - pred);
1051                             WRITE_SVLC(deltaChroma, "delta_chroma_offset_lX");
1052                         }
1053                     }
1054                 }
1055             }
1056         }
1057
1058         X265_CHECK(totalSignalledWeightFlags <= 24, "total weights must be <= 24\n");
1059     }
1060 }
1061
1062 void Entropy::writeUnaryMaxSymbol(uint32_t symbol, uint8_t* scmModel, int offset, uint32_t maxSymbol)
1063 {
1064     X265_CHECK(maxSymbol > 0, "maxSymbol too small\n");
1065
1066     encodeBin(symbol ? 1 : 0, scmModel[0]);
1067
1068     if (!symbol)
1069         return;
1070
1071     bool bCodeLast = (maxSymbol > symbol);
1072
1073     while (--symbol)
1074         encodeBin(1, scmModel[offset]);
1075
1076     if (bCodeLast)
1077         encodeBin(0, scmModel[offset]);
1078 }
1079
1080 void Entropy::writeEpExGolomb(uint32_t symbol, uint32_t count)
1081 {
1082     uint32_t bins = 0;
1083     int numBins = 0;
1084
1085     while (symbol >= (uint32_t)(1 << count))
1086     {
1087         bins = 2 * bins + 1;
1088         numBins++;
1089         symbol -= 1 << count;
1090         count++;
1091     }
1092
1093     bins = 2 * bins + 0;
1094     numBins++;
1095
1096     bins = (bins << count) | symbol;
1097     numBins += count;
1098
1099     X265_CHECK(numBins <= 32, "numBins too large\n");
1100     encodeBinsEP(bins, numBins);
1101 }
1102
1103 /** Coding of coeff_abs_level_minus3 */
1104 void Entropy::writeCoefRemainExGolomb(uint32_t codeNumber, uint32_t absGoRice)
1105 {
1106     uint32_t length;
1107     const uint32_t codeRemain = codeNumber & ((1 << absGoRice) - 1);
1108
1109     if ((codeNumber >> absGoRice) < COEF_REMAIN_BIN_REDUCTION)
1110     {
1111         length = codeNumber >> absGoRice;
1112
1113         X265_CHECK(codeNumber - (length << absGoRice) == (codeNumber & ((1 << absGoRice) - 1)), "codeNumber failure\n");
1114         X265_CHECK(length + 1 + absGoRice < 32, "length failure\n");
1115         encodeBinsEP((((1 << (length + 1)) - 2) << absGoRice) + codeRemain, length + 1 + absGoRice);
1116     }
1117     else
1118     {
1119         length = 0;
1120         codeNumber = (codeNumber >> absGoRice) - COEF_REMAIN_BIN_REDUCTION;
1121         if (codeNumber != 0)
1122         {
1123             unsigned long idx;
1124             CLZ(idx, codeNumber + 1);
1125             length = idx;
1126             codeNumber -= (1 << idx) - 1;
1127         }
1128         codeNumber = (codeNumber << absGoRice) + codeRemain;
1129
1130         encodeBinsEP((1 << (COEF_REMAIN_BIN_REDUCTION + length + 1)) - 2, COEF_REMAIN_BIN_REDUCTION + length + 1);
1131         encodeBinsEP(codeNumber, length + absGoRice);
1132     }
1133 }
1134
1135 // SBAC RD
1136 void Entropy::loadIntraDirModeLuma(const Entropy& src)
1137 {
1138     X265_CHECK(src.m_valid, "invalid copy source context\n");
1139     m_fracBits = src.m_fracBits;
1140     m_contextState[OFF_ADI_CTX] = src.m_contextState[OFF_ADI_CTX];
1141 }
1142
1143 void Entropy::copyFrom(const Entropy& src)
1144 {
1145     X265_CHECK(src.m_valid, "invalid copy source context\n");
1146
1147     copyState(src);
1148
1149     memcpy(m_contextState, src.m_contextState, MAX_OFF_CTX_MOD * sizeof(uint8_t));
1150     markValid();
1151 }
1152
1153 void Entropy::codePartSize(const CUData& cu, uint32_t absPartIdx, uint32_t depth)
1154 {
1155     PartSize partSize = (PartSize)cu.m_partSize[absPartIdx];
1156
1157     if (cu.isIntra(absPartIdx))
1158     {
1159         if (depth == g_maxCUDepth)
1160             encodeBin(partSize == SIZE_2Nx2N ? 1 : 0, m_contextState[OFF_PART_SIZE_CTX]);
1161         return;
1162     }
1163
1164     switch (partSize)
1165     {
1166     case SIZE_2Nx2N:
1167         encodeBin(1, m_contextState[OFF_PART_SIZE_CTX]);
1168         break;
1169
1170     case SIZE_2NxN:
1171     case SIZE_2NxnU:
1172     case SIZE_2NxnD:
1173         encodeBin(0, m_contextState[OFF_PART_SIZE_CTX + 0]);
1174         encodeBin(1, m_contextState[OFF_PART_SIZE_CTX + 1]);
1175         if (cu.m_slice->m_sps->maxAMPDepth > depth)
1176         {
1177             encodeBin((partSize == SIZE_2NxN) ? 1 : 0, m_contextState[OFF_PART_SIZE_CTX + 3]);
1178             if (partSize != SIZE_2NxN)
1179                 encodeBinEP((partSize == SIZE_2NxnU ? 0 : 1));
1180         }
1181         break;
1182
1183     case SIZE_Nx2N:
1184     case SIZE_nLx2N:
1185     case SIZE_nRx2N:
1186         encodeBin(0, m_contextState[OFF_PART_SIZE_CTX + 0]);
1187         encodeBin(0, m_contextState[OFF_PART_SIZE_CTX + 1]);
1188         if (depth == g_maxCUDepth && !(cu.m_log2CUSize[absPartIdx] == 3))
1189             encodeBin(1, m_contextState[OFF_PART_SIZE_CTX + 2]);
1190         if (cu.m_slice->m_sps->maxAMPDepth > depth)
1191         {
1192             encodeBin((partSize == SIZE_Nx2N) ? 1 : 0, m_contextState[OFF_PART_SIZE_CTX + 3]);
1193             if (partSize != SIZE_Nx2N)
1194                 encodeBinEP((partSize == SIZE_nLx2N ? 0 : 1));
1195         }
1196         break;
1197     default:
1198         X265_CHECK(0, "invalid CU partition\n");
1199         break;
1200     }
1201 }
1202
1203 void Entropy::codeMergeIndex(const CUData& cu, uint32_t absPartIdx)
1204 {
1205     uint32_t numCand = cu.m_slice->m_maxNumMergeCand;
1206
1207     if (numCand > 1)
1208     {
1209         uint32_t unaryIdx = cu.m_mvpIdx[0][absPartIdx]; // merge candidate index was stored in L0 MVP idx
1210         encodeBin((unaryIdx != 0), m_contextState[OFF_MERGE_IDX_EXT_CTX]);
1211
1212         X265_CHECK(unaryIdx < numCand, "unaryIdx out of range\n");
1213
1214         if (unaryIdx != 0)
1215         {
1216             uint32_t mask = (1 << unaryIdx) - 2;
1217             mask >>= (unaryIdx == numCand - 1) ? 1 : 0;
1218             encodeBinsEP(mask, unaryIdx - (unaryIdx == numCand - 1));
1219         }
1220     }
1221 }
1222
1223 void Entropy::codeIntraDirLumaAng(const CUData& cu, uint32_t absPartIdx, bool isMultiple)
1224 {
1225     uint32_t dir[4], j;
1226     uint32_t preds[4][3];
1227     int predIdx[4];
1228     uint32_t partNum = isMultiple && cu.m_partSize[absPartIdx] != SIZE_2Nx2N ? 4 : 1;
1229     uint32_t qNumParts = 1 << (cu.m_log2CUSize[absPartIdx] - 1 - LOG2_UNIT_SIZE) * 2;
1230
1231     for (j = 0; j < partNum; j++, absPartIdx += qNumParts)
1232     {
1233         dir[j] = cu.m_lumaIntraDir[absPartIdx];
1234         cu.getIntraDirLumaPredictor(absPartIdx, preds[j]);
1235         predIdx[j] = -1;
1236         for (uint32_t i = 0; i < 3; i++)
1237             if (dir[j] == preds[j][i])
1238                 predIdx[j] = i;
1239
1240         encodeBin((predIdx[j] != -1) ? 1 : 0, m_contextState[OFF_ADI_CTX]);
1241     }
1242
1243     for (j = 0; j < partNum; j++)
1244     {
1245         if (predIdx[j] != -1)
1246         {
1247             X265_CHECK((predIdx[j] >= 0) && (predIdx[j] <= 2), "predIdx out of range\n");
1248             // NOTE: Mapping
1249             //       0 = 0
1250             //       1 = 10
1251             //       2 = 11
1252             int nonzero = (!!predIdx[j]);
1253             encodeBinsEP(predIdx[j] + nonzero, 1 + nonzero);
1254         }
1255         else
1256         {
1257             if (preds[j][0] > preds[j][1])
1258                 std::swap(preds[j][0], preds[j][1]);
1259
1260             if (preds[j][0] > preds[j][2])
1261                 std::swap(preds[j][0], preds[j][2]);
1262
1263             if (preds[j][1] > preds[j][2])
1264                 std::swap(preds[j][1], preds[j][2]);
1265
1266             dir[j] += (dir[j] > preds[j][2]) ? -1 : 0;
1267             dir[j] += (dir[j] > preds[j][1]) ? -1 : 0;
1268             dir[j] += (dir[j] > preds[j][0]) ? -1 : 0;
1269
1270             encodeBinsEP(dir[j], 5);
1271         }
1272     }
1273 }
1274
1275 void Entropy::codeIntraDirChroma(const CUData& cu, uint32_t absPartIdx, uint32_t *chromaDirMode)
1276 {
1277     uint32_t intraDirChroma = cu.m_chromaIntraDir[absPartIdx];
1278
1279     if (intraDirChroma == DM_CHROMA_IDX)
1280         encodeBin(0, m_contextState[OFF_CHROMA_PRED_CTX]);
1281     else
1282     {
1283         for (int i = 0; i < NUM_CHROMA_MODE - 1; i++)
1284         {
1285             if (intraDirChroma == chromaDirMode[i])
1286             {
1287                 intraDirChroma = i;
1288                 break;
1289             }
1290         }
1291
1292         encodeBin(1, m_contextState[OFF_CHROMA_PRED_CTX]);
1293         encodeBinsEP(intraDirChroma, 2);
1294     }
1295 }
1296
1297 void Entropy::codeInterDir(const CUData& cu, uint32_t absPartIdx)
1298 {
1299     const uint32_t interDir = cu.m_interDir[absPartIdx] - 1;
1300     const uint32_t ctx      = cu.m_cuDepth[absPartIdx]; // the context of the inter dir is the depth of the CU
1301
1302     if (cu.m_partSize[absPartIdx] == SIZE_2Nx2N || cu.m_log2CUSize[absPartIdx] != 3)
1303         encodeBin(interDir == 2 ? 1 : 0, m_contextState[OFF_INTER_DIR_CTX + ctx]);
1304     if (interDir < 2)
1305         encodeBin(interDir, m_contextState[OFF_INTER_DIR_CTX + 4]);
1306 }
1307
1308 void Entropy::codeRefFrmIdx(const CUData& cu, uint32_t absPartIdx, int list)
1309 {
1310     uint32_t refFrame = cu.m_refIdx[list][absPartIdx];
1311
1312     encodeBin(refFrame > 0, m_contextState[OFF_REF_NO_CTX]);
1313
1314     if (refFrame > 0)
1315     {
1316         uint32_t refNum = cu.m_slice->m_numRefIdx[list] - 2;
1317         if (refNum == 0)
1318             return;
1319
1320         refFrame--;
1321         encodeBin(refFrame > 0, m_contextState[OFF_REF_NO_CTX + 1]);
1322         if (refFrame > 0)
1323         {
1324             uint32_t mask = (1 << refFrame) - 2;
1325             mask >>= (refFrame == refNum) ? 1 : 0;
1326             encodeBinsEP(mask, refFrame - (refFrame == refNum));
1327         }
1328     }
1329 }
1330
1331 void Entropy::codeMvd(const CUData& cu, uint32_t absPartIdx, int list)
1332 {
1333     const MV& mvd = cu.m_mvd[list][absPartIdx];
1334     const int hor = mvd.x;
1335     const int ver = mvd.y;
1336
1337     encodeBin(hor != 0 ? 1 : 0, m_contextState[OFF_MV_RES_CTX]);
1338     encodeBin(ver != 0 ? 1 : 0, m_contextState[OFF_MV_RES_CTX]);
1339
1340     const bool bHorAbsGr0 = hor != 0;
1341     const bool bVerAbsGr0 = ver != 0;
1342     const uint32_t horAbs   = 0 > hor ? -hor : hor;
1343     const uint32_t verAbs   = 0 > ver ? -ver : ver;
1344
1345     if (bHorAbsGr0)
1346         encodeBin(horAbs > 1 ? 1 : 0, m_contextState[OFF_MV_RES_CTX + 1]);
1347
1348     if (bVerAbsGr0)
1349         encodeBin(verAbs > 1 ? 1 : 0, m_contextState[OFF_MV_RES_CTX + 1]);
1350
1351     if (bHorAbsGr0)
1352     {
1353         if (horAbs > 1)
1354             writeEpExGolomb(horAbs - 2, 1);
1355
1356         encodeBinEP(0 > hor ? 1 : 0);
1357     }
1358
1359     if (bVerAbsGr0)
1360     {
1361         if (verAbs > 1)
1362             writeEpExGolomb(verAbs - 2, 1);
1363
1364         encodeBinEP(0 > ver ? 1 : 0);
1365     }
1366 }
1367
1368 void Entropy::codeDeltaQP(const CUData& cu, uint32_t absPartIdx)
1369 {
1370     int dqp = cu.m_qp[absPartIdx] - cu.getRefQP(absPartIdx);
1371
1372     int qpBdOffsetY = QP_BD_OFFSET;
1373
1374     dqp = (dqp + 78 + qpBdOffsetY + (qpBdOffsetY / 2)) % (52 + qpBdOffsetY) - 26 - (qpBdOffsetY / 2);
1375
1376     uint32_t absDQp = (uint32_t)((dqp > 0) ? dqp  : (-dqp));
1377     uint32_t TUValue = X265_MIN((int)absDQp, CU_DQP_TU_CMAX);
1378     writeUnaryMaxSymbol(TUValue, &m_contextState[OFF_DELTA_QP_CTX], 1, CU_DQP_TU_CMAX);
1379     if (absDQp >= CU_DQP_TU_CMAX)
1380         writeEpExGolomb(absDQp - CU_DQP_TU_CMAX, CU_DQP_EG_k);
1381
1382     if (absDQp > 0)
1383     {
1384         uint32_t sign = (dqp > 0 ? 0 : 1);
1385         encodeBinEP(sign);
1386     }
1387 }
1388
1389 void Entropy::codeQtCbfChroma(const CUData& cu, uint32_t absPartIdx, TextType ttype, uint32_t tuDepth, bool lowestLevel)
1390 {
1391     uint32_t ctx = tuDepth + 2;
1392
1393     uint32_t log2TrSize = cu.m_log2CUSize[absPartIdx] - tuDepth;
1394     bool canQuadSplit       = (log2TrSize - cu.m_hChromaShift > 2);
1395     uint32_t lowestTUDepth  = tuDepth + ((!lowestLevel && !canQuadSplit) ? 1 : 0); // unsplittable TUs inherit their parent's CBF
1396
1397     if (cu.m_chromaFormat == X265_CSP_I422 && (lowestLevel || !canQuadSplit)) // if sub-TUs are present
1398     {
1399         uint32_t subTUDepth        = lowestTUDepth + 1;   // if this is the lowest level of the TU-tree, the sub-TUs are directly below.
1400                                                           // Otherwise, this must be the level above the lowest level (as specified above)
1401         uint32_t tuNumParts = 1 << ((log2TrSize - LOG2_UNIT_SIZE) * 2 - 1);
1402
1403         encodeBin(cu.getCbf(absPartIdx             , ttype, subTUDepth), m_contextState[OFF_QT_CBF_CTX + ctx]);
1404         encodeBin(cu.getCbf(absPartIdx + tuNumParts, ttype, subTUDepth), m_contextState[OFF_QT_CBF_CTX + ctx]);
1405     }
1406     else
1407         encodeBin(cu.getCbf(absPartIdx, ttype, lowestTUDepth), m_contextState[OFF_QT_CBF_CTX + ctx]);
1408 }
1409
1410 void Entropy::codeTransformSkipFlags(const CUData& cu, uint32_t absPartIdx, uint32_t trSize, TextType ttype)
1411 {
1412     if (cu.m_tqBypass[absPartIdx])
1413         return;
1414     if (trSize != 4)
1415         return;
1416
1417     uint32_t useTransformSkip = cu.m_transformSkip[ttype][absPartIdx];
1418     encodeBin(useTransformSkip, m_contextState[OFF_TRANSFORMSKIP_FLAG_CTX + (ttype ? NUM_TRANSFORMSKIP_FLAG_CTX : 0)]);
1419 }
1420
1421 /** Encode (X,Y) position of the last significant coefficient of a block.
1422  * Each component is sent as a group index (a context coded run of 1-bins, closed by a
1423  * 0-bin unless the index is the largest one the block size allows), followed for group
1424  * indices above 3 by the offset inside the group as bypass bins.
1425  * \param posx X component of last coefficient
1426  * \param posy Y component of last coefficient
1427  * \param log2TrSize log2 of the block size
1428  * \param bIsLuma true selects the luma contexts, false the chroma ones
1429  * \param scanIdx scan type; for SCAN_VER the two components are exchanged before coding
1430  */
1431 void Entropy::codeLastSignificantXY(uint32_t posx, uint32_t posy, uint32_t log2TrSize, bool bIsLuma, uint32_t scanIdx)
1432 {
1433     if (scanIdx == SCAN_VER)
1434         std::swap(posx, posy);
1435
1436     const uint32_t groupIdxX = getGroupIdx(posx);
1437     const uint32_t groupIdxY = getGroupIdx(posy);
1438     const uint32_t maxGroupIdx = log2TrSize * 2 - 1;
1439
1440     // offset of the first context used by this block size, and the shift that maps a
1441     // prefix bin to its context
1442     int blkSizeOffset = NUM_CTX_LAST_FLAG_XY_LUMA;
1443     int ctxShift = log2TrSize - 2;
1444     if (bIsLuma)
1445     {
1446         blkSizeOffset = (log2TrSize - 2) * 3 + ((log2TrSize - 1) >> 2);
1447         ctxShift = (log2TrSize + 1) >> 2;
1448     }
1449
1450     // group index of X
1451     uint8_t* const ctxX = &m_contextState[OFF_CTX_LAST_FLAG_X + blkSizeOffset];
1452     uint32_t bin;
1453     for (bin = 0; bin < groupIdxX; bin++)
1454         encodeBin(1, ctxX[bin >> ctxShift]);
1455     if (groupIdxX < maxGroupIdx)
1456         encodeBin(0, ctxX[bin >> ctxShift]);
1457
1458     // group index of Y, coded the same way with its own contexts
1459     uint8_t* const ctxY = &m_contextState[OFF_CTX_LAST_FLAG_Y + blkSizeOffset];
1460     for (bin = 0; bin < groupIdxY; bin++)
1461         encodeBin(1, ctxY[bin >> ctxShift]);
1462     if (groupIdxY < maxGroupIdx)
1463         encodeBin(0, ctxY[bin >> ctxShift]);
1464
1465     // offsets inside the groups, X before Y
1466     if (groupIdxX > 3)
1467         encodeBinsEP(posx - g_minInGroup[groupIdxX], (groupIdxX - 2) >> 1);
1468
1469     if (groupIdxY > 3)
1470         encodeBinsEP(posy - g_minInGroup[groupIdxY], (groupIdxY - 2) >> 1);
1471 }
1472
/** Encode the quantized coefficients of one transform block.
 * Written in this order: the transform skip flag (only when the PPS enables it), the
 * position of the last non-zero coefficient in scan order, and then, for every group of
 * (1 << MLS_CG_SIZE) scan positions from the one holding the last coefficient down to
 * group 0: the coefficient group flag, the significance flags, up to C1FLAG_NUMBER
 * greater-than-one flags, at most one greater-than-two flag, the sign bits and the
 * remaining absolute levels.
 * \param cu         CU the block belongs to; supplies the PPS flags, m_tqBypass and the scan tables
 * \param coeff      coefficients of the block, stored row by row with (1 << log2TrSize) per row;
 *                   at least one of them must be non-zero
 * \param absPartIdx partition index used to address the per-partition arrays of cu
 * \param log2TrSize log2 of the block width/height
 * \param ttype      plane of the block; TEXT_LUMA selects the luma contexts
 */
1473 void Entropy::codeCoeffNxN(const CUData& cu, const coeff_t* coeff, uint32_t absPartIdx, uint32_t log2TrSize, TextType ttype)
1474 {
1475     uint32_t trSize = 1 << log2TrSize;
1476
1477     // compute number of significant coefficients
1478     uint32_t numSig = primitives.count_nonzero(coeff, (1 << (log2TrSize << 1)));
1479
1480     X265_CHECK(numSig > 0, "cbf check fail\n");
1481
         // sign hiding needs the PPS flag and is never used when m_tqBypass is set for this partition
1482     bool bHideFirstSign = cu.m_slice->m_pps->bSignHideEnabled && !cu.m_tqBypass[absPartIdx];
1483
1484     if (cu.m_slice->m_pps->bTransformSkipEnabled)
1485         codeTransformSkipFlags(cu, absPartIdx, trSize, ttype);
1486
1487     bool bIsLuma = ttype == TEXT_LUMA;
1488
1489     // select scans
1490     TUEntropyCodingParameters codingParameters;
1491     cu.getTUEntropyCodingParameters(codingParameters, absPartIdx, log2TrSize, bIsLuma);
1492
1493     //----- encode significance map -----
1494
1495     // Find position of last coefficient
1496     int scanPosLast = 0;
1497     uint32_t posLast;
1498     uint64_t sigCoeffGroupFlag64 = 0; // one bit per coefficient group that holds a non-zero coefficient
         // mask equal to (1 << (log2TrSize - MLS_CG_LOG2_SIZE)) - 1, as the assert below verifies
1499     const uint32_t maskPosXY = ((uint32_t)~0 >> (31 - log2TrSize + MLS_CG_LOG2_SIZE)) >> 1;
1500     assert((uint32_t)((1 << (log2TrSize - MLS_CG_LOG2_SIZE)) - 1) == (((uint32_t)~0 >> (31 - log2TrSize + MLS_CG_LOG2_SIZE)) >> 1));
         // walk the scan from its start until every non-zero coefficient has been seen;
         // posLast is then the block position of the last one in scan order
1501     do
1502     {
1503         posLast = codingParameters.scan[scanPosLast++];
1504
1505         const uint32_t isNZCoeff = (coeff[posLast] != 0);
1506         // get L1 sig map
1507         // NOTE: the new algorithm is complicated, so I keep reference code here
1508         //uint32_t posy   = posLast >> log2TrSize;
1509         //uint32_t posx   = posLast - (posy << log2TrSize);
1510         //uint32_t blkIdx0 = ((posy >> MLS_CG_LOG2_SIZE) << codingParameters.log2TrSizeCG) + (posx >> MLS_CG_LOG2_SIZE);
1511         const uint32_t blkIdx = ((posLast >> (2 * MLS_CG_LOG2_SIZE)) & ~maskPosXY) + ((posLast >> MLS_CG_LOG2_SIZE) & maskPosXY);
1512         sigCoeffGroupFlag64 |= ((uint64_t)isNZCoeff << blkIdx);
1513         numSig -= isNZCoeff;
1514     }
1515     while (numSig > 0);
1516     scanPosLast--; // undo the final post-increment: scan index of the last coefficient
1517
1518     // Code position of last coefficient
1519     int posLastY = posLast >> log2TrSize;
1520     int posLastX = posLast & (trSize - 1);
1521     codeLastSignificantXY(posLastX, posLastY, log2TrSize, bIsLuma, codingParameters.scanType);
1522
1523     //===== code significance flag =====
1524     uint8_t * const baseCoeffGroupCtx = &m_contextState[OFF_SIG_CG_FLAG_CTX + (bIsLuma ? 0 : NUM_SIG_CG_FLAG_CTX)];
1525     uint8_t * const baseCtx = bIsLuma ? &m_contextState[OFF_SIG_FLAG_CTX] : &m_contextState[OFF_SIG_FLAG_CTX + NUM_SIG_FLAG_CTX_LUMA];
1526     const int lastScanSet = scanPosLast >> MLS_CG_SIZE; // coefficient group (in scan order) holding the last coefficient
1527     uint32_t c1 = 1; // greater-than-one context state; its final value in one group picks ctxSet of the next
1528     uint32_t goRiceParam = 0;
1529     int scanPosSig = scanPosLast;
1530
         // groups are processed from the one with the last coefficient back to group 0
1531     for (int subSet = lastScanSet; subSet >= 0; subSet--)
1532     {
1533         int numNonZero = 0;
1534         int subPos     = subSet << MLS_CG_SIZE; // first scan position of this group
1535         goRiceParam    = 0;
1536         int absCoeff[1 << MLS_CG_SIZE]; // absolute levels of the group, highest scan position first
1537         uint32_t coeffSigns = 0;
1538         int lastNZPosInCG = -1;
1539         int firstNZPosInCG = 1 << MLS_CG_SIZE;
             // the last coefficient is known to be non-zero, so no significance flag is sent for it
1540         if (scanPosSig == scanPosLast)
1541         {
1542             absCoeff[0] = int(abs(coeff[posLast]));
1543             coeffSigns  = (coeff[posLast] < 0);
1544             numNonZero  = 1;
1545             lastNZPosInCG  = scanPosSig;
1546             firstNZPosInCG = scanPosSig;
1547             scanPosSig--;
1548         }
1549         // encode significant_coeffgroup_flag
1550         const int cgBlkPos = codingParameters.scanCG[subSet];
1551         const int cgPosY   = cgBlkPos >> codingParameters.log2TrSizeCG;
1552         const int cgPosX   = cgBlkPos - (cgPosY << codingParameters.log2TrSizeCG);
1553         const uint64_t cgBlkPosMask = ((uint64_t)1 << cgBlkPos);
1554
             // no flag is coded for the group with the last coefficient or for group 0; both are treated as set
1555         if (subSet == lastScanSet || !subSet)
1556             sigCoeffGroupFlag64 |= cgBlkPosMask;
1557         else
1558         {
1559             uint32_t sigCoeffGroup = ((sigCoeffGroupFlag64 & cgBlkPosMask) != 0);
1560             uint32_t ctxSig = Quant::getSigCoeffGroupCtxInc(sigCoeffGroupFlag64, cgPosX, cgPosY, codingParameters.log2TrSizeCG);
1561             encodeBin(sigCoeffGroup, baseCoeffGroupCtx[ctxSig]);
1562         }
1563
1564         // encode significant_coeff_flag
1565         if (sigCoeffGroupFlag64 & cgBlkPosMask)
1566         {
1567             const int patternSigCtx = Quant::calcPatternSigCtx(sigCoeffGroupFlag64, cgPosX, cgPosY, codingParameters.log2TrSizeCG);
1568             uint32_t blkPos, sig, ctxSig;
1569             for (; scanPosSig >= subPos; scanPosSig--)
1570             {
1571                 blkPos  = codingParameters.scan[scanPosSig];
1572                 sig     = (coeff[blkPos] != 0);
                     // the flag is skipped for the first scan position of a group other than group 0 when
                     // nothing non-zero has been found in the group so far: the set group flag then
                     // already implies that this coefficient is non-zero
1573                 if (scanPosSig > subPos || subSet == 0 || numNonZero)
1574                 {
1575                     ctxSig = Quant::getSigCtxInc(patternSigCtx, log2TrSize, trSize, blkPos, bIsLuma, codingParameters.firstSignificanceMapContext);
1576                     encodeBin(sig, baseCtx[ctxSig]);
1577                 }
1578                 if (sig)
1579                 {
1580                     absCoeff[numNonZero] = int(abs(coeff[blkPos]));
                         // append the sign (bit 31 after conversion to uint32_t) below the signs collected so far
1581                     coeffSigns = 2 * coeffSigns + ((uint32_t)coeff[blkPos] >> 31);
1582                     numNonZero++;
1583                     if (lastNZPosInCG < 0)
1584                         lastNZPosInCG = scanPosSig;
1585                     firstNZPosInCG = scanPosSig;
1586                 }
1587             }
1588         }
1589         else
1590             scanPosSig = subPos - 1; // nothing coded in this group: continue with the group below
1591
1592         if (numNonZero > 0)
1593         {
                 // a sign can be hidden only when the first and last non-zero scan positions of
                 // the group are at least SBH_THRESHOLD apart
1594             bool signHidden = (lastNZPosInCG - firstNZPosInCG >= SBH_THRESHOLD);
1595             uint32_t ctxSet = (subSet > 0 && bIsLuma) ? 2 : 0;
1596
                 // c1 still holds the state left by the previously coded group
1597             if (c1 == 0)
1598                 ctxSet++;
1599
1600             c1 = 1;
1601             uint8_t *baseCtxMod = bIsLuma ? &m_contextState[OFF_ONE_FLAG_CTX + 4 * ctxSet] : &m_contextState[OFF_ONE_FLAG_CTX + NUM_ONE_FLAG_CTX_LUMA + 4 * ctxSet];
1602
                 // greater-than-one flags, for at most C1FLAG_NUMBER coefficients of the group
1603             int numC1Flag = X265_MIN(numNonZero, C1FLAG_NUMBER);
1604             int firstC2FlagIdx = -1;
1605             for (int idx = 0; idx < numC1Flag; idx++)
1606             {
1607                 uint32_t symbol = absCoeff[idx] > 1;
1608                 encodeBin(symbol, baseCtxMod[c1]);
1609                 if (symbol)
1610                 {
                         // c1 stays 0 for the rest of the group once a level above 1 was coded
1611                     c1 = 0;
1612
1613                     if (firstC2FlagIdx == -1)
1614                         firstC2FlagIdx = idx;
1615                 }
1616                 else if ((c1 < 3) && (c1 > 0))
1617                     c1++;
1618             }
1619
                 // greater-than-two flag, sent only for the first coefficient whose level is above 1
1620             if (!c1)
1621             {
1622                 baseCtxMod = bIsLuma ? &m_contextState[OFF_ABS_FLAG_CTX + ctxSet] : &m_contextState[OFF_ABS_FLAG_CTX + NUM_ABS_FLAG_CTX_LUMA + ctxSet];
1623                 if (firstC2FlagIdx != -1)
1624                 {
1625                     uint32_t symbol = absCoeff[firstC2FlagIdx] > 2;
1626                     encodeBin(symbol, baseCtxMod[0]);
1627                 }
1628             }
1629
                 // sign bits as bypass bins; with sign hiding the lowest bit of coeffSigns, i.e. the
                 // sign of the coefficient at the lowest scan position of the group, is not written
1630             if (bHideFirstSign && signHidden)
1631                 encodeBinsEP((coeffSigns >> 1), numNonZero - 1);
1632             else
1633                 encodeBinsEP(coeffSigns, numNonZero);
1634
                 // remaining levels: needed when a flagged coefficient exceeded 1 or when some
                 // coefficients received no greater-than-one flag at all
1635             int firstCoeff2 = 1;
1636             if (!c1 || numNonZero > C1FLAG_NUMBER)
1637             {
1638                 for (int idx = 0; idx < numNonZero; idx++)
1639                 {
                         // smallest level that still needs a remainder: 3 while no level >= 2 has been
                         // passed, 2 after that, and 1 for coefficients beyond the first C1FLAG_NUMBER
1640                     int baseLevel = (idx < C1FLAG_NUMBER) ? (2 + firstCoeff2) : 1;
1641
1642                     if (absCoeff[idx] >= baseLevel)
1643                     {
1644                         writeCoefRemainExGolomb(absCoeff[idx] - baseLevel, goRiceParam);
                             // the Rice parameter grows after a large level and is capped at 4
1645                         if (absCoeff[idx] > 3 * (1 << goRiceParam))
1646                             goRiceParam = std::min<uint32_t>(goRiceParam + 1, 4);
1647                     }
1648                     if (absCoeff[idx] >= 2)
1649                         firstCoeff2 = 0;
1650                 }
1651             }
1652         }
1653     }
1654 }
1655
1656 /* Write 'code' as a truncated unary string of bypass bins: 'code' 1-bins, followed by
1657  * a closing 0-bin unless code has reached maxSymbol (a zero code is the 0-bin alone) */
1658 void Entropy::codeSaoMaxUvlc(uint32_t code, uint32_t maxSymbol)
1659 {
1660     X265_CHECK(maxSymbol > 0, "maxSymbol too small\n");
1661     if (!code)
1662     {
1663         encodeBinEP(0);
1664         return;
1665     }
1666     encodeBinEP(1);
1667
1668     // remaining code - 1 ones, moved up one place when the closing zero has to follow them
1669     const uint32_t closingZero = (maxSymbol > code);
1670     const uint32_t ones = (1 << (code - 1)) - 1;
1671     encodeBinsEP(ones << closingZero, code - 1 + closingZero);
1672 }
1673
1674 /* Fill estBitsSbac with bit cost estimates taken from the current context states: CBF
1675  * flags, coefficient group flags, significance map with last position, and level flags */
1676 void Entropy::estBit(EstBitsSbac& estBitsSbac, uint32_t log2TrSize, bool bIsLuma) const
1677 {
1678     // coded block flags
1679     estCBFBit(estBitsSbac);
1680     // significant coefficient group flags of the selected plane
1681     estSignificantCoeffGroupMapBit(estBitsSbac, bIsLuma);
1682     // significance flags and last position prefixes for this block size
1683     estSignificantMapBit(estBitsSbac, log2TrSize, bIsLuma);
1684     // greater-than-one and greater-than-two flags
1685     estSignificantCoefficientsBit(estBitsSbac, bIsLuma);
1686 }
1687
1688 /* estimate the bit cost of both bin values for every QT CBF context and for the root CBF context */
1689 void Entropy::estCBFBit(EstBitsSbac& estBitsSbac) const
1690 {
1691     for (uint32_t ctxInc = 0; ctxInc < NUM_QT_CBF_CTX; ctxInc++)
1692     {
1693         const uint8_t cbfState = m_contextState[OFF_QT_CBF_CTX + ctxInc];
1694
1695         for (uint32_t bin = 0; bin < 2; bin++)
1696             estBitsSbac.blockCbpBits[ctxInc][bin] = sbacGetEntropyBits(cbfState, bin);
1697     }
1698
1699     // only the first root CBF context is read
1700     const uint8_t rootState = m_contextState[OFF_QT_ROOT_CBF_CTX];
1701     for (uint32_t bin = 0; bin < 2; bin++)
1702         estBitsSbac.blockRootCbpBits[bin] = sbacGetEntropyBits(rootState, bin);
1703 }
1704
1705 /* estimate SBAC bit cost of both values of the significant coefficient group flag */
1706 void Entropy::estSignificantCoeffGroupMapBit(EstBitsSbac& estBitsSbac, bool bIsLuma) const
1707 {
1708     // the chroma contexts follow the NUM_SIG_CG_FLAG_CTX luma ones
1709     const uint8_t* cgCtx = &m_contextState[OFF_SIG_CG_FLAG_CTX + (bIsLuma ? 0 : NUM_SIG_CG_FLAG_CTX)];
1710     for (int ctxIdx = 0; ctxIdx < NUM_SIG_CG_FLAG_CTX; ctxIdx++)
1711         for (uint32_t bin = 0; bin < 2; bin++)
1712             estBitsSbac.significantCoeffGroupBits[ctxIdx][bin] = sbacGetEntropyBits(cgCtx[ctxIdx], bin);
1713 }
1714
1715 /* Estimate the SBAC bit cost of the significance flags (context 0 plus the contexts that
1716  * belong to log2TrSize) and of every last position group index: g one-bins followed by a
1717  * zero-bin, the zero-bin being left out for the largest index. */
1718 void Entropy::estSignificantMapBit(EstBitsSbac& estBitsSbac, uint32_t log2TrSize, bool bIsLuma) const
1719 {
1720     // range of significance contexts used by this block size
1721     int firstCtx, numCtx;
1722     if (log2TrSize >= 4)
1723     {
1724         firstCtx = bIsLuma ? 21 : 12;
1725         numCtx = bIsLuma ? 6 : 3;
1726     }
1727     else if (log2TrSize == 3)
1728     {
1729         firstCtx = 9;
1730         numCtx = bIsLuma ? 12 : 3;
1731     }
1732     else
1733     {
1734         firstCtx = 1;
1735         numCtx = 8;
1736     }
1737
1738     // chroma significance contexts are stored behind the NUM_SIG_FLAG_CTX_LUMA luma ones
1739     const uint8_t* sigCtx = &m_contextState[OFF_SIG_FLAG_CTX + (bIsLuma ? 0 : NUM_SIG_FLAG_CTX_LUMA)];
1740
1741     for (uint32_t bin = 0; bin < 2; bin++)
1742     {
1743         // context 0 is refreshed for every block size
1744         estBitsSbac.significantBits[0][bin] = sbacGetEntropyBits(sigCtx[0], bin);
1745
1746         for (int ctxIdx = firstCtx; ctxIdx < firstCtx + numCtx; ctxIdx++)
1747             estBitsSbac.significantBits[ctxIdx][bin] = sbacGetEntropyBits(sigCtx[ctxIdx], bin);
1748     }
1749
1750     // same context selection as in codeLastSignificantXY()
1751     const uint32_t maxGroupIdx = log2TrSize * 2 - 1;
1752     int blkSizeOffset = NUM_CTX_LAST_FLAG_XY_LUMA;
1753     int ctxShift = log2TrSize - 2;
1754     if (bIsLuma)
1755     {
1756         blkSizeOffset = (log2TrSize - 2) * 3 + ((log2TrSize - 1) >> 2);
1757         ctxShift = (log2TrSize + 1) >> 2;
1758     }
1759     const uint8_t* ctxX = &m_contextState[OFF_CTX_LAST_FLAG_X + blkSizeOffset];
1760     const uint8_t* ctxY = &m_contextState[OFF_CTX_LAST_FLAG_Y + blkSizeOffset];
1761
1762     // bitsX / bitsY accumulate the cost of the one-bins that precede group index g
1763     int bitsX = 0, bitsY = 0;
1764     for (uint32_t g = 0; g < maxGroupIdx; g++)
1765     {
1766         const uint32_t ctxOffset = g >> ctxShift;
1767         estBitsSbac.lastXBits[g] = bitsX + sbacGetEntropyBits(ctxX[ctxOffset], 0);
1768         estBitsSbac.lastYBits[g] = bitsY + sbacGetEntropyBits(ctxY[ctxOffset], 0);
1769         bitsX += sbacGetEntropyBits(ctxX[ctxOffset], 1);
1770         bitsY += sbacGetEntropyBits(ctxY[ctxOffset], 1);
1771     }
1772     // the largest group index has no closing zero-bin
1773     estBitsSbac.lastXBits[maxGroupIdx] = bitsX;
1774     estBitsSbac.lastYBits[maxGroupIdx] = bitsY;
1775 }
1776
1777 /* Estimate the bit cost of the coefficient level flags of the selected plane:
1778  *   greaterOneBits[ctx][bin]  greater-than-one flag, contexts at OFF_ONE_FLAG_CTX
1779  *   levelAbsBits[ctx][bin]    greater-than-two flag, contexts at OFF_ABS_FLAG_CTX
1780  * Both tables are indexed from zero for luma and chroma alike; the chroma contexts
1781  * are the ones stored behind the luma contexts of each group. */
1782 void Entropy::estSignificantCoefficientsBit(EstBitsSbac& estBitsSbac, bool bIsLuma) const
1783 {
1784     const uint8_t* ctxOne = &m_contextState[OFF_ONE_FLAG_CTX];
1785     const uint8_t* ctxAbs = &m_contextState[OFF_ABS_FLAG_CTX];
1786     int numOne = NUM_ONE_FLAG_CTX_LUMA;
1787     int numAbs = NUM_ABS_FLAG_CTX_LUMA;
1788
1789     if (!bIsLuma)
1790     {
1791         // step over the luma contexts
1792         ctxOne += NUM_ONE_FLAG_CTX_LUMA;
1793         ctxAbs += NUM_ABS_FLAG_CTX_LUMA;
1794         numOne = NUM_ONE_FLAG_CTX_CHROMA;
1795         numAbs = NUM_ABS_FLAG_CTX_CHROMA;
1796     }
1797
1798     // cost of both bin values for each context
1799     for (int ctxIdx = 0; ctxIdx < numOne; ctxIdx++)
1800     {
1801         const uint8_t state = ctxOne[ctxIdx];
1802
1803         estBitsSbac.greaterOneBits[ctxIdx][0] = sbacGetEntropyBits(state, 0);
1804         estBitsSbac.greaterOneBits[ctxIdx][1] = sbacGetEntropyBits(state, 1);
1805     }
1806
1807     for (int ctxIdx = 0; ctxIdx < numAbs; ctxIdx++)
1808     {
1809         const uint8_t state = ctxAbs[ctxIdx];
1810
1811         estBitsSbac.levelAbsBits[ctxIdx][0] = sbacGetEntropyBits(state, 0);
1812         estBitsSbac.levelAbsBits[ctxIdx][1] = sbacGetEntropyBits(state, 1);
1813     }
1814 }
1815
1816 /* Overwrite all MAX_OFF_CTX_MOD context states of this coder with those of src (checked to be valid) */
1817 void Entropy::copyContextsFrom(const Entropy& src)
1818 {
1819     X265_CHECK(src.m_valid, "invalid copy source context\n");
1820
1821     memcpy(m_contextState, src.m_contextState, sizeof(m_contextState[0]) * MAX_OFF_CTX_MOD);
1822     markValid();
1823 }
1824
1825 void Entropy::start()
1826 {
1827     m_range = 510;          // put the arithmetic coder into its initial state: range 510,
1828     m_low = 0;              //   empty low register,
1829     m_bitsLeft = -12;       //   12 bits to go before writeOut() runs (it is called at m_bitsLeft >= 0),
1830     m_bufferedByte = 0xff;
1831     m_numBufferedBytes = 0; //   and no output byte held back
1832 }
1833
1834 void Entropy::finish()
1835 {
1836     /* Flush the coder: resolve the carry that may still sit above the pending output in
1837      * m_low, emit the held-back bytes and finally the bits left in the low register. */
1838     const int carryShift = 21 + m_bitsLeft;
1839     int fillByte;
1840
1841     if (m_low >> carryShift)
1842     {
1843         // carry: the held-back byte is incremented and the 0xff bytes queued behind it wrap to zero
1844         m_bitIf->writeByte(m_bufferedByte + 1);
1845         m_low -= 1 << carryShift;
1846         fillByte = 0x00;
1847     }
1848     else
1849     {
1850         if (m_numBufferedBytes > 0)
1851             m_bitIf->writeByte(m_bufferedByte);
1852         fillByte = 0xff;
1853     }
1854
1855     // every queued byte beyond the first one
1856     for (; m_numBufferedBytes > 1; m_numBufferedBytes--)
1857         m_bitIf->writeByte(fillByte);
1858     m_bitIf->write(m_low >> 8, 13 + m_bitsLeft);
1859 }
1860
1861 void Entropy::copyState(const Entropy& other)
1862 {
1863     m_range = other.m_range;                       // arithmetic coder registers
1864     m_low = other.m_low;
1865     m_bitsLeft = other.m_bitsLeft;
1866     m_numBufferedBytes = other.m_numBufferedBytes; // output bytes held back by writeOut()
1867     m_bufferedByte = other.m_bufferedByte;
1868     m_fracBits = other.m_fracBits;                 // bit estimate; the context states are not copied here
1869 }
1870
1871 void Entropy::resetBits()
1872 {
1873     m_fracBits &= 32767;    // keep only the part below 32768, the amount one bypass bin adds
1874     m_bufferedByte = 0xff;
1875     m_numBufferedBytes = 0;
1876     m_bitsLeft = -12;       // same register and buffer values as in start(); m_range is left alone
1877     m_low = 0;
1878     if (m_bitIf)            // forward the reset to the attached bitstream, if there is one
1879         m_bitIf->resetBits();
1880 }
1881
1882 /** Encode one context coded bin and advance its context model.
      * \param binValue value of the bin
      * \param ctxModel context state used for the bin; replaced by sbacNext(state, binValue)
      * When no bitstream is attached (m_bitIf is null) only the bit estimate m_fracBits grows. */
1883 void Entropy::encodeBin(uint32_t binValue, uint8_t &ctxModel)
1884 {
1885     uint32_t mstate = ctxModel;
1886
         // the model is advanced first; the coding below still uses the old state in mstate
1887     ctxModel = sbacNext(mstate, binValue);
1888
1889     if (!m_bitIf)
1890     {
1891         m_fracBits += sbacGetEntropyBits(mstate, binValue);
1892         return;
1893     }
1894
1895     uint32_t range = m_range;
1896     uint32_t state = sbacGetState(mstate);
         // LPS sub-interval width for this state; bits 6-7 of range select the table column
1897     uint32_t lps = g_lpsTable[state][((uint8_t)range >> 6)];
1898     range -= lps;
1899
1900     X265_CHECK(lps >= 2, "lps is too small\n");
1901
         // shift for the MPS path: 1 when the reduced range fell below 256 (the subtraction
         // then wraps and sets bit 31), otherwise 0
1902     int numBits = (uint32_t)(range - 256) >> 31;
1903     uint32_t low = m_low;
1904
1905     // NOTE: MPS must be LOWEST bit in mstate
1906     X265_CHECK((uint32_t)((binValue ^ mstate) & 1) == (uint32_t)(binValue != sbacGetMps(mstate)), "binValue failure\n");
         // LPS path: the bin differs from the MPS stored in bit 0 of mstate
1907     if ((binValue ^ mstate) & 1)
1908     {
1909         // NOTE: lps is non-zero and the maximum of idx is 8 because lps less than 256
1910         //numBits = g_renormTable[lps >> 3];
             // NOTE(review): CLZ presumably returns the index of the highest set bit of lps
             // (the check below expects 1 for state 63) -- confirm against the macro
1911         unsigned long idx;
1912         CLZ(idx, lps);
1913         X265_CHECK(state != 63 || idx == 1, "state failure\n");
1914
1915         numBits = 8 - idx;
1916         if (state >= 63)
1917             numBits = 6;
1918         X265_CHECK(numBits <= 6, "numBits failure\n");
1919
             // the LPS takes the upper part of the interval
1920         low += range;
1921         range = lps;
1922     }
         // renormalize and hand a byte to writeOut() once m_bitsLeft is no longer negative
1923     m_low = (low << numBits);
1924     m_range = (range << numBits);
1925     m_bitsLeft += numBits;
1926
1927     if (m_bitsLeft >= 0)
1928         writeOut();
1929 }
1930
1931 /** Encode equiprobable (bypass) bin.
1932  * Without an attached bitstream only the bit estimate grows, by 32768 per bin. */
1933 void Entropy::encodeBinEP(uint32_t binValue)
1934 {
1935     if (!m_bitIf)
1936     {
1937         m_fracBits += 32768;
1938         return;
1939     }
1940
1941     // low is doubled; a one-bin additionally adds the range
1942     m_low = (m_low << 1) + (binValue ? m_range : 0);
1943
1944     if (++m_bitsLeft >= 0)
1945         writeOut();
1946 }
1947
1948 /** Encode equiprobable (bypass) bins.
1949  * \param binValues the bins, first bin in the most significant of the numBins low bits
1950  * \param numBins number of bins to write
1951  */
1952 void Entropy::encodeBinsEP(uint32_t binValues, int numBins)
1953 {
1954     if (!m_bitIf)
1955     {
1956         m_fracBits += 32768 * numBins;
1957         return;
1958     }
1959
1960     // eight bins at a time, most significant first, until no more than eight are left
1961     while (numBins > 8)
1962     {
1963         numBins -= 8;
1964         const uint32_t chunk = binValues >> numBins;
1965         binValues -= chunk << numBins;
1966         m_low = (m_low << 8) + m_range * chunk;
1967         m_bitsLeft += 8;
1968         if (m_bitsLeft >= 0)
1969             writeOut();
1970     }
1971
1972     m_low = (m_low << numBins) + m_range * binValues;
1973     m_bitsLeft += numBins;
1974     if (m_bitsLeft >= 0)
1975         writeOut();
1976 }
1977
1978 /** Encode terminating bin: two units of the range are set aside for a one-bin, and coding
1979  * a one leaves a range of 2 << 7 after a shift by seven bits. */
1980 void Entropy::encodeBinTrm(uint32_t binValue)
1981 {
1982     if (!m_bitIf)
1983     {
1984         m_fracBits += sbacGetEntropyBitsTrm(binValue);
1985         return;
1986     }
1987
1988     m_range -= 2;
1989     if (!binValue)
1990     {
1991         // zero-bin: a single renormalization shift, needed only when the range fell below 256
1992         if (m_range >= 256)
1993             return;
1994         m_low <<= 1;
1995         m_range <<= 1;
1996         m_bitsLeft++;
1997     }
1998     else
1999     {
2000         m_low = (m_low + m_range) << 7;
2001         m_range = 2 << 7;
2002         m_bitsLeft += 7;
2003     }
2004     if (m_bitsLeft >= 0)
2005         writeOut();
2006 }
2007
2008 /** Move bits from register into bitstream.
2009  * One byte (plus a possible carry bit) is taken from the top of m_low. Bytes are held
2010  * back rather than written at once, because a later carry may still change them: the
2011  * last non-0xff byte is kept in m_bufferedByte and m_numBufferedBytes counts it together
2012  * with the 0xff bytes that followed it. */
2013 void Entropy::writeOut()
2014 {
2015     const uint32_t leadByte = m_low >> (13 + m_bitsLeft);
2016     m_low &= (uint32_t)(~0) >> (11 + 8 - m_bitsLeft);
2017     m_bitsLeft -= 8;
2018
2019     if (leadByte == 0xff)
2020     {
2021         // cannot be released yet, a later carry would ripple through it
2022         m_numBufferedBytes++;
2023         return;
2024     }
2025
2026     uint32_t pending = m_numBufferedBytes;
2027     if (pending > 0)
2028     {
2029         // release the held-back bytes, with the carry (bit 8 of leadByte) added in
2030         const uint32_t carry = leadByte >> 8;
2031         m_bitIf->writeByte(m_bufferedByte + carry);
2032         const uint32_t fill = (0xff + carry) & 0xff;
2033         for (; pending > 1; pending--)
2034             m_bitIf->writeByte(fill);
2035     }
2036     m_numBufferedBytes = 1;
2037     m_bufferedByte = (uint8_t)leadByte;
2038 }
2039
/* Table of 128 bit costs.
 * NOTE(review): presumably the table read by sbacGetEntropyBits() for the m_fracBits
 * estimates, indexed by (context state ^ bin) and scaled so that 32768 is one bit (the
 * amount encodeBinEP() adds per bypass bin) -- confirm against that helper. */
2040 const uint32_t g_entropyBits[128] =
2041 {
2042     // Corrected table, most notably for last state
2043     0x07b23, 0x085f9, 0x074a0, 0x08cbc, 0x06ee4, 0x09354, 0x067f4, 0x09c1b, 0x060b0, 0x0a62a, 0x05a9c, 0x0af5b, 0x0548d, 0x0b955, 0x04f56, 0x0c2a9,
2044     0x04a87, 0x0cbf7, 0x045d6, 0x0d5c3, 0x04144, 0x0e01b, 0x03d88, 0x0e937, 0x039e0, 0x0f2cd, 0x03663, 0x0fc9e, 0x03347, 0x10600, 0x03050, 0x10f95,
2045     0x02d4d, 0x11a02, 0x02ad3, 0x12333, 0x0286e, 0x12cad, 0x02604, 0x136df, 0x02425, 0x13f48, 0x021f4, 0x149c4, 0x0203e, 0x1527b, 0x01e4d, 0x15d00,
2046     0x01c99, 0x166de, 0x01b18, 0x17017, 0x019a5, 0x17988, 0x01841, 0x18327, 0x016df, 0x18d50, 0x015d9, 0x19547, 0x0147c, 0x1a083, 0x0138e, 0x1a8a3,
2047     0x01251, 0x1b418, 0x01166, 0x1bd27, 0x01068, 0x1c77b, 0x00f7f, 0x1d18e, 0x00eda, 0x1d91a, 0x00e19, 0x1e254, 0x00d4f, 0x1ec9a, 0x00c90, 0x1f6e0,
2048     0x00c01, 0x1fef8, 0x00b5f, 0x208b1, 0x00ab6, 0x21362, 0x00a15, 0x21e46, 0x00988, 0x2285d, 0x00934, 0x22ea8, 0x008a8, 0x239b2, 0x0081d, 0x24577,
2049     0x007c9, 0x24ce6, 0x00763, 0x25663, 0x00710, 0x25e8f, 0x006a0, 0x26a26, 0x00672, 0x26f23, 0x005e8, 0x27ef8, 0x005ba, 0x284b5, 0x0055e, 0x29057,
2050     0x0050c, 0x29bab, 0x004c1, 0x2a674, 0x004a7, 0x2aa5e, 0x0046f, 0x2b32f, 0x0041f, 0x2c0ad, 0x003e7, 0x2ca8d, 0x003ba, 0x2d323, 0x0010c, 0x3bfbb
2051 };
2052
/* Table of 128 rows with two entries each; the last two rows (126 and 127) map to themselves.
 * NOTE(review): presumably the transition table behind sbacNext(), with
 * g_nextState[state][bin] giving the context state that follows after coding bin in
 * state -- confirm against that helper. */
2053 const uint8_t g_nextState[128][2] =
2054 {
2055     { 2, 1 }, { 0, 3 }, { 4, 0 }, { 1, 5 }, { 6, 2 }, { 3, 7 }, { 8, 4 }, { 5, 9 },
2056     { 10, 4 }, { 5, 11 }, { 12, 8 }, { 9, 13 }, { 14, 8 }, { 9, 15 }, { 16, 10 }, { 11, 17 },
2057     { 18, 12 }, { 13, 19 }, { 20, 14 }, { 15, 21 }, { 22, 16 }, { 17, 23 }, { 24, 18 }, { 19, 25 },
2058     { 26, 18 }, { 19, 27 }, { 28, 22 }, { 23, 29 }, { 30, 22 }, { 23, 31 }, { 32, 24 }, { 25, 33 },
2059     { 34, 26 }, { 27, 35 }, { 36, 26 }, { 27, 37 }, { 38, 30 }, { 31, 39 }, { 40, 30 }, { 31, 41 },
2060     { 42, 32 }, { 33, 43 }, { 44, 32 }, { 33, 45 }, { 46, 36 }, { 37, 47 }, { 48, 36 }, { 37, 49 },
2061     { 50, 38 }, { 39, 51 }, { 52, 38 }, { 39, 53 }, { 54, 42 }, { 43, 55 }, { 56, 42 }, { 43, 57 },
2062     { 58, 44 }, { 45, 59 }, { 60, 44 }, { 45, 61 }, { 62, 46 }, { 47, 63 }, { 64, 48 }, { 49, 65 },
2063     { 66, 48 }, { 49, 67 }, { 68, 50 }, { 51, 69 }, { 70, 52 }, { 53, 71 }, { 72, 52 }, { 53, 73 },
2064     { 74, 54 }, { 55, 75 }, { 76, 54 }, { 55, 77 }, { 78, 56 }, { 57, 79 }, { 80, 58 }, { 59, 81 },
2065     { 82, 58 }, { 59, 83 }, { 84, 60 }, { 61, 85 }, { 86, 60 }, { 61, 87 }, { 88, 60 }, { 61, 89 },
2066     { 90, 62 }, { 63, 91 }, { 92, 64 }, { 65, 93 }, { 94, 64 }, { 65, 95 }, { 96, 66 }, { 67, 97 },
2067     { 98, 66 }, { 67, 99 }, { 100, 66 }, { 67, 101 }, { 102, 68 }, { 69, 103 }, { 104, 68 }, { 69, 105 },
2068     { 106, 70 }, { 71, 107 }, { 108, 70 }, { 71, 109 }, { 110, 70 }, { 71, 111 }, { 112, 72 }, { 73, 113 },
2069     { 114, 72 }, { 73, 115 }, { 116, 72 }, { 73, 117 }, { 118, 74 }, { 75, 119 }, { 120, 74 }, { 75, 121 },
2070     { 122, 74 }, { 75, 123 }, { 124, 76 }, { 77, 125 }, { 124, 76 }, { 77, 125 }, { 126, 126 }, { 127, 127 }
2071 };
2072
2073 }