source/encoder/entropy.cpp

   1 /*****************************************************************************
   2 * Copyright (C) 2013 x265 project
   3 *
   4 * Authors: Steve Borho <steve@borho.org>
   5 *
   6 * This program is free software; you can redistribute it and/or modify
   7 * it under the terms of the GNU General Public License as published by
   8 * the Free Software Foundation; either version 2 of the License, or
   9 * (at your option) any later version.
  10 *
  11 * This program is distributed in the hope that it will be useful,
  12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 * GNU General Public License for more details.
  15 *
  16 * You should have received a copy of the GNU General Public License
  17 * along with this program; if not, write to the Free Software
  18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
  19 *
  20 * This program is also available under a commercial proprietary license.
  21 * For more information, contact us at license @ x265.com.
  22 *****************************************************************************/
  23
  24 #include "common.h"
  25 #include "framedata.h"
  26 #include "scalinglist.h"
  27 #include "quant.h"
  28 #include "contexts.h"
  29 #include "picyuv.h"
  30
  31 #include "sao.h"
  32 #include "entropy.h"
  33
  /* binarization parameters and the DPCM seed used by this file */
  #define CU_DQP_TU_CMAX 5 /* maximum number of bins in the truncated unary code */
  #define CU_DQP_EG_k    0 /* order of the exp-golomb code */
  #define START_VALUE    8 /* initial predictor when a scaling list is coded in DPCM mode */

  /* 8-entry offset lookup table; the code that indexes it lies outside this part of the file */
  static const uint32_t g_puOffset[8] =
  {
      0, 8,  4, 4,
      2, 10, 1, 5
  };
  39
  40 namespace x265 {
  41
  42 Entropy::Entropy()
  43 {
  44     markValid();
  45     m_fracBits = 0;
  46     X265_CHECK(sizeof(m_contextState) >= sizeof(m_contextState[0]) * MAX_OFF_CTX_MOD, "context state table is too small\n");
  47 }
  48
  49 void Entropy::codeVPS(const VPS& vps)
  50 {
  51     WRITE_CODE(0,       4, "vps_video_parameter_set_id");
  52     WRITE_CODE(3,       2, "vps_reserved_three_2bits");
  53     WRITE_CODE(0,       6, "vps_reserved_zero_6bits");
  54     WRITE_CODE(0,       3, "vps_max_sub_layers_minus1");
  55     WRITE_FLAG(1,          "vps_temporal_id_nesting_flag");
  56     WRITE_CODE(0xffff, 16, "vps_reserved_ffff_16bits");
  57
  58     codeProfileTier(vps.ptl);
  59
  60     WRITE_FLAG(true, "vps_sub_layer_ordering_info_present_flag");
  61     WRITE_UVLC(vps.maxDecPicBuffering - 1, "vps_max_dec_pic_buffering_minus1[i]");
  62     WRITE_UVLC(vps.numReorderPics,         "vps_num_reorder_pics[i]");
  63
  64     WRITE_UVLC(0,    "vps_max_latency_increase_plus1[i]");
  65     WRITE_CODE(0, 6, "vps_max_nuh_reserved_zero_layer_id");
  66     WRITE_UVLC(0,    "vps_max_op_sets_minus1");
  67     WRITE_FLAG(0,    "vps_timing_info_present_flag"); /* we signal timing info in SPS-VUI */
  68     WRITE_FLAG(0,    "vps_extension_flag");
  69 }
  70
  71 void Entropy::codeSPS(const SPS& sps, const ScalingList& scalingList, const ProfileTierLevel& ptl)
  72 {
  73     WRITE_CODE(0, 4, "sps_video_parameter_set_id");
  74     WRITE_CODE(0, 3, "sps_max_sub_layers_minus1");
  75     WRITE_FLAG(1,    "sps_temporal_id_nesting_flag");
  76
  77     codeProfileTier(ptl);
  78
  79     WRITE_UVLC(0, "sps_seq_parameter_set_id");
  80     WRITE_UVLC(sps.chromaFormatIdc, "chroma_format_idc");
  81
  82     if (sps.chromaFormatIdc == X265_CSP_I444)
  83         WRITE_FLAG(0,                        "separate_colour_plane_flag");
  84
  85     WRITE_UVLC(sps.picWidthInLumaSamples,   "pic_width_in_luma_samples");
  86     WRITE_UVLC(sps.picHeightInLumaSamples,  "pic_height_in_luma_samples");
  87
  88     const Window& conf = sps.conformanceWindow;
  89     WRITE_FLAG(conf.bEnabled, "conformance_window_flag");
  90     if (conf.bEnabled)
  91     {
  92         int hShift = CHROMA_H_SHIFT(sps.chromaFormatIdc), vShift = CHROMA_V_SHIFT(sps.chromaFormatIdc);
  93         WRITE_UVLC(conf.leftOffset   >> hShift, "conf_win_left_offset");
  94         WRITE_UVLC(conf.rightOffset  >> hShift, "conf_win_right_offset");
  95         WRITE_UVLC(conf.topOffset    >> vShift, "conf_win_top_offset");
  96         WRITE_UVLC(conf.bottomOffset >> vShift, "conf_win_bottom_offset");
  97     }
  98
  99     WRITE_UVLC(X265_DEPTH - 8,   "bit_depth_luma_minus8");
 100     WRITE_UVLC(X265_DEPTH - 8,   "bit_depth_chroma_minus8");
 101     WRITE_UVLC(BITS_FOR_POC - 4, "log2_max_pic_order_cnt_lsb_minus4");
 102     WRITE_FLAG(true,             "sps_sub_layer_ordering_info_present_flag");
 103
 104     WRITE_UVLC(sps.maxDecPicBuffering - 1, "sps_max_dec_pic_buffering_minus1[i]");
 105     WRITE_UVLC(sps.numReorderPics,         "sps_num_reorder_pics[i]");
 106     WRITE_UVLC(0,                          "sps_max_latency_increase_plus1[i]");
 107
 108     WRITE_UVLC(sps.log2MinCodingBlockSize - 3,    "log2_min_coding_block_size_minus3");
 109     WRITE_UVLC(sps.log2DiffMaxMinCodingBlockSize, "log2_diff_max_min_coding_block_size");
 110     WRITE_UVLC(sps.quadtreeTULog2MinSize - 2,     "log2_min_transform_block_size_minus2");
 111     WRITE_UVLC(sps.quadtreeTULog2MaxSize - sps.quadtreeTULog2MinSize, "log2_diff_max_min_transform_block_size");
 112     WRITE_UVLC(sps.quadtreeTUMaxDepthInter - 1,   "max_transform_hierarchy_depth_inter");
 113     WRITE_UVLC(sps.quadtreeTUMaxDepthIntra - 1,   "max_transform_hierarchy_depth_intra");
 114     WRITE_FLAG(scalingList.m_bEnabled,            "scaling_list_enabled_flag");
 115     if (scalingList.m_bEnabled)
 116     {
 117         WRITE_FLAG(scalingList.m_bDataPresent,    "sps_scaling_list_data_present_flag");
 118         if (scalingList.m_bDataPresent)
 119             codeScalingList(scalingList);
 120     }
 121     WRITE_FLAG(sps.bUseAMP, "amp_enabled_flag");
 122     WRITE_FLAG(sps.bUseSAO, "sample_adaptive_offset_enabled_flag");
 123
 124     WRITE_FLAG(0, "pcm_enabled_flag");
 125     WRITE_UVLC(0, "num_short_term_ref_pic_sets");
 126     WRITE_FLAG(0, "long_term_ref_pics_present_flag");
 127
 128     WRITE_FLAG(sps.bTemporalMVPEnabled, "sps_temporal_mvp_enable_flag");
 129     WRITE_FLAG(sps.bUseStrongIntraSmoothing, "sps_strong_intra_smoothing_enable_flag");
 130
 131     WRITE_FLAG(1, "vui_parameters_present_flag");
 132     codeVUI(sps.vuiParameters);
 133
 134     WRITE_FLAG(0, "sps_extension_flag");
 135 }
 136
 137 void Entropy::codePPS(const PPS& pps)
 138 {
 139     WRITE_UVLC(0,                          "pps_pic_parameter_set_id");
 140     WRITE_UVLC(0,                          "pps_seq_parameter_set_id");
 141     WRITE_FLAG(0,                          "dependent_slice_segments_enabled_flag");
 142     WRITE_FLAG(0,                          "output_flag_present_flag");
 143     WRITE_CODE(0, 3,                       "num_extra_slice_header_bits");
 144     WRITE_FLAG(pps.bSignHideEnabled,       "sign_data_hiding_flag");
 145     WRITE_FLAG(0,                          "cabac_init_present_flag");
 146     WRITE_UVLC(0,                          "num_ref_idx_l0_default_active_minus1");
 147     WRITE_UVLC(0,                          "num_ref_idx_l1_default_active_minus1");
 148
 149     WRITE_SVLC(0, "init_qp_minus26");
 150     WRITE_FLAG(pps.bConstrainedIntraPred, "constrained_intra_pred_flag");
 151     WRITE_FLAG(pps.bTransformSkipEnabled, "transform_skip_enabled_flag");
 152
 153     WRITE_FLAG(pps.bUseDQP,                "cu_qp_delta_enabled_flag");
 154     if (pps.bUseDQP)
 155         WRITE_UVLC(pps.maxCuDQPDepth,      "diff_cu_qp_delta_depth");
 156
 157     WRITE_SVLC(pps.chromaCbQpOffset,       "pps_cb_qp_offset");
 158     WRITE_SVLC(pps.chromaCrQpOffset,       "pps_cr_qp_offset");
 159     WRITE_FLAG(0,                          "pps_slice_chroma_qp_offsets_present_flag");
 160
 161     WRITE_FLAG(pps.bUseWeightPred,            "weighted_pred_flag");
 162     WRITE_FLAG(pps.bUseWeightedBiPred,        "weighted_bipred_flag");
 163     WRITE_FLAG(pps.bTransquantBypassEnabled,  "transquant_bypass_enable_flag");
 164     WRITE_FLAG(0,                             "tiles_enabled_flag");
 165     WRITE_FLAG(pps.bEntropyCodingSyncEnabled, "entropy_coding_sync_enabled_flag");
 166     WRITE_FLAG(1,                             "loop_filter_across_slices_enabled_flag");
 167
 168     WRITE_FLAG(pps.bDeblockingFilterControlPresent, "deblocking_filter_control_present_flag");
 169     if (pps.bDeblockingFilterControlPresent)
 170     {
 171         WRITE_FLAG(0,                               "deblocking_filter_override_enabled_flag");
 172         WRITE_FLAG(pps.bPicDisableDeblockingFilter, "pps_disable_deblocking_filter_flag");
 173         if (!pps.bPicDisableDeblockingFilter)
 174         {
 175             WRITE_SVLC(pps.deblockingFilterBetaOffsetDiv2, "pps_beta_offset_div2");
 176             WRITE_SVLC(pps.deblockingFilterTcOffsetDiv2,   "pps_tc_offset_div2");
 177         }
 178     }
 179
 180     WRITE_FLAG(0, "pps_scaling_list_data_present_flag");
 181     WRITE_FLAG(0, "lists_modification_present_flag");
 182     WRITE_UVLC(0, "log2_parallel_merge_level_minus2");
 183     WRITE_FLAG(0, "slice_segment_header_extension_present_flag");
 184     WRITE_FLAG(0, "pps_extension_flag");
 185 }
 186
 187 void Entropy::codeProfileTier(const ProfileTierLevel& ptl)
 188 {
 189     WRITE_CODE(0, 2,                "XXX_profile_space[]");
 190     WRITE_FLAG(ptl.tierFlag,        "XXX_tier_flag[]");
 191     WRITE_CODE(ptl.profileIdc, 5,   "XXX_profile_idc[]");
 192     for (int j = 0; j < 32; j++)
 193         WRITE_FLAG(ptl.profileCompatibilityFlag[j], "XXX_profile_compatibility_flag[][j]");
 194
 195     WRITE_FLAG(ptl.progressiveSourceFlag,   "general_progressive_source_flag");
 196     WRITE_FLAG(ptl.interlacedSourceFlag,    "general_interlaced_source_flag");
 197     WRITE_FLAG(ptl.nonPackedConstraintFlag, "general_non_packed_constraint_flag");
 198     WRITE_FLAG(ptl.frameOnlyConstraintFlag, "general_frame_only_constraint_flag");
 199
 200     if (ptl.profileIdc == Profile::MAINREXT || ptl.profileIdc == Profile::HIGHTHROUGHPUTREXT)
 201     {
 202         uint32_t bitDepthConstraint = ptl.bitDepthConstraint;
 203         int csp = ptl.chromaFormatConstraint;
 204         WRITE_FLAG(bitDepthConstraint<=12, "general_max_12bit_constraint_flag");
 205         WRITE_FLAG(bitDepthConstraint<=10, "general_max_10bit_constraint_flag");
 206         WRITE_FLAG(bitDepthConstraint<= 8 && csp != X265_CSP_I422 , "general_max_8bit_constraint_flag");
 207         WRITE_FLAG(csp == X265_CSP_I422 || csp == X265_CSP_I420 || csp == X265_CSP_I400, "general_max_422chroma_constraint_flag");
 208         WRITE_FLAG(csp == X265_CSP_I420 || csp == X265_CSP_I400,                         "general_max_420chroma_constraint_flag");
 209         WRITE_FLAG(csp == X265_CSP_I400,                                                 "general_max_monochrome_constraint_flag");
 210         WRITE_FLAG(ptl.intraConstraintFlag,        "general_intra_constraint_flag");
 211         WRITE_FLAG(0,                              "general_one_picture_only_constraint_flag");
 212         WRITE_FLAG(ptl.lowerBitRateConstraintFlag, "general_lower_bit_rate_constraint_flag");
 213         WRITE_CODE(0 , 16, "XXX_reserved_zero_35bits[0..15]");
 214         WRITE_CODE(0 , 16, "XXX_reserved_zero_35bits[16..31]");
 215         WRITE_CODE(0 ,  3, "XXX_reserved_zero_35bits[32..34]");
 216     }
 217     else
 218     {
 219         WRITE_CODE(0, 16, "XXX_reserved_zero_44bits[0..15]");
 220         WRITE_CODE(0, 16, "XXX_reserved_zero_44bits[16..31]");
 221         WRITE_CODE(0, 12, "XXX_reserved_zero_44bits[32..43]");
 222     }
 223
 224     WRITE_CODE(ptl.levelIdc, 8, "general_level_idc");
 225 }
 226
 227 void Entropy::codeVUI(const VUI& vui)
 228 {
 229     WRITE_FLAG(vui.aspectRatioInfoPresentFlag,  "aspect_ratio_info_present_flag");
 230     if (vui.aspectRatioInfoPresentFlag)
 231     {
 232         WRITE_CODE(vui.aspectRatioIdc, 8,       "aspect_ratio_idc");
 233         if (vui.aspectRatioIdc == 255)
 234         {
 235             WRITE_CODE(vui.sarWidth, 16,        "sar_width");
 236             WRITE_CODE(vui.sarHeight, 16,       "sar_height");
 237         }
 238     }
 239
 240     WRITE_FLAG(vui.overscanInfoPresentFlag,     "overscan_info_present_flag");
 241     if (vui.overscanInfoPresentFlag)
 242         WRITE_FLAG(vui.overscanAppropriateFlag, "overscan_appropriate_flag");
 243
 244     WRITE_FLAG(vui.videoSignalTypePresentFlag,  "video_signal_type_present_flag");
 245     if (vui.videoSignalTypePresentFlag)
 246     {
 247         WRITE_CODE(vui.videoFormat, 3,          "video_format");
 248         WRITE_FLAG(vui.videoFullRangeFlag,      "video_full_range_flag");
 249         WRITE_FLAG(vui.colourDescriptionPresentFlag, "colour_description_present_flag");
 250         if (vui.colourDescriptionPresentFlag)
 251         {
 252             WRITE_CODE(vui.colourPrimaries, 8,         "colour_primaries");
 253             WRITE_CODE(vui.transferCharacteristics, 8, "transfer_characteristics");
 254             WRITE_CODE(vui.matrixCoefficients, 8,      "matrix_coefficients");
 255         }
 256     }
 257
 258     WRITE_FLAG(vui.chromaLocInfoPresentFlag,           "chroma_loc_info_present_flag");
 259     if (vui.chromaLocInfoPresentFlag)
 260     {
 261         WRITE_UVLC(vui.chromaSampleLocTypeTopField,    "chroma_sample_loc_type_top_field");
 262         WRITE_UVLC(vui.chromaSampleLocTypeBottomField, "chroma_sample_loc_type_bottom_field");
 263     }
 264
 265     WRITE_FLAG(0,                                     "neutral_chroma_indication_flag");
 266     WRITE_FLAG(vui.fieldSeqFlag,                      "field_seq_flag");
 267     WRITE_FLAG(vui.frameFieldInfoPresentFlag,         "frame_field_info_present_flag");
 268
 269     WRITE_FLAG(vui.defaultDisplayWindow.bEnabled,    "default_display_window_flag");
 270     if (vui.defaultDisplayWindow.bEnabled)
 271     {
 272         WRITE_UVLC(vui.defaultDisplayWindow.leftOffset,   "def_disp_win_left_offset");
 273         WRITE_UVLC(vui.defaultDisplayWindow.rightOffset,  "def_disp_win_right_offset");
 274         WRITE_UVLC(vui.defaultDisplayWindow.topOffset,    "def_disp_win_top_offset");
 275         WRITE_UVLC(vui.defaultDisplayWindow.bottomOffset, "def_disp_win_bottom_offset");
 276     }
 277
 278     WRITE_FLAG(1,                                 "vui_timing_info_present_flag");
 279     WRITE_CODE(vui.timingInfo.numUnitsInTick, 32, "vui_num_units_in_tick");
 280     WRITE_CODE(vui.timingInfo.timeScale, 32,      "vui_time_scale");
 281     WRITE_FLAG(0,                                 "vui_poc_proportional_to_timing_flag");
 282
 283     WRITE_FLAG(vui.hrdParametersPresentFlag,  "vui_hrd_parameters_present_flag");
 284     if (vui.hrdParametersPresentFlag)
 285         codeHrdParameters(vui.hrdParameters);
 286
 287     WRITE_FLAG(0, "bitstream_restriction_flag");
 288 }
 289
 290 void Entropy::codeScalingList(const ScalingList& scalingList)
 291 {
 292     for (int sizeId = 0; sizeId < ScalingList::NUM_SIZES; sizeId++)
 293     {
 294         for (int listId = 0; listId < ScalingList::NUM_LISTS; listId++)
 295         {
 296             int predList = scalingList.checkPredMode(sizeId, listId);
 297             WRITE_FLAG(predList < 0, "scaling_list_pred_mode_flag");
 298             if (predList >= 0)
 299                 WRITE_UVLC(listId - predList, "scaling_list_pred_matrix_id_delta");
 300             else // DPCM Mode
 301                 codeScalingList(scalingList, sizeId, listId);
 302         }
 303     }
 304 }
 305
 306 void Entropy::codeScalingList(const ScalingList& scalingList, uint32_t sizeId, uint32_t listId)
 307 {
 308     int coefNum = X265_MIN(ScalingList::MAX_MATRIX_COEF_NUM, (int)ScalingList::s_numCoefPerSize[sizeId]);
 309     const uint16_t* scan = (sizeId == 0 ? g_scan4x4[SCAN_DIAG] : g_scan8x8diag);
 310     int nextCoef = START_VALUE;
 311     int32_t *src = scalingList.m_scalingListCoef[sizeId][listId];
 312     int data;
 313
 314     if (sizeId > BLOCK_8x8)
 315     {
 316         WRITE_SVLC(scalingList.m_scalingListDC[sizeId][listId] - 8, "scaling_list_dc_coef_minus8");
 317         nextCoef = scalingList.m_scalingListDC[sizeId][listId];
 318     }
 319     for (int i = 0; i < coefNum; i++)
 320     {
 321         data = src[scan[i]] - nextCoef;
 322         nextCoef = src[scan[i]];
 323         if (data > 127)
 324             data = data - 256;
 325         if (data < -128)
 326             data = data + 256;
 327
 328         WRITE_SVLC(data,  "scaling_list_delta_coef");
 329     }
 330 }
 331
 332 void Entropy::codeHrdParameters(const HRDInfo& hrd)
 333 {
 334     WRITE_FLAG(1, "nal_hrd_parameters_present_flag");
 335     WRITE_FLAG(0, "vcl_hrd_parameters_present_flag");
 336     WRITE_FLAG(0, "sub_pic_hrd_params_present_flag");
 337
 338     WRITE_CODE(hrd.bitRateScale, 4, "bit_rate_scale");
 339     WRITE_CODE(hrd.cpbSizeScale, 4, "cpb_size_scale");
 340
 341     WRITE_CODE(hrd.initialCpbRemovalDelayLength - 1, 5, "initial_cpb_removal_delay_length_minus1");
 342     WRITE_CODE(hrd.cpbRemovalDelayLength - 1,        5, "au_cpb_removal_delay_length_minus1");
 343     WRITE_CODE(hrd.dpbOutputDelayLength - 1,         5, "dpb_output_delay_length_minus1");
 344
 345     WRITE_FLAG(1, "fixed_pic_rate_general_flag");
 346     WRITE_UVLC(0, "elemental_duration_in_tc_minus1");
 347     WRITE_UVLC(0, "cpb_cnt_minus1");
 348
 349     WRITE_UVLC(hrd.bitRateValue - 1, "bit_rate_value_minus1");
 350     WRITE_UVLC(hrd.cpbSizeValue - 1, "cpb_size_value_minus1");
 351     WRITE_FLAG(hrd.cbrFlag, "cbr_flag");
 352 }
 353
 354 void Entropy::codeAUD(const Slice& slice)
 355 {
 356     int picType;
 357
 358     switch (slice.m_sliceType)
 359     {
 360     case I_SLICE:
 361         picType = 0;
 362         break;
 363     case P_SLICE:
 364         picType = 1;
 365         break;
 366     case B_SLICE:
 367         picType = 2;
 368         break;
 369     default:
 370         picType = 7;
 371         break;
 372     }
 373
 374     WRITE_CODE(picType, 3, "pic_type");
 375 }
 376
 377 void Entropy::codeSliceHeader(const Slice& slice, FrameData& encData)
 378 {
 379     WRITE_FLAG(1, "first_slice_segment_in_pic_flag");
 380     if (slice.getRapPicFlag())
 381         WRITE_FLAG(0, "no_output_of_prior_pics_flag");
 382
 383     WRITE_UVLC(0, "slice_pic_parameter_set_id");
 384
 385     /* x265 does not use dependent slices, so always write all this data */
 386
 387     WRITE_UVLC(slice.m_sliceType, "slice_type");
 388
 389     if (!slice.getIdrPicFlag())
 390     {
 391         int picOrderCntLSB = (slice.m_poc - slice.m_lastIDR + (1 << BITS_FOR_POC)) % (1 << BITS_FOR_POC);
 392         WRITE_CODE(picOrderCntLSB, BITS_FOR_POC, "pic_order_cnt_lsb");
 393
 394 #if _DEBUG || CHECKED_BUILD
 395         // check for bitstream restriction stating that:
 396         // If the current picture is a BLA or CRA picture, the value of NumPocTotalCurr shall be equal to 0.
 397         // Ideally this process should not be repeated for each slice in a picture
 398         if (slice.isIRAP())
 399             for (int picIdx = 0; picIdx < slice.m_rps.numberOfPictures; picIdx++)
 400                 X265_CHECK(!slice.m_rps.bUsed[picIdx], "pic unused failure\n");
 401 #endif
 402
 403         WRITE_FLAG(0, "short_term_ref_pic_set_sps_flag");
 404         codeShortTermRefPicSet(slice.m_rps);
 405
 406         if (slice.m_sps->bTemporalMVPEnabled)
 407             WRITE_FLAG(1, "slice_temporal_mvp_enable_flag");
 408     }
 409     const SAOParam *saoParam = encData.m_saoParam;
 410     if (slice.m_sps->bUseSAO)
 411     {
 412         WRITE_FLAG(saoParam->bSaoFlag[0], "slice_sao_luma_flag");
 413         WRITE_FLAG(saoParam->bSaoFlag[1], "slice_sao_chroma_flag");
 414     }
 415
 416     // check if numRefIdx match the defaults (1, hard-coded in PPS). If not, override
 417     // TODO: this might be a place to optimize a few bits per slice, by using param->refs for L0 default
 418
 419     if (!slice.isIntra())
 420     {
 421         bool overrideFlag = (slice.m_numRefIdx[0] != 1 || (slice.isInterB() && slice.m_numRefIdx[1] != 1));
 422         WRITE_FLAG(overrideFlag, "num_ref_idx_active_override_flag");
 423         if (overrideFlag)
 424         {
 425             WRITE_UVLC(slice.m_numRefIdx[0] - 1, "num_ref_idx_l0_active_minus1");
 426             if (slice.isInterB())
 427                 WRITE_UVLC(slice.m_numRefIdx[1] - 1, "num_ref_idx_l1_active_minus1");
 428             else
 429             {
 430                 X265_CHECK(slice.m_numRefIdx[1] == 0, "expected no L1 references for P slice\n");
 431             }
 432         }
 433     }
 434     else
 435     {
 436         X265_CHECK(!slice.m_numRefIdx[0] && !slice.m_numRefIdx[1], "expected no references for I slice\n");
 437     }
 438
 439     if (slice.isInterB())
 440         WRITE_FLAG(0, "mvd_l1_zero_flag");
 441
 442     if (slice.m_sps->bTemporalMVPEnabled)
 443     {
 444         if (slice.m_sliceType == B_SLICE)
 445             WRITE_FLAG(slice.m_colFromL0Flag, "collocated_from_l0_flag");
 446
 447         if (slice.m_sliceType != I_SLICE &&
 448             ((slice.m_colFromL0Flag && slice.m_numRefIdx[0] > 1) ||
 449             (!slice.m_colFromL0Flag && slice.m_numRefIdx[1] > 1)))
 450         {
 451             WRITE_UVLC(slice.m_colRefIdx, "collocated_ref_idx");
 452         }
 453     }
 454     if ((slice.m_pps->bUseWeightPred && slice.m_sliceType == P_SLICE) || (slice.m_pps->bUseWeightedBiPred && slice.m_sliceType == B_SLICE))
 455         codePredWeightTable(slice);
 456
 457     X265_CHECK(slice.m_maxNumMergeCand <= MRG_MAX_NUM_CANDS, "too many merge candidates\n");
 458     if (!slice.isIntra())
 459         WRITE_UVLC(MRG_MAX_NUM_CANDS - slice.m_maxNumMergeCand, "five_minus_max_num_merge_cand");
 460
 461     int code = slice.m_sliceQp - 26;
 462     WRITE_SVLC(code, "slice_qp_delta");
 463
 464     bool isSAOEnabled = slice.m_sps->bUseSAO ? saoParam->bSaoFlag[0] || saoParam->bSaoFlag[1] : false;
 465     bool isDBFEnabled = !slice.m_pps->bPicDisableDeblockingFilter;
 466
 467     if (isSAOEnabled || isDBFEnabled)
 468         WRITE_FLAG(slice.m_sLFaseFlag, "slice_loop_filter_across_slices_enabled_flag");
 469 }
 470
 471 /** write wavefront substreams sizes for the slice header */
 472 void Entropy::codeSliceHeaderWPPEntryPoints(const Slice& slice, const uint32_t *substreamSizes, uint32_t maxOffset)
 473 {
 474     uint32_t offsetLen = 1;
 475     while (maxOffset >= (1U << offsetLen))
 476     {
 477         offsetLen++;
 478         X265_CHECK(offsetLen < 32, "offsetLen is too large\n");
 479     }
 480
 481     uint32_t numRows = slice.m_sps->numCuInHeight - 1;
 482     WRITE_UVLC(numRows, "num_entry_point_offsets");
 483     if (numRows > 0)
 484         WRITE_UVLC(offsetLen - 1, "offset_len_minus1");
 485
 486     for (uint32_t i = 0; i < numRows; i++)
 487         WRITE_CODE(substreamSizes[i] - 1, offsetLen, "entry_point_offset_minus1");
 488 }
 489
 490 void Entropy::codeShortTermRefPicSet(const RPS& rps)
 491 {
 492     WRITE_UVLC(rps.numberOfNegativePictures, "num_negative_pics");
 493     WRITE_UVLC(rps.numberOfPositivePictures, "num_positive_pics");
 494     int prev = 0;
 495     for (int j = 0; j < rps.numberOfNegativePictures; j++)
 496     {
 497         WRITE_UVLC(prev - rps.deltaPOC[j] - 1, "delta_poc_s0_minus1");
 498         prev = rps.deltaPOC[j];
 499         WRITE_FLAG(rps.bUsed[j], "used_by_curr_pic_s0_flag");
 500     }
 501
 502     prev = 0;
 503     for (int j = rps.numberOfNegativePictures; j < rps.numberOfNegativePictures + rps.numberOfPositivePictures; j++)
 504     {
 505         WRITE_UVLC(rps.deltaPOC[j] - prev - 1, "delta_poc_s1_minus1");
 506         prev = rps.deltaPOC[j];
 507         WRITE_FLAG(rps.bUsed[j], "used_by_curr_pic_s1_flag");
 508     }
 509 }
 510
 511 void Entropy::encodeCTU(const CUData& ctu, const CUGeom& cuGeom)
 512 {
 513     bool bEncodeDQP = ctu.m_slice->m_pps->bUseDQP;
 514     encodeCU(ctu, cuGeom, 0, 0, bEncodeDQP);
 515 }
 516
 517 /* encode a CU block recursively */
 518 void Entropy::encodeCU(const CUData& cu, const CUGeom& cuGeom, uint32_t absPartIdx, uint32_t depth, bool& bEncodeDQP)
 519 {
 520     const Slice* slice = cu.m_slice;
 521
 522     if (depth <= slice->m_pps->maxCuDQPDepth && slice->m_pps->bUseDQP)
 523         bEncodeDQP = true;
 524
 525     int cuSplitFlag = !(cuGeom.flags & CUGeom::LEAF);
 526     int cuUnsplitFlag = !(cuGeom.flags & CUGeom::SPLIT_MANDATORY);
 527
 528     if (!cuUnsplitFlag)
 529     {
 530         uint32_t qNumParts = (NUM_CU_PARTITIONS >> (depth << 1)) >> 2;
 531         for (uint32_t subPartIdx = 0; subPartIdx < 4; subPartIdx++, absPartIdx += qNumParts)
 532         {
 533             const CUGeom& childCuData = *(&cuGeom + cuGeom.childOffset + subPartIdx);
 534             if (childCuData.flags & CUGeom::PRESENT)
 535                 encodeCU(cu, childCuData, absPartIdx, depth + 1, bEncodeDQP);
 536         }
 537         return;
 538     }
 539
 540     // We need to split, so don't try these modes.
 541     if (cuSplitFlag)
 542         codeSplitFlag(cu, absPartIdx, depth);
 543
 544     if (depth < cu.m_cuDepth[absPartIdx] && depth < g_maxCUDepth)
 545     {
 546         uint32_t qNumParts = (NUM_CU_PARTITIONS >> (depth << 1)) >> 2;
 547
 548         for (uint32_t subPartIdx = 0; subPartIdx < 4; subPartIdx++, absPartIdx += qNumParts)
 549         {
 550             const CUGeom& childCuData = *(&cuGeom + cuGeom.childOffset + subPartIdx);
 551             encodeCU(cu, childCuData, absPartIdx, depth + 1, bEncodeDQP);
 552         }
 553         return;
 554     }
 555
 556     if (slice->m_pps->bTransquantBypassEnabled)
 557         codeCUTransquantBypassFlag(cu.m_tqBypass[absPartIdx]);
 558
 559     if (!slice->isIntra())
 560         codeSkipFlag(cu, absPartIdx);
 561
 562     if (cu.isSkipped(absPartIdx))
 563     {
 564         codeMergeIndex(cu, absPartIdx);
 565         finishCU(cu, absPartIdx, depth);
 566         return;
 567     }
 568
 569     if (!slice->isIntra())
 570         codePredMode(cu.m_predMode[absPartIdx]);
 571
 572     codePartSize(cu, absPartIdx, depth);
 573
 574     // prediction Info ( Intra : direction mode, Inter : Mv, reference idx )
 575     codePredInfo(cu, absPartIdx);
 576
 577     uint32_t tuDepthRange[2];
 578     if (cu.isIntra(absPartIdx))
 579         cu.getIntraTUQtDepthRange(tuDepthRange, absPartIdx);
 580     else
 581         cu.getInterTUQtDepthRange(tuDepthRange, absPartIdx);
 582
 583     // Encode Coefficients, allow codeCoeff() to modify bEncodeDQP
 584     codeCoeff(cu, absPartIdx, depth, bEncodeDQP, tuDepthRange);
 585
 586     // --- write terminating bit ---
 587     finishCU(cu, absPartIdx, depth);
 588 }
 589
 590 /* finish encoding a cu and handle end-of-slice conditions */
 591 void Entropy::finishCU(const CUData& cu, uint32_t absPartIdx, uint32_t depth)
 592 {
 593     const Slice* slice = cu.m_slice;
 594     X265_CHECK(cu.m_slice->m_endCUAddr == cu.m_slice->realEndAddress(slice->m_endCUAddr), "real end address expected\n");
 595     uint32_t realEndAddress = slice->m_endCUAddr;
 596     uint32_t cuAddr = cu.getSCUAddr() + absPartIdx;
 597
 598     uint32_t granularityMask = g_maxCUSize - 1;
 599     uint32_t cuSize = 1 << cu.m_log2CUSize[absPartIdx];
 600     uint32_t rpelx = cu.m_cuPelX + g_zscanToPelX[absPartIdx] + cuSize;
 601     uint32_t bpely = cu.m_cuPelY + g_zscanToPelY[absPartIdx] + cuSize;
 602     bool granularityBoundary = (((rpelx & granularityMask) == 0 || (rpelx == slice->m_sps->picWidthInLumaSamples )) &&
 603                                 ((bpely & granularityMask) == 0 || (bpely == slice->m_sps->picHeightInLumaSamples)));
 604
 605     if (granularityBoundary)
 606     {
 607         // Encode slice finish
 608         bool bTerminateSlice = false;
 609         if (cuAddr + (NUM_CU_PARTITIONS >> (depth << 1)) == realEndAddress)
 610             bTerminateSlice = true;
 611
 612         // The 1-terminating bit is added to all streams, so don't add it here when it's 1.
 613         if (!bTerminateSlice)
 614             encodeBinTrm(0);
 615
 616         if (!m_bitIf)
 617             resetBits(); // TODO: most likely unnecessary
 618     }
 619 }
 620
 621 void Entropy::encodeTransform(const CUData& cu, CoeffCodeState& state, uint32_t offsetLuma, uint32_t offsetChroma, uint32_t absPartIdx,
 622                               uint32_t absPartIdxStep, uint32_t depth, uint32_t log2TrSize, uint32_t trIdx, bool& bCodeDQP, uint32_t depthRange[2])
 623 {
 624     const bool subdiv = cu.m_tuDepth[absPartIdx] + cu.m_cuDepth[absPartIdx] > (uint8_t)depth;
 625     uint32_t hChromaShift = cu.m_hChromaShift;
 626     uint32_t vChromaShift = cu.m_vChromaShift;
 627     uint32_t cbfY = cu.getCbf(absPartIdx, TEXT_LUMA, trIdx);
 628     uint32_t cbfU = cu.getCbf(absPartIdx, TEXT_CHROMA_U, trIdx);
 629     uint32_t cbfV = cu.getCbf(absPartIdx, TEXT_CHROMA_V, trIdx);
 630
 631     if (!trIdx)
 632         state.bakAbsPartIdxCU = absPartIdx;
 633
 634     if (log2TrSize == 2 && cu.m_chromaFormat != X265_CSP_I444)
 635     {
 636         uint32_t partNum = NUM_CU_PARTITIONS >> ((depth - 1) << 1);
 637         if (!(absPartIdx & (partNum - 1)))
 638         {
 639             state.bakAbsPartIdx   = absPartIdx;
 640             state.bakChromaOffset = offsetChroma;
 641         }
 642         else if ((absPartIdx & (partNum - 1)) == (partNum - 1))
 643         {
 644             cbfU = cu.getCbf(state.bakAbsPartIdx, TEXT_CHROMA_U, trIdx);
 645             cbfV = cu.getCbf(state.bakAbsPartIdx, TEXT_CHROMA_V, trIdx);
 646         }
 647     }
 648
 649     /* in each of these conditions, the subdiv flag is implied and not signaled,
 650      * so we have checks to make sure the implied value matches our intentions */
 651     if (cu.m_predMode[absPartIdx] == MODE_INTRA && cu.m_partSize[absPartIdx] == SIZE_NxN && depth == cu.m_cuDepth[absPartIdx])
 652     {
 653         X265_CHECK(subdiv, "intra NxN requires TU depth below CU depth\n");
 654     }
 655     else if (cu.m_predMode[absPartIdx] == MODE_INTER && (cu.m_partSize[absPartIdx] != SIZE_2Nx2N) && depth == cu.m_cuDepth[absPartIdx] &&
 656              cu.m_slice->m_sps->quadtreeTUMaxDepthInter == 1)
 657     {
 658         X265_CHECK(subdiv, "inter TU must be smaller than CU when not 2Nx2N part size: log2TrSize %d, depthRange[0] %d\n", log2TrSize, depthRange[0]);
 659     }
 660     else if (log2TrSize > depthRange[1])
 661     {
 662         X265_CHECK(subdiv, "TU is larger than the max allowed, it should have been split\n");
 663     }
 664     else if (log2TrSize == cu.m_slice->m_sps->quadtreeTULog2MinSize || log2TrSize == depthRange[0])
 665     {
 666         X265_CHECK(!subdiv, "min sized TU cannot be subdivided\n");
 667     }
 668     else
 669     {
 670         X265_CHECK(log2TrSize > depthRange[0], "transform size failure\n");
 671         codeTransformSubdivFlag(subdiv, 5 - log2TrSize);
 672     }
 673
 674     const uint32_t trDepthCurr = depth - cu.m_cuDepth[absPartIdx];
 675     const bool bFirstCbfOfCU = trDepthCurr == 0;
 676
 677     bool mCodeAll = true;
 678     const uint32_t numPels = 1 << (log2TrSize * 2 - hChromaShift - vChromaShift);
 679     if (numPels < (MIN_TU_SIZE * MIN_TU_SIZE))
 680         mCodeAll = false;
 681
 682     if (bFirstCbfOfCU || mCodeAll)
 683     {
 684         uint32_t tuSize = 1 << log2TrSize;
 685         if (bFirstCbfOfCU || cu.getCbf(absPartIdx, TEXT_CHROMA_U, trDepthCurr - 1))
 686             codeQtCbf(cu, absPartIdx, absPartIdxStep, (tuSize >> hChromaShift), (tuSize >> vChromaShift), TEXT_CHROMA_U, trDepthCurr, (subdiv == 0));
 687         if (bFirstCbfOfCU || cu.getCbf(absPartIdx, TEXT_CHROMA_V, trDepthCurr - 1))
 688             codeQtCbf(cu, absPartIdx, absPartIdxStep, (tuSize >> hChromaShift), (tuSize >> vChromaShift), TEXT_CHROMA_V, trDepthCurr, (subdiv == 0));
 689     }
 690     else
 691     {
 692         X265_CHECK(cu.getCbf(absPartIdx, TEXT_CHROMA_U, trDepthCurr) == cu.getCbf(absPartIdx, TEXT_CHROMA_U, trDepthCurr - 1), "chroma xform size match failure\n");
 693         X265_CHECK(cu.getCbf(absPartIdx, TEXT_CHROMA_V, trDepthCurr) == cu.getCbf(absPartIdx, TEXT_CHROMA_V, trDepthCurr - 1), "chroma xform size match failure\n");
 694     }
 695
 696     if (subdiv)
 697     {
 698         log2TrSize--;
 699         uint32_t numCoeff  = 1 << (log2TrSize * 2);
 700         uint32_t numCoeffC = (numCoeff >> (hChromaShift + vChromaShift));
 701         trIdx++;
 702         ++depth;
 703         absPartIdxStep >>= 2;
 704         const uint32_t partNum = NUM_CU_PARTITIONS >> (depth << 1);
 705
 706         encodeTransform(cu, state, offsetLuma, offsetChroma, absPartIdx, absPartIdxStep, depth, log2TrSize, trIdx, bCodeDQP, depthRange);
 707
 708         absPartIdx += partNum;
 709         offsetLuma += numCoeff;
 710         offsetChroma += numCoeffC;
 711         encodeTransform(cu, state, offsetLuma, offsetChroma, absPartIdx, absPartIdxStep, depth, log2TrSize, trIdx, bCodeDQP, depthRange);
 712
 713         absPartIdx += partNum;
 714         offsetLuma += numCoeff;
 715         offsetChroma += numCoeffC;
 716         encodeTransform(cu, state, offsetLuma, offsetChroma, absPartIdx, absPartIdxStep, depth, log2TrSize, trIdx, bCodeDQP, depthRange);
 717
 718         absPartIdx += partNum;
 719         offsetLuma += numCoeff;
 720         offsetChroma += numCoeffC;
 721         encodeTransform(cu, state, offsetLuma, offsetChroma, absPartIdx, absPartIdxStep, depth, log2TrSize, trIdx, bCodeDQP, depthRange);
 722     }
 723     else
 724     {
 725         if (cu.m_predMode[absPartIdx] != MODE_INTRA && depth == cu.m_cuDepth[absPartIdx] && !cu.getCbf(absPartIdx, TEXT_CHROMA_U, 0) && !cu.getCbf(absPartIdx, TEXT_CHROMA_V, 0))
 726         {
 727             X265_CHECK(cu.getCbf(absPartIdx, TEXT_LUMA, 0), "CBF should have been set\n");
 728         }
 729         else
 730             codeQtCbf(cu, absPartIdx, TEXT_LUMA, cu.m_tuDepth[absPartIdx]);
 731
 732         if (cbfY || cbfU || cbfV)
 733         {
 734             // dQP: only for CTU once
 735             if (cu.m_slice->m_pps->bUseDQP)
 736             {
 737                 if (bCodeDQP)
 738                 {
 739                     codeDeltaQP(cu, state.bakAbsPartIdxCU);
 740                     bCodeDQP = false;
 741                 }
 742             }
 743         }
 744         if (cbfY)
 745             codeCoeffNxN(cu, cu.m_trCoeff[0] + offsetLuma, absPartIdx, log2TrSize, TEXT_LUMA);
 746
 747         int chFmt = cu.m_chromaFormat;
 748         if (log2TrSize == 2 && chFmt != X265_CSP_I444)
 749         {
 750             uint32_t partNum = NUM_CU_PARTITIONS >> ((depth - 1) << 1);
 751             if ((absPartIdx & (partNum - 1)) == (partNum - 1))
 752             {
 753                 const uint32_t log2TrSizeC = 2;
 754                 const bool splitIntoSubTUs = (chFmt == X265_CSP_I422);
 755
 756                 uint32_t curPartNum = NUM_CU_PARTITIONS >> ((depth - 1) << 1);
 757
 758                 for (uint32_t chromaId = TEXT_CHROMA_U; chromaId <= TEXT_CHROMA_V; chromaId++)
 759                 {
 760                     TURecurse tuIterator(splitIntoSubTUs ? VERTICAL_SPLIT : DONT_SPLIT, curPartNum, state.bakAbsPartIdx);
 761                     const coeff_t* coeffChroma = cu.m_trCoeff[chromaId];
 762                     do
 763                     {
 764                         uint32_t cbf = cu.getCbf(tuIterator.absPartIdxTURelCU, (TextType)chromaId, trIdx + splitIntoSubTUs);
 765                         if (cbf)
 766                         {
 767                             uint32_t subTUOffset = tuIterator.section << (log2TrSizeC * 2);
 768                             codeCoeffNxN(cu, coeffChroma + state.bakChromaOffset + subTUOffset, tuIterator.absPartIdxTURelCU, log2TrSizeC, (TextType)chromaId);
 769                         }
 770                     }
 771                     while (tuIterator.isNextSection());
 772                 }
 773             }
 774         }
 775         else
 776         {
 777             uint32_t log2TrSizeC = log2TrSize - hChromaShift;
 778             const bool splitIntoSubTUs = (chFmt == X265_CSP_I422);
 779             uint32_t curPartNum = NUM_CU_PARTITIONS >> (depth << 1);
 780             for (uint32_t chromaId = TEXT_CHROMA_U; chromaId <= TEXT_CHROMA_V; chromaId++)
 781             {
 782                 TURecurse tuIterator(splitIntoSubTUs ? VERTICAL_SPLIT : DONT_SPLIT, curPartNum, absPartIdx);
 783                 const coeff_t* coeffChroma = cu.m_trCoeff[chromaId];
 784                 do
 785                 {
 786                     uint32_t cbf = cu.getCbf(tuIterator.absPartIdxTURelCU, (TextType)chromaId, trIdx + splitIntoSubTUs);
 787                     if (cbf)
 788                     {
 789                         uint32_t subTUOffset = tuIterator.section << (log2TrSizeC * 2);
 790                         codeCoeffNxN(cu, coeffChroma + offsetChroma + subTUOffset, tuIterator.absPartIdxTURelCU, log2TrSizeC, (TextType)chromaId);
 791                     }
 792                 }
 793                 while (tuIterator.isNextSection());
 794             }
 795         }
 796     }
 797 }
 798
 799 void Entropy::codePredInfo(const CUData& cu, uint32_t absPartIdx)
 800 {
 801     if (cu.isIntra(absPartIdx)) // If it is intra mode, encode intra prediction mode.
 802     {
 803         codeIntraDirLumaAng(cu, absPartIdx, true);
 804         if (cu.m_chromaFormat != X265_CSP_I400)
 805         {
 806             uint32_t chromaDirMode[NUM_CHROMA_MODE];
 807             cu.getAllowedChromaDir(absPartIdx, chromaDirMode);
 808
 809             codeIntraDirChroma(cu, absPartIdx, chromaDirMode);
 810
 811             if ((cu.m_chromaFormat == X265_CSP_I444) && (cu.m_partSize[absPartIdx] == SIZE_NxN))
 812             {
 813                 uint32_t partOffset = (NUM_CU_PARTITIONS >> (cu.m_cuDepth[absPartIdx] << 1)) >> 2;
 814                 for (uint32_t i = 1; i <= 3; i++)
 815                 {
 816                     uint32_t offset = absPartIdx + i * partOffset;
 817                     cu.getAllowedChromaDir(offset, chromaDirMode);
 818                     codeIntraDirChroma(cu, offset, chromaDirMode);
 819                 }
 820             }
 821         }
 822     }
 823     else // if it is inter mode, encode motion vector and reference index
 824         codePUWise(cu, absPartIdx);
 825 }
 826
 827 /** encode motion information for every PU block */
 828 void Entropy::codePUWise(const CUData& cu, uint32_t absPartIdx)
 829 {
 830     PartSize partSize = (PartSize)cu.m_partSize[absPartIdx];
 831     uint32_t numPU = (partSize == SIZE_2Nx2N ? 1 : (partSize == SIZE_NxN ? 4 : 2));
 832     uint32_t depth = cu.m_cuDepth[absPartIdx];
 833     uint32_t puOffset = (g_puOffset[uint32_t(partSize)] << (g_maxFullDepth - depth) * 2) >> 4;
 834
 835     for (uint32_t puIdx = 0, subPartIdx = absPartIdx; puIdx < numPU; puIdx++, subPartIdx += puOffset)
 836     {
 837         codeMergeFlag(cu, subPartIdx);
 838         if (cu.m_mergeFlag[subPartIdx])
 839             codeMergeIndex(cu, subPartIdx);
 840         else
 841         {
 842             if (cu.m_slice->isInterB())
 843                 codeInterDir(cu, subPartIdx);
 844
 845             uint32_t interDir = cu.m_interDir[subPartIdx];
 846             for (uint32_t list = 0; list < 2; list++)
 847             {
 848                 if (interDir & (1 << list))
 849                 {
 850                     X265_CHECK(cu.m_slice->m_numRefIdx[list] > 0, "numRefs should have been > 0\n");
 851
 852                     codeRefFrmIdxPU(cu, subPartIdx, list);
 853                     codeMvd(cu, subPartIdx, list);
 854                     codeMVPIdx(cu.m_mvpIdx[list][subPartIdx]);
 855                 }
 856             }
 857         }
 858     }
 859 }
 860
 861 /** encode reference frame index for a PU block */
 862 void Entropy::codeRefFrmIdxPU(const CUData& cu, uint32_t absPartIdx, int list)
 863 {
 864     X265_CHECK(!cu.isIntra(absPartIdx), "intra block not expected\n");
 865
 866     if (cu.m_slice->m_numRefIdx[list] > 1)
 867         codeRefFrmIdx(cu, absPartIdx, list);
 868 }
 869
 870 void Entropy::codeCoeff(const CUData& cu, uint32_t absPartIdx, uint32_t depth, bool& bCodeDQP, uint32_t depthRange[2])
 871 {
 872     if (!cu.isIntra(absPartIdx))
 873     {
 874         if (!(cu.m_mergeFlag[absPartIdx] && cu.m_partSize[absPartIdx] == SIZE_2Nx2N))
 875             codeQtRootCbf(cu.getQtRootCbf(absPartIdx));
 876         if (!cu.getQtRootCbf(absPartIdx))
 877             return;
 878     }
 879
 880     uint32_t log2CUSize   = cu.m_log2CUSize[absPartIdx];
 881     uint32_t lumaOffset   = absPartIdx << (LOG2_UNIT_SIZE * 2);
 882     uint32_t chromaOffset = lumaOffset >> (cu.m_hChromaShift + cu.m_vChromaShift);
 883     uint32_t absPartIdxStep = NUM_CU_PARTITIONS >> (depth << 1);
 884     CoeffCodeState state;
 885     encodeTransform(cu, state, lumaOffset, chromaOffset, absPartIdx, absPartIdxStep, depth, log2CUSize, 0, bCodeDQP, depthRange);
 886 }
 887
 888 void Entropy::codeSaoOffset(const SaoCtuParam& ctuParam, int plane)
 889 {
 890     int typeIdx = ctuParam.typeIdx;
 891
 892     if (plane != 2)
 893     {
 894         encodeBin(typeIdx >= 0, m_contextState[OFF_SAO_TYPE_IDX_CTX]);
 895         if (typeIdx >= 0)
 896             encodeBinEP(typeIdx < SAO_BO ? 1 : 0);
 897     }
 898
 899     if (typeIdx >= 0)
 900     {
 901         enum { OFFSET_THRESH = 1 << X265_MIN(X265_DEPTH - 5, 5) };
 902         if (typeIdx == SAO_BO)
 903         {
 904             for (int i = 0; i < SAO_BO_LEN; i++)
 905                 codeSaoMaxUvlc(abs(ctuParam.offset[i]), OFFSET_THRESH - 1);
 906
 907             for (int i = 0; i < SAO_BO_LEN; i++)
 908                 if (ctuParam.offset[i] != 0)
 909                     encodeBinEP(ctuParam.offset[i] < 0);
 910
 911             encodeBinsEP(ctuParam.bandPos, 5);
 912         }
 913         else // if (typeIdx < SAO_BO)
 914         {
 915             codeSaoMaxUvlc(ctuParam.offset[0], OFFSET_THRESH - 1);
 916             codeSaoMaxUvlc(ctuParam.offset[1], OFFSET_THRESH - 1);
 917             codeSaoMaxUvlc(-ctuParam.offset[2], OFFSET_THRESH - 1);
 918             codeSaoMaxUvlc(-ctuParam.offset[3], OFFSET_THRESH - 1);
 919             if (plane != 2)
 920                 encodeBinsEP((uint32_t)(typeIdx), 2);
 921         }
 922     }
 923 }
 924
 925 /** initialize context model with respect to QP and initialization value */
 926 uint8_t sbacInit(int qp, int initValue)
 927 {
 928     qp = Clip3(0, 51, qp);
 929
 930     int  slope      = (initValue >> 4) * 5 - 45;
 931     int  offset     = ((initValue & 15) << 3) - 16;
 932     int  initState  =  X265_MIN(X265_MAX(1, (((slope * qp) >> 4) + offset)), 126);
 933     uint32_t mpState = (initState >= 64);
 934     uint32_t state = ((mpState ? (initState - 64) : (63 - initState)) << 1) + mpState;
 935
 936     return (uint8_t)state;
 937 }
 938
 939 static void initBuffer(uint8_t* contextModel, SliceType sliceType, int qp, uint8_t* ctxModel, int size)
 940 {
 941     ctxModel += sliceType * size;
 942
 943     for (int n = 0; n < size; n++)
 944         contextModel[n] = sbacInit(qp, ctxModel[n]);
 945 }
 946
 947 void Entropy::resetEntropy(const Slice& slice)
 948 {
 949     int  qp              = slice.m_sliceQp;
 950     SliceType sliceType  = slice.m_sliceType;
 951
 952     initBuffer(&m_contextState[OFF_SPLIT_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_SPLIT_FLAG, NUM_SPLIT_FLAG_CTX);
 953     initBuffer(&m_contextState[OFF_SKIP_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_SKIP_FLAG, NUM_SKIP_FLAG_CTX);
 954     initBuffer(&m_contextState[OFF_MERGE_FLAG_EXT_CTX], sliceType, qp, (uint8_t*)INIT_MERGE_FLAG_EXT, NUM_MERGE_FLAG_EXT_CTX);
 955     initBuffer(&m_contextState[OFF_MERGE_IDX_EXT_CTX], sliceType, qp, (uint8_t*)INIT_MERGE_IDX_EXT, NUM_MERGE_IDX_EXT_CTX);
 956     initBuffer(&m_contextState[OFF_PART_SIZE_CTX], sliceType, qp, (uint8_t*)INIT_PART_SIZE, NUM_PART_SIZE_CTX);
 957     initBuffer(&m_contextState[OFF_PRED_MODE_CTX], sliceType, qp, (uint8_t*)INIT_PRED_MODE, NUM_PRED_MODE_CTX);
 958     initBuffer(&m_contextState[OFF_ADI_CTX], sliceType, qp, (uint8_t*)INIT_INTRA_PRED_MODE, NUM_ADI_CTX);
 959     initBuffer(&m_contextState[OFF_CHROMA_PRED_CTX], sliceType, qp, (uint8_t*)INIT_CHROMA_PRED_MODE, NUM_CHROMA_PRED_CTX);
 960     initBuffer(&m_contextState[OFF_DELTA_QP_CTX], sliceType, qp, (uint8_t*)INIT_DQP, NUM_DELTA_QP_CTX);
 961     initBuffer(&m_contextState[OFF_INTER_DIR_CTX], sliceType, qp, (uint8_t*)INIT_INTER_DIR, NUM_INTER_DIR_CTX);
 962     initBuffer(&m_contextState[OFF_REF_NO_CTX], sliceType, qp, (uint8_t*)INIT_REF_PIC, NUM_REF_NO_CTX);
 963     initBuffer(&m_contextState[OFF_MV_RES_CTX], sliceType, qp, (uint8_t*)INIT_MVD, NUM_MV_RES_CTX);
 964     initBuffer(&m_contextState[OFF_QT_CBF_CTX], sliceType, qp, (uint8_t*)INIT_QT_CBF, NUM_QT_CBF_CTX);
 965     initBuffer(&m_contextState[OFF_TRANS_SUBDIV_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_TRANS_SUBDIV_FLAG, NUM_TRANS_SUBDIV_FLAG_CTX);
 966     initBuffer(&m_contextState[OFF_QT_ROOT_CBF_CTX], sliceType, qp, (uint8_t*)INIT_QT_ROOT_CBF, NUM_QT_ROOT_CBF_CTX);
 967     initBuffer(&m_contextState[OFF_SIG_CG_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_SIG_CG_FLAG, 2 * NUM_SIG_CG_FLAG_CTX);
 968     initBuffer(&m_contextState[OFF_SIG_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_SIG_FLAG, NUM_SIG_FLAG_CTX);
 969     initBuffer(&m_contextState[OFF_CTX_LAST_FLAG_X], sliceType, qp, (uint8_t*)INIT_LAST, NUM_CTX_LAST_FLAG_XY);
 970     initBuffer(&m_contextState[OFF_CTX_LAST_FLAG_Y], sliceType, qp, (uint8_t*)INIT_LAST, NUM_CTX_LAST_FLAG_XY);
 971     initBuffer(&m_contextState[OFF_ONE_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_ONE_FLAG, NUM_ONE_FLAG_CTX);
 972     initBuffer(&m_contextState[OFF_ABS_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_ABS_FLAG, NUM_ABS_FLAG_CTX);
 973     initBuffer(&m_contextState[OFF_MVP_IDX_CTX], sliceType, qp, (uint8_t*)INIT_MVP_IDX, NUM_MVP_IDX_CTX);
 974     initBuffer(&m_contextState[OFF_SAO_MERGE_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_SAO_MERGE_FLAG, NUM_SAO_MERGE_FLAG_CTX);
 975     initBuffer(&m_contextState[OFF_SAO_TYPE_IDX_CTX], sliceType, qp, (uint8_t*)INIT_SAO_TYPE_IDX, NUM_SAO_TYPE_IDX_CTX);
 976     initBuffer(&m_contextState[OFF_TRANSFORMSKIP_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_TRANSFORMSKIP_FLAG, 2 * NUM_TRANSFORMSKIP_FLAG_CTX);
 977     initBuffer(&m_contextState[OFF_TQUANT_BYPASS_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_CU_TRANSQUANT_BYPASS_FLAG, NUM_TQUANT_BYPASS_FLAG_CTX);
 978     // new structure
 979
 980     start();
 981 }
 982
 983 /* code explicit wp tables */
 984 void Entropy::codePredWeightTable(const Slice& slice)
 985 {
 986     const WeightParam *wp;
 987     bool            bChroma      = true; // 4:0:0 not yet supported
 988     bool            bDenomCoded  = false;
 989     int             numRefDirs   = slice.m_sliceType == B_SLICE ? 2 : 1;
 990     uint32_t        totalSignalledWeightFlags = 0;
 991
 992     if ((slice.m_sliceType == P_SLICE && slice.m_pps->bUseWeightPred) ||
 993         (slice.m_sliceType == B_SLICE && slice.m_pps->bUseWeightedBiPred))
 994     {
 995         for (int list = 0; list < numRefDirs; list++)
 996         {
 997             for (int ref = 0; ref < slice.m_numRefIdx[list]; ref++)
 998             {
 999                 wp = slice.m_weightPredTable[list][ref];
1000                 if (!bDenomCoded)
1001                 {
1002                     WRITE_UVLC(wp[0].log2WeightDenom, "luma_log2_weight_denom");
1003
1004                     if (bChroma)
1005                     {
1006                         int deltaDenom = wp[1].log2WeightDenom - wp[0].log2WeightDenom;
1007                         WRITE_SVLC(deltaDenom, "delta_chroma_log2_weight_denom");
1008                     }
1009                     bDenomCoded = true;
1010                 }
1011                 WRITE_FLAG(wp[0].bPresentFlag, "luma_weight_lX_flag");
1012                 totalSignalledWeightFlags += wp[0].bPresentFlag;
1013             }
1014
1015             if (bChroma)
1016             {
1017                 for (int ref = 0; ref < slice.m_numRefIdx[list]; ref++)
1018                 {
1019                     wp = slice.m_weightPredTable[list][ref];
1020                     WRITE_FLAG(wp[1].bPresentFlag, "chroma_weight_lX_flag");
1021                     totalSignalledWeightFlags += 2 * wp[1].bPresentFlag;
1022                 }
1023             }
1024
1025             for (int ref = 0; ref < slice.m_numRefIdx[list]; ref++)
1026             {
1027                 wp = slice.m_weightPredTable[list][ref];
1028                 if (wp[0].bPresentFlag)
1029                 {
1030                     int deltaWeight = (wp[0].inputWeight - (1 << wp[0].log2WeightDenom));
1031                     WRITE_SVLC(deltaWeight, "delta_luma_weight_lX");
1032                     WRITE_SVLC(wp[0].inputOffset, "luma_offset_lX");
1033                 }
1034
1035                 if (bChroma)
1036                 {
1037                     if (wp[1].bPresentFlag)
1038                     {
1039                         for (int plane = 1; plane < 3; plane++)
1040                         {
1041                             int deltaWeight = (wp[plane].inputWeight - (1 << wp[1].log2WeightDenom));
1042                             WRITE_SVLC(deltaWeight, "delta_chroma_weight_lX");
1043
1044                             int pred = (128 - ((128 * wp[plane].inputWeight) >> (wp[plane].log2WeightDenom)));
1045                             int deltaChroma = (wp[plane].inputOffset - pred);
1046                             WRITE_SVLC(deltaChroma, "delta_chroma_offset_lX");
1047                         }
1048                     }
1049                 }
1050             }
1051         }
1052
1053         X265_CHECK(totalSignalledWeightFlags <= 24, "total weights must be <= 24\n");
1054     }
1055 }
1056
1057 void Entropy::writeUnaryMaxSymbol(uint32_t symbol, uint8_t* scmModel, int offset, uint32_t maxSymbol)
1058 {
1059     X265_CHECK(maxSymbol > 0, "maxSymbol too small\n");
1060
1061     encodeBin(symbol ? 1 : 0, scmModel[0]);
1062
1063     if (!symbol)
1064         return;
1065
1066     bool bCodeLast = (maxSymbol > symbol);
1067
1068     while (--symbol)
1069         encodeBin(1, scmModel[offset]);
1070
1071     if (bCodeLast)
1072         encodeBin(0, scmModel[offset]);
1073 }
1074
1075 void Entropy::writeEpExGolomb(uint32_t symbol, uint32_t count)
1076 {
1077     uint32_t bins = 0;
1078     int numBins = 0;
1079
1080     while (symbol >= (uint32_t)(1 << count))
1081     {
1082         bins = 2 * bins + 1;
1083         numBins++;
1084         symbol -= 1 << count;
1085         count++;
1086     }
1087
1088     bins = 2 * bins + 0;
1089     numBins++;
1090
1091     bins = (bins << count) | symbol;
1092     numBins += count;
1093
1094     X265_CHECK(numBins <= 32, "numBins too large\n");
1095     encodeBinsEP(bins, numBins);
1096 }
1097
1098 /** Coding of coeff_abs_level_minus3 */
1099 void Entropy::writeCoefRemainExGolomb(uint32_t codeNumber, uint32_t absGoRice)
1100 {
1101     uint32_t length;
1102     const uint32_t codeRemain = codeNumber & ((1 << absGoRice) - 1);
1103
1104     if ((codeNumber >> absGoRice) < COEF_REMAIN_BIN_REDUCTION)
1105     {
1106         length = codeNumber >> absGoRice;
1107
1108         X265_CHECK(codeNumber - (length << absGoRice) == (codeNumber & ((1 << absGoRice) - 1)), "codeNumber failure\n");
1109         X265_CHECK(length + 1 + absGoRice < 32, "length failure\n");
1110         encodeBinsEP((((1 << (length + 1)) - 2) << absGoRice) + codeRemain, length + 1 + absGoRice);
1111     }
1112     else
1113     {
1114         length = 0;
1115         codeNumber = (codeNumber >> absGoRice) - COEF_REMAIN_BIN_REDUCTION;
1116         if (codeNumber != 0)
1117         {
1118             unsigned long idx;
1119             CLZ32(idx, codeNumber + 1);
1120             length = idx;
1121             codeNumber -= (1 << idx) - 1;
1122         }
1123         codeNumber = (codeNumber << absGoRice) + codeRemain;
1124
1125         encodeBinsEP((1 << (COEF_REMAIN_BIN_REDUCTION + length + 1)) - 2, COEF_REMAIN_BIN_REDUCTION + length + 1);
1126         encodeBinsEP(codeNumber, length + absGoRice);
1127     }
1128 }
1129
1130 // SBAC RD
1131 void Entropy::loadIntraDirModeLuma(const Entropy& src)
1132 {
1133     X265_CHECK(src.m_valid, "invalid copy source context\n");
1134     m_fracBits = src.m_fracBits;
1135     m_contextState[OFF_ADI_CTX] = src.m_contextState[OFF_ADI_CTX];
1136 }
1137
1138 void Entropy::copyFrom(const Entropy& src)
1139 {
1140     X265_CHECK(src.m_valid, "invalid copy source context\n");
1141
1142     copyState(src);
1143
1144     memcpy(m_contextState, src.m_contextState, MAX_OFF_CTX_MOD * sizeof(uint8_t));
1145     markValid();
1146 }
1147
1148 void Entropy::codeMVPIdx(uint32_t symbol)
1149 {
1150     encodeBin(symbol, m_contextState[OFF_MVP_IDX_CTX]);
1151 }
1152
1153 void Entropy::codePartSize(const CUData& cu, uint32_t absPartIdx, uint32_t depth)
1154 {
1155     PartSize partSize = (PartSize)cu.m_partSize[absPartIdx];
1156
1157     if (cu.isIntra(absPartIdx))
1158     {
1159         if (depth == g_maxCUDepth)
1160             encodeBin(partSize == SIZE_2Nx2N ? 1 : 0, m_contextState[OFF_PART_SIZE_CTX]);
1161         return;
1162     }
1163
1164     switch (partSize)
1165     {
1166     case SIZE_2Nx2N:
1167         encodeBin(1, m_contextState[OFF_PART_SIZE_CTX]);
1168         break;
1169
1170     case SIZE_2NxN:
1171     case SIZE_2NxnU:
1172     case SIZE_2NxnD:
1173         encodeBin(0, m_contextState[OFF_PART_SIZE_CTX + 0]);
1174         encodeBin(1, m_contextState[OFF_PART_SIZE_CTX + 1]);
1175         if (cu.m_slice->m_sps->maxAMPDepth > depth)
1176         {
1177             encodeBin((partSize == SIZE_2NxN) ? 1 : 0, m_contextState[OFF_PART_SIZE_CTX + 3]);
1178             if (partSize != SIZE_2NxN)
1179                 encodeBinEP((partSize == SIZE_2NxnU ? 0 : 1));
1180         }
1181         break;
1182
1183     case SIZE_Nx2N:
1184     case SIZE_nLx2N:
1185     case SIZE_nRx2N:
1186         encodeBin(0, m_contextState[OFF_PART_SIZE_CTX + 0]);
1187         encodeBin(0, m_contextState[OFF_PART_SIZE_CTX + 1]);
1188         if (depth == g_maxCUDepth && !(cu.m_log2CUSize[absPartIdx] == 3))
1189             encodeBin(1, m_contextState[OFF_PART_SIZE_CTX + 2]);
1190         if (cu.m_slice->m_sps->maxAMPDepth > depth)
1191         {
1192             encodeBin((partSize == SIZE_Nx2N) ? 1 : 0, m_contextState[OFF_PART_SIZE_CTX + 3]);
1193             if (partSize != SIZE_Nx2N)
1194                 encodeBinEP((partSize == SIZE_nLx2N ? 0 : 1));
1195         }
1196         break;
1197     default:
1198         X265_CHECK(0, "invalid CU partition\n");
1199         break;
1200     }
1201 }
1202
1203 void Entropy::codePredMode(int predMode)
1204 {
1205     encodeBin(predMode == MODE_INTER ? 0 : 1, m_contextState[OFF_PRED_MODE_CTX]);
1206 }
1207
1208 void Entropy::codeCUTransquantBypassFlag(uint32_t symbol)
1209 {
1210     encodeBin(symbol, m_contextState[OFF_TQUANT_BYPASS_FLAG_CTX]);
1211 }
1212
1213 void Entropy::codeSkipFlag(const CUData& cu, uint32_t absPartIdx)
1214 {
1215     // get context function is here
1216     uint32_t symbol = cu.isSkipped(absPartIdx) ? 1 : 0;
1217     uint32_t ctxSkip = cu.getCtxSkipFlag(absPartIdx);
1218
1219     encodeBin(symbol, m_contextState[OFF_SKIP_FLAG_CTX + ctxSkip]);
1220 }
1221
1222 void Entropy::codeMergeFlag(const CUData& cu, uint32_t absPartIdx)
1223 {
1224     const uint32_t symbol = cu.m_mergeFlag[absPartIdx] ? 1 : 0;
1225
1226     encodeBin(symbol, m_contextState[OFF_MERGE_FLAG_EXT_CTX]);
1227 }
1228
1229 void Entropy::codeMergeIndex(const CUData& cu, uint32_t absPartIdx)
1230 {
1231     uint32_t numCand = cu.m_slice->m_maxNumMergeCand;
1232
1233     if (numCand > 1)
1234     {
1235         uint32_t unaryIdx = cu.m_mvpIdx[0][absPartIdx]; // merge candidate index was stored in L0 MVP idx
1236         encodeBin((unaryIdx != 0), m_contextState[OFF_MERGE_IDX_EXT_CTX]);
1237
1238         X265_CHECK(unaryIdx < numCand, "unaryIdx out of range\n");
1239
1240         if (unaryIdx != 0)
1241         {
1242             uint32_t mask = (1 << unaryIdx) - 2;
1243             mask >>= (unaryIdx == numCand - 1) ? 1 : 0;
1244             encodeBinsEP(mask, unaryIdx - (unaryIdx == numCand - 1));
1245         }
1246     }
1247 }
1248
1249 void Entropy::codeSplitFlag(const CUData& cu, uint32_t absPartIdx, uint32_t depth)
1250 {
1251     X265_CHECK(depth < g_maxCUDepth, "invalid depth\n");
1252
1253     uint32_t ctx           = cu.getCtxSplitFlag(absPartIdx, depth);
1254     uint32_t currSplitFlag = (cu.m_cuDepth[absPartIdx] > depth) ? 1 : 0;
1255
1256     X265_CHECK(ctx < 3, "ctx out of range\n");
1257     encodeBin(currSplitFlag, m_contextState[OFF_SPLIT_FLAG_CTX + ctx]);
1258 }
1259
1260 void Entropy::codeTransformSubdivFlag(uint32_t symbol, uint32_t ctx)
1261 {
1262     encodeBin(symbol, m_contextState[OFF_TRANS_SUBDIV_FLAG_CTX + ctx]);
1263 }
1264
1265 uint32_t Entropy::bitsIntraModeNonMPM() const
1266 {
1267     uint32_t mstate = m_contextState[OFF_ADI_CTX];
1268     uint32_t bits = ((uint32_t)(m_fracBits & 32767) + sbacGetEntropyBits(mstate, 0)) >> 15;
1269     return bits + 5; /* fixed cost for encodeBinsEP() */
1270 }
1271
1272 uint32_t Entropy::bitsIntraModeMPM(const uint32_t preds[3], uint32_t dir) const
1273 {
1274     X265_CHECK(dir == preds[0] || dir == preds[1] || dir == preds[2], "dir must be a most probable mode\n");
1275     uint32_t mstate = m_contextState[OFF_ADI_CTX];
1276     uint32_t bits = ((uint32_t)(m_fracBits & 32767) + sbacGetEntropyBits(mstate, 1)) >> 15;
1277     return bits + (dir == preds[0] ? 1 : 2);
1278 }
1279
1280 void Entropy::codeIntraDirLumaAng(const CUData& cu, uint32_t absPartIdx, bool isMultiple)
1281 {
1282     uint32_t dir[4], j;
1283     uint32_t preds[4][3];
1284     int predIdx[4];
1285     PartSize mode = (PartSize)cu.m_partSize[absPartIdx];
1286     uint32_t partNum = isMultiple ? (mode == SIZE_NxN ? 4 : 1) : 1;
1287     uint32_t partOffset = (NUM_CU_PARTITIONS >> (cu.m_cuDepth[absPartIdx] << 1)) >> 2;
1288
1289     for (j = 0; j < partNum; j++)
1290     {
1291         dir[j] = cu.m_lumaIntraDir[absPartIdx + partOffset * j];
1292         cu.getIntraDirLumaPredictor(absPartIdx + partOffset * j, preds[j]);
1293         predIdx[j] = -1;
1294         for (uint32_t i = 0; i < 3; i++)
1295             if (dir[j] == preds[j][i])
1296                 predIdx[j] = i;
1297
1298         encodeBin((predIdx[j] != -1) ? 1 : 0, m_contextState[OFF_ADI_CTX]);
1299     }
1300
1301     for (j = 0; j < partNum; j++)
1302     {
1303         if (predIdx[j] != -1)
1304         {
1305             X265_CHECK((predIdx[j] >= 0) && (predIdx[j] <= 2), "predIdx out of range\n");
1306             // NOTE: Mapping
1307             //       0 = 0
1308             //       1 = 10
1309             //       2 = 11
1310             int nonzero = (!!predIdx[j]);
1311             encodeBinsEP(predIdx[j] + nonzero, 1 + nonzero);
1312         }
1313         else
1314         {
1315             if (preds[j][0] > preds[j][1])
1316                 std::swap(preds[j][0], preds[j][1]);
1317
1318             if (preds[j][0] > preds[j][2])
1319                 std::swap(preds[j][0], preds[j][2]);
1320
1321             if (preds[j][1] > preds[j][2])
1322                 std::swap(preds[j][1], preds[j][2]);
1323
1324             dir[j] += (dir[j] > preds[j][2]) ? -1 : 0;
1325             dir[j] += (dir[j] > preds[j][1]) ? -1 : 0;
1326             dir[j] += (dir[j] > preds[j][0]) ? -1 : 0;
1327
1328             encodeBinsEP(dir[j], 5);
1329         }
1330     }
1331 }
1332
1333 void Entropy::codeIntraDirChroma(const CUData& cu, uint32_t absPartIdx, uint32_t *chromaDirMode)
1334 {
1335     uint32_t intraDirChroma = cu.m_chromaIntraDir[absPartIdx];
1336
1337     if (intraDirChroma == DM_CHROMA_IDX)
1338         encodeBin(0, m_contextState[OFF_CHROMA_PRED_CTX]);
1339     else
1340     {
1341         for (int i = 0; i < NUM_CHROMA_MODE - 1; i++)
1342         {
1343             if (intraDirChroma == chromaDirMode[i])
1344             {
1345                 intraDirChroma = i;
1346                 break;
1347             }
1348         }
1349
1350         encodeBin(1, m_contextState[OFF_CHROMA_PRED_CTX]);
1351         encodeBinsEP(intraDirChroma, 2);
1352     }
1353 }
1354
1355 void Entropy::codeInterDir(const CUData& cu, uint32_t absPartIdx)
1356 {
1357     const uint32_t interDir = cu.m_interDir[absPartIdx] - 1;
1358     const uint32_t ctx      = cu.m_cuDepth[absPartIdx]; // the context of the inter dir is the depth of the CU
1359
1360     if (cu.m_partSize[absPartIdx] == SIZE_2Nx2N || cu.m_log2CUSize[absPartIdx] != 3)
1361         encodeBin(interDir == 2 ? 1 : 0, m_contextState[OFF_INTER_DIR_CTX + ctx]);
1362     if (interDir < 2)
1363         encodeBin(interDir, m_contextState[OFF_INTER_DIR_CTX + 4]);
1364 }
1365
1366 void Entropy::codeRefFrmIdx(const CUData& cu, uint32_t absPartIdx, int list)
1367 {
1368     uint32_t refFrame = cu.m_refIdx[list][absPartIdx];
1369
1370     encodeBin(refFrame > 0, m_contextState[OFF_REF_NO_CTX]);
1371
1372     if (refFrame > 0)
1373     {
1374         uint32_t refNum = cu.m_slice->m_numRefIdx[list] - 2;
1375         if (refNum == 0)
1376             return;
1377
1378         refFrame--;
1379         encodeBin(refFrame > 0, m_contextState[OFF_REF_NO_CTX + 1]);
1380         if (refFrame > 0)
1381         {
1382             uint32_t mask = (1 << refFrame) - 2;
1383             mask >>= (refFrame == refNum) ? 1 : 0;
1384             encodeBinsEP(mask, refFrame - (refFrame == refNum));
1385         }
1386     }
1387 }
1388
1389 void Entropy::codeMvd(const CUData& cu, uint32_t absPartIdx, int list)
1390 {
1391     const MV& mvd = cu.m_mvd[list][absPartIdx];
1392     const int hor = mvd.x;
1393     const int ver = mvd.y;
1394
1395     encodeBin(hor != 0 ? 1 : 0, m_contextState[OFF_MV_RES_CTX]);
1396     encodeBin(ver != 0 ? 1 : 0, m_contextState[OFF_MV_RES_CTX]);
1397
1398     const bool bHorAbsGr0 = hor != 0;
1399     const bool bVerAbsGr0 = ver != 0;
1400     const uint32_t horAbs   = 0 > hor ? -hor : hor;
1401     const uint32_t verAbs   = 0 > ver ? -ver : ver;
1402
1403     if (bHorAbsGr0)
1404         encodeBin(horAbs > 1 ? 1 : 0, m_contextState[OFF_MV_RES_CTX + 1]);
1405
1406     if (bVerAbsGr0)
1407         encodeBin(verAbs > 1 ? 1 : 0, m_contextState[OFF_MV_RES_CTX + 1]);
1408
1409     if (bHorAbsGr0)
1410     {
1411         if (horAbs > 1)
1412             writeEpExGolomb(horAbs - 2, 1);
1413
1414         encodeBinEP(0 > hor ? 1 : 0);
1415     }
1416
1417     if (bVerAbsGr0)
1418     {
1419         if (verAbs > 1)
1420             writeEpExGolomb(verAbs - 2, 1);
1421
1422         encodeBinEP(0 > ver ? 1 : 0);
1423     }
1424 }
1425
1426 void Entropy::codeDeltaQP(const CUData& cu, uint32_t absPartIdx)
1427 {
1428     int dqp = cu.m_qp[absPartIdx] - cu.getRefQP(absPartIdx);
1429
1430     int qpBdOffsetY = QP_BD_OFFSET;
1431
1432     dqp = (dqp + 78 + qpBdOffsetY + (qpBdOffsetY / 2)) % (52 + qpBdOffsetY) - 26 - (qpBdOffsetY / 2);
1433
1434     uint32_t absDQp = (uint32_t)((dqp > 0) ? dqp  : (-dqp));
1435     uint32_t TUValue = X265_MIN((int)absDQp, CU_DQP_TU_CMAX);
1436     writeUnaryMaxSymbol(TUValue, &m_contextState[OFF_DELTA_QP_CTX], 1, CU_DQP_TU_CMAX);
1437     if (absDQp >= CU_DQP_TU_CMAX)
1438         writeEpExGolomb(absDQp - CU_DQP_TU_CMAX, CU_DQP_EG_k);
1439
1440     if (absDQp > 0)
1441     {
1442         uint32_t sign = (dqp > 0 ? 0 : 1);
1443         encodeBinEP(sign);
1444     }
1445 }
1446
1447 void Entropy::codeQtCbf(const CUData& cu, uint32_t absPartIdx, uint32_t absPartIdxStep, uint32_t width, uint32_t height, TextType ttype, uint32_t trDepth, bool lowestLevel)
1448 {
1449     uint32_t ctx = ctxCbf[ttype][trDepth];
1450
1451     bool canQuadSplit       = (width >= (MIN_TU_SIZE * 2)) && (height >= (MIN_TU_SIZE * 2));
1452     uint32_t lowestTUDepth  = trDepth + ((!lowestLevel && !canQuadSplit) ? 1 : 0); // unsplittable TUs inherit their parent's CBF
1453
1454     if ((width != height) && (lowestLevel || !canQuadSplit)) // if sub-TUs are present
1455     {
1456         uint32_t subTUDepth        = lowestTUDepth + 1;   // if this is the lowest level of the TU-tree, the sub-TUs are directly below.
1457                                                           // Otherwise, this must be the level above the lowest level (as specified above)
1458         uint32_t partIdxesPerSubTU = absPartIdxStep >> 1;
1459
1460         for (uint32_t subTU = 0; subTU < 2; subTU++)
1461         {
1462             uint32_t subTUAbsPartIdx = absPartIdx + (subTU * partIdxesPerSubTU);
1463             uint32_t cbf = cu.getCbf(subTUAbsPartIdx, ttype, subTUDepth);
1464
1465             encodeBin(cbf, m_contextState[OFF_QT_CBF_CTX + ctx]);
1466         }
1467     }
1468     else
1469     {
1470         uint32_t cbf = cu.getCbf(absPartIdx, ttype, lowestTUDepth);
1471
1472         encodeBin(cbf, m_contextState[OFF_QT_CBF_CTX + ctx]);
1473     }
1474 }
1475
1476 void Entropy::codeQtCbf(const CUData& cu, uint32_t absPartIdx, TextType ttype, uint32_t trDepth)
1477 {
1478     uint32_t ctx = ctxCbf[ttype][trDepth];
1479     uint32_t cbf = cu.getCbf(absPartIdx, ttype, trDepth);
1480     encodeBin(cbf, m_contextState[OFF_QT_CBF_CTX + ctx]);
1481 }
1482
1483 void Entropy::codeQtCbf(uint32_t cbf, TextType ttype, uint32_t trDepth)
1484 {
1485     uint32_t ctx = ctxCbf[ttype][trDepth];
1486     encodeBin(cbf, m_contextState[OFF_QT_CBF_CTX + ctx]);
1487 }
1488
1489 void Entropy::codeTransformSkipFlags(const CUData& cu, uint32_t absPartIdx, uint32_t trSize, TextType ttype)
1490 {
1491     if (cu.m_tqBypass[absPartIdx])
1492         return;
1493     if (trSize != 4)
1494         return;
1495
1496     uint32_t useTransformSkip = cu.m_transformSkip[ttype][absPartIdx];
1497     encodeBin(useTransformSkip, m_contextState[OFF_TRANSFORMSKIP_FLAG_CTX + (ttype ? NUM_TRANSFORMSKIP_FLAG_CTX : 0)]);
1498 }
1499
1500 void Entropy::codeQtRootCbf(uint32_t cbf)
1501 {
1502     encodeBin(cbf, m_contextState[OFF_QT_ROOT_CBF_CTX]);
1503 }
1504
1505 void Entropy::codeQtCbfZero(TextType ttype, uint32_t trDepth)
1506 {
1507     // this function is only used to estimate the bits when cbf is 0
1508     // and will never be called when writing the bitsream.
1509     uint32_t ctx = ctxCbf[ttype][trDepth];
1510     encodeBin(0, m_contextState[OFF_QT_CBF_CTX + ctx]);
1511 }
1512
1513 void Entropy::codeQtRootCbfZero()
1514 {
1515     // this function is only used to estimate the bits when cbf is 0
1516     // and will never be called when writing the bistream.
1517     encodeBin(0, m_contextState[OFF_QT_ROOT_CBF_CTX]);
1518 }
1519
1520 /** Encode (X,Y) position of the last significant coefficient
1521  * \param posx X component of last coefficient
1522  * \param posy Y component of last coefficient
1523  * \param log2TrSize
1524  * \param bIsLuma
1525  * \param scanIdx scan type (zig-zag, hor, ver)
1526  * This method encodes the X and Y component within a block of the last significant coefficient.
1527  */
1528 void Entropy::codeLastSignificantXY(uint32_t posx, uint32_t posy, uint32_t log2TrSize, bool bIsLuma, uint32_t scanIdx)
1529 {
1530     // swap
1531     if (scanIdx == SCAN_VER)
1532         std::swap(posx, posy);
1533
1534     uint32_t ctxLast;
1535     uint32_t groupIdxX = getGroupIdx(posx);
1536     uint32_t groupIdxY = getGroupIdx(posy);
1537
1538     int blkSizeOffset = bIsLuma ? ((log2TrSize - 2) * 3 + ((log2TrSize - 1) >> 2)) : NUM_CTX_LAST_FLAG_XY_LUMA;
1539     int ctxShift = bIsLuma ? ((log2TrSize + 1) >> 2) : log2TrSize - 2;
1540     uint32_t maxGroupIdx = log2TrSize * 2 - 1;
1541
1542     // posX
1543     uint8_t *ctxX = &m_contextState[OFF_CTX_LAST_FLAG_X];
1544     for (ctxLast = 0; ctxLast < groupIdxX; ctxLast++)
1545         encodeBin(1, *(ctxX + blkSizeOffset + (ctxLast >> ctxShift)));
1546
1547     if (groupIdxX < maxGroupIdx)
1548         encodeBin(0, *(ctxX + blkSizeOffset + (ctxLast >> ctxShift)));
1549
1550     // posY
1551     uint8_t *ctxY = &m_contextState[OFF_CTX_LAST_FLAG_Y];
1552     for (ctxLast = 0; ctxLast < groupIdxY; ctxLast++)
1553         encodeBin(1, *(ctxY + blkSizeOffset + (ctxLast >> ctxShift)));
1554
1555     if (groupIdxY < maxGroupIdx)
1556         encodeBin(0, *(ctxY + blkSizeOffset + (ctxLast >> ctxShift)));
1557
1558     if (groupIdxX > 3)
1559     {
1560         uint32_t count = (groupIdxX - 2) >> 1;
1561         posx = posx - g_minInGroup[groupIdxX];
1562         encodeBinsEP(posx, count);
1563     }
1564     if (groupIdxY > 3)
1565     {
1566         uint32_t count = (groupIdxY - 2) >> 1;
1567         posy = posy - g_minInGroup[groupIdxY];
1568         encodeBinsEP(posy, count);
1569     }
1570 }
1571
1572 void Entropy::codeCoeffNxN(const CUData& cu, const coeff_t* coeff, uint32_t absPartIdx, uint32_t log2TrSize, TextType ttype)
1573 {
1574     uint32_t trSize = 1 << log2TrSize;
1575
1576     // compute number of significant coefficients
1577     uint32_t numSig = primitives.count_nonzero(coeff, (1 << (log2TrSize << 1)));
1578
1579     X265_CHECK(numSig > 0, "cbf check fail\n");
1580
1581     bool bHideFirstSign = cu.m_slice->m_pps->bSignHideEnabled && !cu.m_tqBypass[absPartIdx];
1582
1583     if (cu.m_slice->m_pps->bTransformSkipEnabled)
1584         codeTransformSkipFlags(cu, absPartIdx, trSize, ttype);
1585
1586     bool bIsLuma = ttype == TEXT_LUMA;
1587
1588     // select scans
1589     TUEntropyCodingParameters codingParameters;
1590     cu.getTUEntropyCodingParameters(codingParameters, absPartIdx, log2TrSize, bIsLuma);
1591
1592     //----- encode significance map -----
1593
1594     // Find position of last coefficient
1595     int scanPosLast = 0;
1596     uint32_t posLast;
1597     uint64_t sigCoeffGroupFlag64 = 0;
1598     const uint32_t maskPosXY = ((uint32_t)~0 >> (31 - log2TrSize + MLS_CG_LOG2_SIZE)) >> 1;
1599     assert((uint32_t)((1 << (log2TrSize - MLS_CG_LOG2_SIZE)) - 1) == (((uint32_t)~0 >> (31 - log2TrSize + MLS_CG_LOG2_SIZE)) >> 1));
1600     do
1601     {
1602         posLast = codingParameters.scan[scanPosLast++];
1603
1604         const uint32_t isNZCoeff = (coeff[posLast] != 0);
1605         // get L1 sig map
1606         // NOTE: the new algorithm is complicated, so I keep reference code here
1607         //uint32_t posy   = posLast >> log2TrSize;
1608         //uint32_t posx   = posLast - (posy << log2TrSize);
1609         //uint32_t blkIdx0 = ((posy >> MLS_CG_LOG2_SIZE) << codingParameters.log2TrSizeCG) + (posx >> MLS_CG_LOG2_SIZE);
1610         const uint32_t blkIdx = ((posLast >> (2 * MLS_CG_LOG2_SIZE)) & ~maskPosXY) + ((posLast >> MLS_CG_LOG2_SIZE) & maskPosXY);
1611         sigCoeffGroupFlag64 |= ((uint64_t)isNZCoeff << blkIdx);
1612         numSig -= isNZCoeff;
1613     }
1614     while (numSig > 0);
1615     scanPosLast--;
1616
1617     // Code position of last coefficient
1618     int posLastY = posLast >> log2TrSize;
1619     int posLastX = posLast & (trSize - 1);
1620     codeLastSignificantXY(posLastX, posLastY, log2TrSize, bIsLuma, codingParameters.scanType);
1621
1622     //===== code significance flag =====
1623     uint8_t * const baseCoeffGroupCtx = &m_contextState[OFF_SIG_CG_FLAG_CTX + (bIsLuma ? 0 : NUM_SIG_CG_FLAG_CTX)];
1624     uint8_t * const baseCtx = bIsLuma ? &m_contextState[OFF_SIG_FLAG_CTX] : &m_contextState[OFF_SIG_FLAG_CTX + NUM_SIG_FLAG_CTX_LUMA];
1625     const int lastScanSet = scanPosLast >> MLS_CG_SIZE;
1626     uint32_t c1 = 1;
1627     uint32_t goRiceParam = 0;
1628     int scanPosSig = scanPosLast;
1629
1630     for (int subSet = lastScanSet; subSet >= 0; subSet--)
1631     {
1632         int numNonZero = 0;
1633         int subPos     = subSet << MLS_CG_SIZE;
1634         goRiceParam    = 0;
1635         int absCoeff[1 << MLS_CG_SIZE];
1636         uint32_t coeffSigns = 0;
1637         int lastNZPosInCG = -1;
1638         int firstNZPosInCG = 1 << MLS_CG_SIZE;
1639         if (scanPosSig == scanPosLast)
1640         {
1641             absCoeff[0] = int(abs(coeff[posLast]));
1642             coeffSigns  = (coeff[posLast] < 0);
1643             numNonZero  = 1;
1644             lastNZPosInCG  = scanPosSig;
1645             firstNZPosInCG = scanPosSig;
1646             scanPosSig--;
1647         }
1648         // encode significant_coeffgroup_flag
1649         const int cgBlkPos = codingParameters.scanCG[subSet];
1650         const int cgPosY   = cgBlkPos >> codingParameters.log2TrSizeCG;
1651         const int cgPosX   = cgBlkPos - (cgPosY << codingParameters.log2TrSizeCG);
1652         const uint64_t cgBlkPosMask = ((uint64_t)1 << cgBlkPos);
1653
1654         if (subSet == lastScanSet || !subSet)
1655             sigCoeffGroupFlag64 |= cgBlkPosMask;
1656         else
1657         {
1658             uint32_t sigCoeffGroup = ((sigCoeffGroupFlag64 & cgBlkPosMask) != 0);
1659             uint32_t ctxSig = Quant::getSigCoeffGroupCtxInc(sigCoeffGroupFlag64, cgPosX, cgPosY, codingParameters.log2TrSizeCG);
1660             encodeBin(sigCoeffGroup, baseCoeffGroupCtx[ctxSig]);
1661         }
1662
1663         // encode significant_coeff_flag
1664         if (sigCoeffGroupFlag64 & cgBlkPosMask)
1665         {
1666             const int patternSigCtx = Quant::calcPatternSigCtx(sigCoeffGroupFlag64, cgPosX, cgPosY, codingParameters.log2TrSizeCG);
1667             uint32_t blkPos, sig, ctxSig;
1668             for (; scanPosSig >= subPos; scanPosSig--)
1669             {
1670                 blkPos  = codingParameters.scan[scanPosSig];
1671                 sig     = (coeff[blkPos] != 0);
1672                 if (scanPosSig > subPos || subSet == 0 || numNonZero)
1673                 {
1674                     ctxSig = Quant::getSigCtxInc(patternSigCtx, log2TrSize, trSize, blkPos, bIsLuma, codingParameters.firstSignificanceMapContext);
1675                     encodeBin(sig, baseCtx[ctxSig]);
1676                 }
1677                 if (sig)
1678                 {
1679                     absCoeff[numNonZero] = int(abs(coeff[blkPos]));
1680                     coeffSigns = 2 * coeffSigns + ((uint32_t)coeff[blkPos] >> 31);
1681                     numNonZero++;
1682                     if (lastNZPosInCG < 0)
1683                         lastNZPosInCG = scanPosSig;
1684                     firstNZPosInCG = scanPosSig;
1685                 }
1686             }
1687         }
1688         else
1689             scanPosSig = subPos - 1;
1690
1691         if (numNonZero > 0)
1692         {
1693             bool signHidden = (lastNZPosInCG - firstNZPosInCG >= SBH_THRESHOLD);
1694             uint32_t ctxSet = (subSet > 0 && bIsLuma) ? 2 : 0;
1695
1696             if (c1 == 0)
1697                 ctxSet++;
1698
1699             c1 = 1;
1700             uint8_t *baseCtxMod = bIsLuma ? &m_contextState[OFF_ONE_FLAG_CTX + 4 * ctxSet] : &m_contextState[OFF_ONE_FLAG_CTX + NUM_ONE_FLAG_CTX_LUMA + 4 * ctxSet];
1701
1702             int numC1Flag = X265_MIN(numNonZero, C1FLAG_NUMBER);
1703             int firstC2FlagIdx = -1;
1704             for (int idx = 0; idx < numC1Flag; idx++)
1705             {
1706                 uint32_t symbol = absCoeff[idx] > 1;
1707                 encodeBin(symbol, baseCtxMod[c1]);
1708                 if (symbol)
1709                 {
1710                     c1 = 0;
1711
1712                     if (firstC2FlagIdx == -1)
1713                         firstC2FlagIdx = idx;
1714                 }
1715                 else if ((c1 < 3) && (c1 > 0))
1716                     c1++;
1717             }
1718
1719             if (!c1)
1720             {
1721                 baseCtxMod = bIsLuma ? &m_contextState[OFF_ABS_FLAG_CTX + ctxSet] : &m_contextState[OFF_ABS_FLAG_CTX + NUM_ABS_FLAG_CTX_LUMA + ctxSet];
1722                 if (firstC2FlagIdx != -1)
1723                 {
1724                     uint32_t symbol = absCoeff[firstC2FlagIdx] > 2;
1725                     encodeBin(symbol, baseCtxMod[0]);
1726                 }
1727             }
1728
1729             if (bHideFirstSign && signHidden)
1730                 encodeBinsEP((coeffSigns >> 1), numNonZero - 1);
1731             else
1732                 encodeBinsEP(coeffSigns, numNonZero);
1733
1734             int firstCoeff2 = 1;
1735             if (!c1 || numNonZero > C1FLAG_NUMBER)
1736             {
1737                 for (int idx = 0; idx < numNonZero; idx++)
1738                 {
1739                     int baseLevel = (idx < C1FLAG_NUMBER) ? (2 + firstCoeff2) : 1;
1740
1741                     if (absCoeff[idx] >= baseLevel)
1742                     {
1743                         writeCoefRemainExGolomb(absCoeff[idx] - baseLevel, goRiceParam);
1744                         if (absCoeff[idx] > 3 * (1 << goRiceParam))
1745                             goRiceParam = std::min<uint32_t>(goRiceParam + 1, 4);
1746                     }
1747                     if (absCoeff[idx] >= 2)
1748                         firstCoeff2 = 0;
1749                 }
1750             }
1751         }
1752     }
1753 }
1754
1755 void Entropy::codeSaoMaxUvlc(uint32_t code, uint32_t maxSymbol)
1756 {
1757     X265_CHECK(maxSymbol > 0, "maxSymbol too small\n");
1758
1759     uint32_t isCodeNonZero = !!code;
1760
1761     encodeBinEP(isCodeNonZero);
1762     if (isCodeNonZero)
1763     {
1764         uint32_t isCodeLast = (maxSymbol > code);
1765         uint32_t mask = (1 << (code - 1)) - 1;
1766         uint32_t len = code - 1 + isCodeLast;
1767         mask <<= isCodeLast;
1768
1769         encodeBinsEP(mask, len);
1770     }
1771 }
1772
1773 /* estimate bit cost for CBP, significant map and significant coefficients */
1774 void Entropy::estBit(EstBitsSbac& estBitsSbac, uint32_t log2TrSize, bool bIsLuma) const
1775 {
1776     estCBFBit(estBitsSbac);
1777
1778     estSignificantCoeffGroupMapBit(estBitsSbac, bIsLuma);
1779
1780     // encode significance map
1781     estSignificantMapBit(estBitsSbac, log2TrSize, bIsLuma);
1782
1783     // encode significant coefficients
1784     estSignificantCoefficientsBit(estBitsSbac, bIsLuma);
1785 }
1786
1787 /* estimate bit cost for each CBP bit */
1788 void Entropy::estCBFBit(EstBitsSbac& estBitsSbac) const
1789 {
1790     const uint8_t *ctx = &m_contextState[OFF_QT_CBF_CTX];
1791
1792     for (uint32_t ctxInc = 0; ctxInc < NUM_QT_CBF_CTX; ctxInc++)
1793     {
1794         estBitsSbac.blockCbpBits[ctxInc][0] = sbacGetEntropyBits(ctx[ctxInc], 0);
1795         estBitsSbac.blockCbpBits[ctxInc][1] = sbacGetEntropyBits(ctx[ctxInc], 1);
1796     }
1797
1798     ctx = &m_contextState[OFF_QT_ROOT_CBF_CTX];
1799
1800     estBitsSbac.blockRootCbpBits[0] = sbacGetEntropyBits(ctx[0], 0);
1801     estBitsSbac.blockRootCbpBits[1] = sbacGetEntropyBits(ctx[0], 1);
1802 }
1803
1804 /* estimate SAMBAC bit cost for significant coefficient group map */
1805 void Entropy::estSignificantCoeffGroupMapBit(EstBitsSbac& estBitsSbac, bool bIsLuma) const
1806 {
1807     int firstCtx = 0, numCtx = NUM_SIG_CG_FLAG_CTX;
1808
1809     for (int ctxIdx = firstCtx; ctxIdx < firstCtx + numCtx; ctxIdx++)
1810         for (uint32_t bin = 0; bin < 2; bin++)
1811             estBitsSbac.significantCoeffGroupBits[ctxIdx][bin] = sbacGetEntropyBits(m_contextState[OFF_SIG_CG_FLAG_CTX + ((bIsLuma ? 0 : NUM_SIG_CG_FLAG_CTX) + ctxIdx)], bin);
1812 }
1813
1814 /* estimate SAMBAC bit cost for significant coefficient map */
1815 void Entropy::estSignificantMapBit(EstBitsSbac& estBitsSbac, uint32_t log2TrSize, bool bIsLuma) const
1816 {
1817     int firstCtx = 1, numCtx = 8;
1818
1819     if (log2TrSize >= 4)
1820     {
1821         firstCtx = bIsLuma ? 21 : 12;
1822         numCtx = bIsLuma ? 6 : 3;
1823     }
1824     else if (log2TrSize == 3)
1825     {
1826         firstCtx = 9;
1827         numCtx = bIsLuma ? 12 : 3;
1828     }
1829
1830     if (bIsLuma)
1831     {
1832         for (uint32_t bin = 0; bin < 2; bin++)
1833             estBitsSbac.significantBits[0][bin] = sbacGetEntropyBits(m_contextState[OFF_SIG_FLAG_CTX], bin);
1834
1835         for (int ctxIdx = firstCtx; ctxIdx < firstCtx + numCtx; ctxIdx++)
1836             for (uint32_t bin = 0; bin < 2; bin++)
1837                 estBitsSbac.significantBits[ctxIdx][bin] = sbacGetEntropyBits(m_contextState[OFF_SIG_FLAG_CTX + ctxIdx], bin);
1838     }
1839     else
1840     {
1841         for (uint32_t bin = 0; bin < 2; bin++)
1842             estBitsSbac.significantBits[0][bin] = sbacGetEntropyBits(m_contextState[OFF_SIG_FLAG_CTX + (NUM_SIG_FLAG_CTX_LUMA + 0)], bin);
1843
1844         for (int ctxIdx = firstCtx; ctxIdx < firstCtx + numCtx; ctxIdx++)
1845             for (uint32_t bin = 0; bin < 2; bin++)
1846                 estBitsSbac.significantBits[ctxIdx][bin] = sbacGetEntropyBits(m_contextState[OFF_SIG_FLAG_CTX + (NUM_SIG_FLAG_CTX_LUMA + ctxIdx)], bin);
1847     }
1848     int bitsX = 0, bitsY = 0;
1849
1850     int blkSizeOffset = bIsLuma ? ((log2TrSize - 2) * 3 + ((log2TrSize - 1) >> 2)) : NUM_CTX_LAST_FLAG_XY_LUMA;
1851     int ctxShift = bIsLuma ? ((log2TrSize + 1) >> 2) : log2TrSize - 2;
1852     uint32_t maxGroupIdx = log2TrSize * 2 - 1;
1853
1854     uint32_t ctx;
1855     const uint8_t *ctxX = &m_contextState[OFF_CTX_LAST_FLAG_X];
1856     for (ctx = 0; ctx < maxGroupIdx; ctx++)
1857     {
1858         int ctxOffset = blkSizeOffset + (ctx >> ctxShift);
1859         estBitsSbac.lastXBits[ctx] = bitsX + sbacGetEntropyBits(ctxX[ctxOffset], 0);
1860         bitsX += sbacGetEntropyBits(ctxX[ctxOffset], 1);
1861     }
1862
1863     estBitsSbac.lastXBits[ctx] = bitsX;
1864
1865     const uint8_t *ctxY = &m_contextState[OFF_CTX_LAST_FLAG_Y];
1866     for (ctx = 0; ctx < maxGroupIdx; ctx++)
1867     {
1868         int ctxOffset = blkSizeOffset + (ctx >> ctxShift);
1869         estBitsSbac.lastYBits[ctx] = bitsY + sbacGetEntropyBits(ctxY[ctxOffset], 0);
1870         bitsY += sbacGetEntropyBits(ctxY[ctxOffset], 1);
1871     }
1872
1873     estBitsSbac.lastYBits[ctx] = bitsY;
1874 }
1875
1876 /* estimate bit cost of significant coefficient */
1877 void Entropy::estSignificantCoefficientsBit(EstBitsSbac& estBitsSbac, bool bIsLuma) const
1878 {
1879     if (bIsLuma)
1880     {
1881         const uint8_t *ctxOne = &m_contextState[OFF_ONE_FLAG_CTX];
1882         const uint8_t *ctxAbs = &m_contextState[OFF_ABS_FLAG_CTX];
1883
1884         for (int ctxIdx = 0; ctxIdx < NUM_ONE_FLAG_CTX_LUMA; ctxIdx++)
1885         {
1886             estBitsSbac.greaterOneBits[ctxIdx][0] = sbacGetEntropyBits(ctxOne[ctxIdx], 0);
1887             estBitsSbac.greaterOneBits[ctxIdx][1] = sbacGetEntropyBits(ctxOne[ctxIdx], 1);
1888         }
1889
1890         for (int ctxIdx = 0; ctxIdx < NUM_ABS_FLAG_CTX_LUMA; ctxIdx++)
1891         {
1892             estBitsSbac.levelAbsBits[ctxIdx][0] = sbacGetEntropyBits(ctxAbs[ctxIdx], 0);
1893             estBitsSbac.levelAbsBits[ctxIdx][1] = sbacGetEntropyBits(ctxAbs[ctxIdx], 1);
1894         }
1895     }
1896     else
1897     {
1898         const uint8_t *ctxOne = &m_contextState[OFF_ONE_FLAG_CTX + NUM_ONE_FLAG_CTX_LUMA];
1899         const uint8_t *ctxAbs = &m_contextState[OFF_ABS_FLAG_CTX + NUM_ABS_FLAG_CTX_LUMA];
1900
1901         for (int ctxIdx = 0; ctxIdx < NUM_ONE_FLAG_CTX_CHROMA; ctxIdx++)
1902         {
1903             estBitsSbac.greaterOneBits[ctxIdx][0] = sbacGetEntropyBits(ctxOne[ctxIdx], 0);
1904             estBitsSbac.greaterOneBits[ctxIdx][1] = sbacGetEntropyBits(ctxOne[ctxIdx], 1);
1905         }
1906
1907         for (int ctxIdx = 0; ctxIdx < NUM_ABS_FLAG_CTX_CHROMA; ctxIdx++)
1908         {
1909             estBitsSbac.levelAbsBits[ctxIdx][0] = sbacGetEntropyBits(ctxAbs[ctxIdx], 0);
1910             estBitsSbac.levelAbsBits[ctxIdx][1] = sbacGetEntropyBits(ctxAbs[ctxIdx], 1);
1911         }
1912     }
1913 }
1914
1915 /* Initialize our context information from the nominated source */
1916 void Entropy::copyContextsFrom(const Entropy& src)
1917 {
1918     X265_CHECK(src.m_valid, "invalid copy source context\n");
1919
1920     memcpy(m_contextState, src.m_contextState, MAX_OFF_CTX_MOD * sizeof(m_contextState[0]));
1921     markValid();
1922 }
1923
1924 void Entropy::start()
1925 {
1926     m_low = 0;
1927     m_range = 510;
1928     m_bitsLeft = -12;
1929     m_numBufferedBytes = 0;
1930     m_bufferedByte = 0xff;
1931 }
1932
1933 void Entropy::finish()
1934 {
1935     if (m_low >> (21 + m_bitsLeft))
1936     {
1937         m_bitIf->writeByte(m_bufferedByte + 1);
1938         while (m_numBufferedBytes > 1)
1939         {
1940             m_bitIf->writeByte(0x00);
1941             m_numBufferedBytes--;
1942         }
1943
1944         m_low -= 1 << (21 + m_bitsLeft);
1945     }
1946     else
1947     {
1948         if (m_numBufferedBytes > 0)
1949             m_bitIf->writeByte(m_bufferedByte);
1950
1951         while (m_numBufferedBytes > 1)
1952         {
1953             m_bitIf->writeByte(0xff);
1954             m_numBufferedBytes--;
1955         }
1956     }
1957     m_bitIf->write(m_low >> 8, 13 + m_bitsLeft);
1958 }
1959
1960 void Entropy::copyState(const Entropy& other)
1961 {
1962     m_low = other.m_low;
1963     m_range = other.m_range;
1964     m_bitsLeft = other.m_bitsLeft;
1965     m_bufferedByte = other.m_bufferedByte;
1966     m_numBufferedBytes = other.m_numBufferedBytes;
1967     m_fracBits = other.m_fracBits;
1968 }
1969
1970 void Entropy::resetBits()
1971 {
1972     m_low = 0;
1973     m_bitsLeft = -12;
1974     m_numBufferedBytes = 0;
1975     m_bufferedByte = 0xff;
1976     m_fracBits &= 32767;
1977     if (m_bitIf)
1978         m_bitIf->resetBits();
1979 }
1980
1981 /** Encode bin */
1982 void Entropy::encodeBin(uint32_t binValue, uint8_t &ctxModel)
1983 {
1984     uint32_t mstate = ctxModel;
1985
1986     ctxModel = sbacNext(mstate, binValue);
1987
1988     if (!m_bitIf)
1989     {
1990         m_fracBits += sbacGetEntropyBits(mstate, binValue);
1991         return;
1992     }
1993
1994     uint32_t range = m_range;
1995     uint32_t state = sbacGetState(mstate);
1996     uint32_t lps = g_lpsTable[state][((uint8_t)range >> 6)];
1997     range -= lps;
1998
1999     X265_CHECK(lps >= 2, "lps is too small\n");
2000
2001     int numBits = (uint32_t)(range - 256) >> 31;
2002     uint32_t low = m_low;
2003
2004     // NOTE: MPS must be LOWEST bit in mstate
2005     X265_CHECK((uint32_t)((binValue ^ mstate) & 1) == (uint32_t)(binValue != sbacGetMps(mstate)), "binValue failure\n");
2006     if ((binValue ^ mstate) & 1)
2007     {
2008         // NOTE: lps is non-zero and the maximum of idx is 8 because lps less than 256
2009         //numBits   = g_renormTable[lps >> 3];
2010         unsigned long idx;
2011         CLZ32(idx, lps);
2012         X265_CHECK(state != 63 || idx == 1, "state failure\n");
2013
2014         numBits = 8 - idx;
2015         if (state >= 63)
2016             numBits = 6;
2017         X265_CHECK(numBits <= 6, "numBits failure\n");
2018
2019         low += range;
2020         range = lps;
2021     }
2022     m_low = (low << numBits);
2023     m_range = (range << numBits);
2024     m_bitsLeft += numBits;
2025
2026     if (m_bitsLeft >= 0)
2027         writeOut();
2028 }
2029
2030 /** Encode equiprobable bin */
2031 void Entropy::encodeBinEP(uint32_t binValue)
2032 {
2033     if (!m_bitIf)
2034     {
2035         m_fracBits += 32768;
2036         return;
2037     }
2038     m_low <<= 1;
2039     if (binValue)
2040         m_low += m_range;
2041     m_bitsLeft++;
2042
2043     if (m_bitsLeft >= 0)
2044         writeOut();
2045 }
2046
2047 /** Encode equiprobable bins */
2048 void Entropy::encodeBinsEP(uint32_t binValues, int numBins)
2049 {
2050     if (!m_bitIf)
2051     {
2052         m_fracBits += 32768 * numBins;
2053         return;
2054     }
2055
2056     while (numBins > 8)
2057     {
2058         numBins -= 8;
2059         uint32_t pattern = binValues >> numBins;
2060         m_low <<= 8;
2061         m_low += m_range * pattern;
2062         binValues -= pattern << numBins;
2063         m_bitsLeft += 8;
2064
2065         if (m_bitsLeft >= 0)
2066             writeOut();
2067     }
2068
2069     m_low <<= numBins;
2070     m_low += m_range * binValues;
2071     m_bitsLeft += numBins;
2072
2073     if (m_bitsLeft >= 0)
2074         writeOut();
2075 }
2076
2077 /** Encode terminating bin */
2078 void Entropy::encodeBinTrm(uint32_t binValue)
2079 {
2080     if (!m_bitIf)
2081     {
2082         m_fracBits += sbacGetEntropyBitsTrm(binValue);
2083         return;
2084     }
2085
2086     m_range -= 2;
2087     if (binValue)
2088     {
2089         m_low += m_range;
2090         m_low <<= 7;
2091         m_range = 2 << 7;
2092         m_bitsLeft += 7;
2093     }
2094     else if (m_range >= 256)
2095         return;
2096     else
2097     {
2098         m_low <<= 1;
2099         m_range <<= 1;
2100         m_bitsLeft++;
2101     }
2102
2103     if (m_bitsLeft >= 0)
2104         writeOut();
2105 }
2106
2107 /** Move bits from register into bitstream */
2108 void Entropy::writeOut()
2109 {
2110     uint32_t leadByte = m_low >> (13 + m_bitsLeft);
2111     uint32_t low_mask = (uint32_t)(~0) >> (11 + 8 - m_bitsLeft);
2112
2113     m_bitsLeft -= 8;
2114     m_low &= low_mask;
2115
2116     if (leadByte == 0xff)
2117         m_numBufferedBytes++;
2118     else
2119     {
2120         uint32_t numBufferedBytes = m_numBufferedBytes;
2121         if (numBufferedBytes > 0)
2122         {
2123             uint32_t carry = leadByte >> 8;
2124             uint32_t byteTowrite = m_bufferedByte + carry;
2125             m_bitIf->writeByte(byteTowrite);
2126
2127             byteTowrite = (0xff + carry) & 0xff;
2128             while (numBufferedBytes > 1)
2129             {
2130                 m_bitIf->writeByte(byteTowrite);
2131                 numBufferedBytes--;
2132             }
2133         }
2134         m_numBufferedBytes = 1;
2135         m_bufferedByte = (uint8_t)leadByte;
2136     }
2137 }
2138
/* Bin cost table with 128 entries.  Costs appear to be fixed point with 15
 * fractional bits, since a bypass bin is charged 32768 elsewhere in this
 * file.  Even entries shrink and odd entries grow along the table.
 * NOTE(review): presumably indexed by (context model XOR bin) through
 * sbacGetEntropyBits(), which is defined outside this file section - confirm. */
const uint32_t g_entropyBits[128] =
{
    // Corrected table, most notably for last state
    0x07b23, 0x085f9, 0x074a0, 0x08cbc, 0x06ee4, 0x09354, 0x067f4, 0x09c1b, //   0 ..   7
    0x060b0, 0x0a62a, 0x05a9c, 0x0af5b, 0x0548d, 0x0b955, 0x04f56, 0x0c2a9, //   8 ..  15
    0x04a87, 0x0cbf7, 0x045d6, 0x0d5c3, 0x04144, 0x0e01b, 0x03d88, 0x0e937, //  16 ..  23
    0x039e0, 0x0f2cd, 0x03663, 0x0fc9e, 0x03347, 0x10600, 0x03050, 0x10f95, //  24 ..  31
    0x02d4d, 0x11a02, 0x02ad3, 0x12333, 0x0286e, 0x12cad, 0x02604, 0x136df, //  32 ..  39
    0x02425, 0x13f48, 0x021f4, 0x149c4, 0x0203e, 0x1527b, 0x01e4d, 0x15d00, //  40 ..  47
    0x01c99, 0x166de, 0x01b18, 0x17017, 0x019a5, 0x17988, 0x01841, 0x18327, //  48 ..  55
    0x016df, 0x18d50, 0x015d9, 0x19547, 0x0147c, 0x1a083, 0x0138e, 0x1a8a3, //  56 ..  63
    0x01251, 0x1b418, 0x01166, 0x1bd27, 0x01068, 0x1c77b, 0x00f7f, 0x1d18e, //  64 ..  71
    0x00eda, 0x1d91a, 0x00e19, 0x1e254, 0x00d4f, 0x1ec9a, 0x00c90, 0x1f6e0, //  72 ..  79
    0x00c01, 0x1fef8, 0x00b5f, 0x208b1, 0x00ab6, 0x21362, 0x00a15, 0x21e46, //  80 ..  87
    0x00988, 0x2285d, 0x00934, 0x22ea8, 0x008a8, 0x239b2, 0x0081d, 0x24577, //  88 ..  95
    0x007c9, 0x24ce6, 0x00763, 0x25663, 0x00710, 0x25e8f, 0x006a0, 0x26a26, //  96 .. 103
    0x00672, 0x26f23, 0x005e8, 0x27ef8, 0x005ba, 0x284b5, 0x0055e, 0x29057, // 104 .. 111
    0x0050c, 0x29bab, 0x004c1, 0x2a674, 0x004a7, 0x2aa5e, 0x0046f, 0x2b32f, // 112 .. 119
    0x0041f, 0x2c0ad, 0x003e7, 0x2ca8d, 0x003ba, 0x2d323, 0x0010c, 0x3bfbb  // 120 .. 127
};
2151
/* Context model transition table: 128 model values, two successors each.
 * The data is mirrored between the even/odd partner of every model value:
 * entry [m ^ 1][b ^ 1] equals entry [m][b] ^ 1.
 * NOTE(review): presumably read as g_nextState[model][bin] by sbacNext(),
 * which is defined outside this file section - confirm. */
const uint8_t g_nextState[128][2] =
{
    { 2, 1 }, { 0, 3 }, { 4, 0 }, { 1, 5 },                 //   0 ..   3
    { 6, 2 }, { 3, 7 }, { 8, 4 }, { 5, 9 },                 //   4 ..   7
    { 10, 4 }, { 5, 11 }, { 12, 8 }, { 9, 13 },             //   8 ..  11
    { 14, 8 }, { 9, 15 }, { 16, 10 }, { 11, 17 },           //  12 ..  15
    { 18, 12 }, { 13, 19 }, { 20, 14 }, { 15, 21 },         //  16 ..  19
    { 22, 16 }, { 17, 23 }, { 24, 18 }, { 19, 25 },         //  20 ..  23
    { 26, 18 }, { 19, 27 }, { 28, 22 }, { 23, 29 },         //  24 ..  27
    { 30, 22 }, { 23, 31 }, { 32, 24 }, { 25, 33 },         //  28 ..  31
    { 34, 26 }, { 27, 35 }, { 36, 26 }, { 27, 37 },         //  32 ..  35
    { 38, 30 }, { 31, 39 }, { 40, 30 }, { 31, 41 },         //  36 ..  39
    { 42, 32 }, { 33, 43 }, { 44, 32 }, { 33, 45 },         //  40 ..  43
    { 46, 36 }, { 37, 47 }, { 48, 36 }, { 37, 49 },         //  44 ..  47
    { 50, 38 }, { 39, 51 }, { 52, 38 }, { 39, 53 },         //  48 ..  51
    { 54, 42 }, { 43, 55 }, { 56, 42 }, { 43, 57 },         //  52 ..  55
    { 58, 44 }, { 45, 59 }, { 60, 44 }, { 45, 61 },         //  56 ..  59
    { 62, 46 }, { 47, 63 }, { 64, 48 }, { 49, 65 },         //  60 ..  63
    { 66, 48 }, { 49, 67 }, { 68, 50 }, { 51, 69 },         //  64 ..  67
    { 70, 52 }, { 53, 71 }, { 72, 52 }, { 53, 73 },         //  68 ..  71
    { 74, 54 }, { 55, 75 }, { 76, 54 }, { 55, 77 },         //  72 ..  75
    { 78, 56 }, { 57, 79 }, { 80, 58 }, { 59, 81 },         //  76 ..  79
    { 82, 58 }, { 59, 83 }, { 84, 60 }, { 61, 85 },         //  80 ..  83
    { 86, 60 }, { 61, 87 }, { 88, 60 }, { 61, 89 },         //  84 ..  87
    { 90, 62 }, { 63, 91 }, { 92, 64 }, { 65, 93 },         //  88 ..  91
    { 94, 64 }, { 65, 95 }, { 96, 66 }, { 67, 97 },         //  92 ..  95
    { 98, 66 }, { 67, 99 }, { 100, 66 }, { 67, 101 },       //  96 ..  99
    { 102, 68 }, { 69, 103 }, { 104, 68 }, { 69, 105 },     // 100 .. 103
    { 106, 70 }, { 71, 107 }, { 108, 70 }, { 71, 109 },     // 104 .. 107
    { 110, 70 }, { 71, 111 }, { 112, 72 }, { 73, 113 },     // 108 .. 111
    { 114, 72 }, { 73, 115 }, { 116, 72 }, { 73, 117 },     // 112 .. 115
    { 118, 74 }, { 75, 119 }, { 120, 74 }, { 75, 121 },     // 116 .. 119
    { 122, 74 }, { 75, 123 }, { 124, 76 }, { 77, 125 },     // 120 .. 123
    { 124, 76 }, { 77, 125 }, { 126, 126 }, { 127, 127 }    // 124 .. 127
};
2171
2172 }