source/common/common.h

   1 /*****************************************************************************
   2  * Copyright (C) 2013 x265 project
   3  *
   4  * Authors: Deepthi Nandakumar <deepthi@multicorewareinc.com>
   5  *
   6  * This program is free software; you can redistribute it and/or modify
   7  * it under the terms of the GNU General Public License as published by
   8  * the Free Software Foundation; either version 2 of the License, or
   9  * (at your option) any later version.
  10  *
  11  * This program is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14  * GNU General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU General Public License
  17  * along with this program; if not, write to the Free Software
  18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
  19  *
  20  * This program is also available under a commercial proprietary license.
  21  * For more information, contact us at license @ x265.com.
  22  *****************************************************************************/
  23
  24 #ifndef X265_COMMON_H
  25 #define X265_COMMON_H
  26
  27 #include <algorithm>
  28 #include <climits>
  29 #include <cmath>
  30 #include <cstdarg>
  31 #include <cstddef>
  32 #include <cstdio>
  33 #include <cstdlib>
  34 #include <cstring>
  35 #include <cctype>
  36 #include <ctime>
  37
  38 #include <stdint.h>
  39 #include <memory.h>
  40 #include <assert.h>
  41
  42 #include "x265.h"
  43
  44 #define FENC_STRIDE 64
  45 #define NUM_INTRA_MODE 35
  46
  47 #if defined(__GNUC__)
  48 #define ALIGN_VAR_8(T, var)  T var __attribute__((aligned(8)))
  49 #define ALIGN_VAR_16(T, var) T var __attribute__((aligned(16)))
  50 #define ALIGN_VAR_32(T, var) T var __attribute__((aligned(32)))
  51
  52 #if X265_ARCH_X86 && !defined(X86_64)
  53 extern "C" intptr_t x265_stack_align(void (*func)(), ...);
  54 #define x265_stack_align(func, ...) x265_stack_align((void (*)())func, __VA_ARGS__)
  55 #else
  56 #define x265_stack_align(func, ...) func(__VA_ARGS__)
  57 #endif
  58
  59 #elif defined(_MSC_VER)
  60
  61 #define ALIGN_VAR_8(T, var)  __declspec(align(8)) T var
  62 #define ALIGN_VAR_16(T, var) __declspec(align(16)) T var
  63 #define ALIGN_VAR_32(T, var) __declspec(align(32)) T var
  64 #define x265_stack_align(func, ...) func(__VA_ARGS__)
  65 #define fseeko _fseeki64
  66
  67 #endif // if defined(__GNUC__)
  68
  69 #if HAVE_INT_TYPES_H
  70 #define __STDC_FORMAT_MACROS
  71 #include <inttypes.h>
  72 #define X265_LL "%" PRIu64
  73 #else
  74 #define X265_LL "%lld"
  75 #endif
  76
  77 #if _DEBUG && defined(_MSC_VER)
  78 #define DEBUG_BREAK() __debugbreak()
  79 #elif __APPLE_CC__
  80 #define DEBUG_BREAK() __builtin_trap();
  81 #else
  82 #define DEBUG_BREAK()
  83 #endif
  84
  85 /* If compiled with CHECKED_BUILD perform run-time checks and log any that
  86  * fail, both to stderr and to a file */
  87 #if CHECKED_BUILD || _DEBUG
  88 #define X265_CHECK(expr, ...) if (!(expr)) { \
  89     x265_log(NULL, X265_LOG_ERROR, __VA_ARGS__); \
  90     DEBUG_BREAK(); \
  91     FILE *fp = fopen("x265_check_failures.txt", "a"); \
  92     if (fp) { fprintf(fp, "%s:%d\n", __FILE__, __LINE__); fprintf(fp, __VA_ARGS__); fclose(fp); } \
  93 }
  94 #if _MSC_VER
  95 #pragma warning(disable: 4127) // some checks have constant conditions
  96 #endif
  97 #else
  98 #define X265_CHECK(expr, ...)
  99 #endif
 100
 101 #if HIGH_BIT_DEPTH
 102 typedef uint16_t pixel;
 103 typedef uint32_t sum_t;
 104 typedef uint64_t sum2_t;
 105 typedef uint64_t pixel4;
 106 typedef int64_t  ssum2_t;
 107 #define X265_DEPTH 10          // compile time configurable bit depth
 108 #else
 109 typedef uint8_t  pixel;
 110 typedef uint16_t sum_t;
 111 typedef uint32_t sum2_t;
 112 typedef uint32_t pixel4;
 113 typedef int32_t  ssum2_t;      //Signed sum
 114 #define X265_DEPTH 8           // compile time configurable bit depth
 115 #endif // if HIGH_BIT_DEPTH
 116
 117 #ifndef NULL
 118 #define NULL 0
 119 #endif
 120
 121 #define MAX_UINT        0xFFFFFFFFU // max. value of unsigned 32-bit integer
 122 #define MAX_INT         2147483647  // max. value of signed 32-bit integer
 123 #define MAX_INT64       0x7FFFFFFFFFFFFFFFLL  // max. value of signed 64-bit integer
 124 #define MAX_DOUBLE      1.7e+308    // max. value of double-type value
 125
 126 #define QP_MIN          0
 127 #define QP_MAX_SPEC     51 /* max allowed signaled QP in HEVC */
 128 #define QP_MAX_MAX      69 /* max allowed QP to be output by rate control */
 129
 130 #define MIN_QPSCALE     0.21249999999999999
 131 #define MAX_MAX_QPSCALE 615.46574234477100
 132
 133 #define BITS_FOR_POC 8
 134
 135 template<typename T>
 136 inline pixel Clip(T x)
 137 {
 138     return (pixel)std::min<T>(T((1 << X265_DEPTH) - 1), std::max<T>(T(0), x));
 139 }
 140
 141 template<typename T>
 142 inline T Clip3(T minVal, T maxVal, T a)
 143 {
 144     return std::min<T>(std::max<T>(minVal, a), maxVal);
 145 }
 146
 147 template<typename T>
 148 inline T x265_min(T a, T b) { return a < b ? a : b; }
 149
 150 template<typename T>
 151 inline T x265_max(T a, T b) { return a > b ? a : b; }
 152
 153 typedef int16_t  coeff_t;      // transform coefficient
 154
 155 #define X265_MIN(a, b) ((a) < (b) ? (a) : (b))
 156 #define X265_MAX(a, b) ((a) > (b) ? (a) : (b))
 157 #define COPY1_IF_LT(x, y) if ((y) < (x)) (x) = (y);
 158 #define COPY2_IF_LT(x, y, a, b) \
 159     if ((y) < (x)) \
 160     { \
 161         (x) = (y); \
 162         (a) = (b); \
 163     }
 164 #define COPY3_IF_LT(x, y, a, b, c, d) \
 165     if ((y) < (x)) \
 166     { \
 167         (x) = (y); \
 168         (a) = (b); \
 169         (c) = (d); \
 170     }
 171 #define COPY4_IF_LT(x, y, a, b, c, d, e, f) \
 172     if ((y) < (x)) \
 173     { \
 174         (x) = (y); \
 175         (a) = (b); \
 176         (c) = (d); \
 177         (e) = (f); \
 178     }
 179 #define X265_MIN3(a, b, c) X265_MIN((a), X265_MIN((b), (c)))
 180 #define X265_MAX3(a, b, c) X265_MAX((a), X265_MAX((b), (c)))
 181 #define X265_MIN4(a, b, c, d) X265_MIN((a), X265_MIN3((b), (c), (d)))
 182 #define X265_MAX4(a, b, c, d) X265_MAX((a), X265_MAX3((b), (c), (d)))
 183 #define QP_BD_OFFSET (6 * (X265_DEPTH - 8))
 184 #define MAX_CHROMA_LAMBDA_OFFSET 36
 185
 186 // arbitrary, but low because SATD scores are 1/4 normal
 187 #define X265_LOOKAHEAD_QP (12 + QP_BD_OFFSET)
 188 #define X265_LOOKAHEAD_MAX 250
 189
 190 // Use the same size blocks as x264.  Using larger blocks seems to give artificially
 191 // high cost estimates (intra and inter both suffer)
 192 #define X265_LOWRES_CU_SIZE   8
 193 #define X265_LOWRES_CU_BITS   3
 194
 195 #define X265_MALLOC(type, count)    (type*)x265_malloc(sizeof(type) * (count))
 196 #define X265_FREE(ptr)              x265_free(ptr)
 197 #define CHECKED_MALLOC(var, type, count) \
 198     { \
 199         var = (type*)x265_malloc(sizeof(type) * (count)); \
 200         if (!var) \
 201         { \
 202             x265_log(NULL, X265_LOG_ERROR, "malloc of size %d failed\n", sizeof(type) * (count)); \
 203             goto fail; \
 204         } \
 205     }
 206 #define CHECKED_MALLOC_ZERO(var, type, count) \
 207     { \
 208         var = (type*)x265_malloc(sizeof(type) * (count)); \
 209         if (var) \
 210             memset((void*)var, 0, sizeof(type) * (count)); \
 211         else \
 212         { \
 213             x265_log(NULL, X265_LOG_ERROR, "malloc of size %d failed\n", sizeof(type) * (count)); \
 214             goto fail; \
 215         } \
 216     }
 217
 218 #if defined(_MSC_VER)
 219 #define X265_LOG2F(x) (logf((float)(x)) * 1.44269504088896405f)
 220 #define X265_LOG2(x) (log((double)(x)) * 1.4426950408889640513713538072172)
 221 #else
 222 #define X265_LOG2F(x) log2f(x)
 223 #define X265_LOG2(x)  log2(x)
 224 #endif
 225
 226 #define NUM_CU_DEPTH            4                           // maximum number of CU depths
 227 #define NUM_FULL_DEPTH          5                           // maximum number of full depths
 228 #define MIN_LOG2_CU_SIZE        3                           // log2(minCUSize)
 229 #define MAX_LOG2_CU_SIZE        6                           // log2(maxCUSize)
 230 #define MIN_CU_SIZE             (1 << MIN_LOG2_CU_SIZE)     // minimum allowable size of CU
 231 #define MAX_CU_SIZE             (1 << MAX_LOG2_CU_SIZE)     // maximum allowable size of CU
 232
 233 #define LOG2_UNIT_SIZE          2                           // log2(unitSize)
 234 #define UNIT_SIZE               (1 << LOG2_UNIT_SIZE)       // unit size of CU partition
 235
 236 #define MAX_NUM_PARTITIONS      256
 237 #define NUM_CU_PARTITIONS       (1U << (g_maxFullDepth << 1))
 238
 239 #define MIN_PU_SIZE             4
 240 #define MIN_TU_SIZE             4
 241 #define MAX_NUM_SPU_W           (MAX_CU_SIZE / MIN_PU_SIZE) // maximum number of SPU in horizontal line
 242
 243 #define MAX_LOG2_TR_SIZE 5
 244 #define MAX_LOG2_TS_SIZE 2 // TODO: RExt
 245 #define MAX_TR_SIZE (1 << MAX_LOG2_TR_SIZE)
 246 #define MAX_TS_SIZE (1 << MAX_LOG2_TS_SIZE)
 247
 248 #define MAX_NUM_TR_COEFFS        MAX_TR_SIZE * MAX_TR_SIZE /* Maximum number of transform coefficients, for a 32x32 transform */
 249 #define MAX_NUM_TR_CATEGORIES    8                         /* 32, 16, 8, 4 transform categories each for luma and chroma */
 250
 251 #define COEF_REMAIN_BIN_REDUCTION   3 // indicates the level at which the VLC
 252                                       // transitions from Golomb-Rice to TU+EG(k)
 253
 254 #define SBH_THRESHOLD               4 // fixed sign bit hiding controlling threshold
 255
 256 #define C1FLAG_NUMBER               8 // maximum number of largerThan1 flag coded in one chunk:  16 in HM5
 257 #define C2FLAG_NUMBER               1 // maximum number of largerThan2 flag coded in one chunk:  16 in HM5
 258
 259 #define SAO_ENCODING_RATE           0.75
 260 #define SAO_ENCODING_RATE_CHROMA    0.5
 261
 262 #define MLS_GRP_NUM                 64 // Max number of coefficient groups, max(16, 64)
 263 #define MLS_CG_SIZE                 4  // Coefficient group size of 4x4
 264 #define MLS_CG_LOG2_SIZE            2
 265
 266 #define QUANT_IQUANT_SHIFT          20 // Q(QP%6) * IQ(QP%6) = 2^20
 267 #define QUANT_SHIFT                 14 // Q(4) = 2^14
 268 #define SCALE_BITS                  15 // Inherited from TMuC, presumably for fractional bit estimates in RDOQ
 269 #define MAX_TR_DYNAMIC_RANGE        15 // Maximum transform dynamic range (excluding sign bit)
 270
 271 #define SHIFT_INV_1ST               7  // Shift after first inverse transform stage
 272 #define SHIFT_INV_2ND               12 // Shift after second inverse transform stage
 273
 274 #define AMVP_DECIMATION_FACTOR      4
 275
 276 #define SCAN_SET_SIZE               16
 277 #define LOG2_SCAN_SET_SIZE          4
 278
 279 #define ALL_IDX                     -1
 280 #define PLANAR_IDX                  0
 281 #define VER_IDX                     26 // index for intra VERTICAL   mode
 282 #define HOR_IDX                     10 // index for intra HORIZONTAL mode
 283 #define DC_IDX                      1  // index for intra DC mode
 284 #define NUM_CHROMA_MODE             5  // total number of chroma modes
 285 #define DM_CHROMA_IDX               36 // chroma mode index for derived from luma intra mode
 286
 287 #define MDCS_ANGLE_LIMIT            4 // distance from true angle that horiz or vertical scan is allowed
 288 #define MDCS_LOG2_MAX_SIZE          3 // TUs with log2 of size greater than this can only use diagonal scan
 289
 290 #define MAX_NUM_REF_PICS            16 // max. number of pictures used for reference
 291 #define MAX_NUM_REF                 16 // max. number of entries in picture reference list
 292
 293 #define REF_NOT_VALID               -1
 294
 295 #define AMVP_NUM_CANDS              2 // number of AMVP candidates
 296 #define MRG_MAX_NUM_CANDS           5 // max number of final merge candidates
 297
 298 #define CHROMA_H_SHIFT(x) (x == X265_CSP_I420 || x == X265_CSP_I422)
 299 #define CHROMA_V_SHIFT(x) (x == X265_CSP_I420)
 300
 301 namespace x265 {
 302
 303 enum { SAO_NUM_OFFSET = 4 };
 304
 305 // NOTE: MUST be alignment to 16 or 32 bytes for asm code
 306 struct NoiseReduction
 307 {
 308     /* 0 = luma 4x4, 1 = luma 8x8, 2 = luma 16x16, 3 = luma 32x32
 309      * 4 = chroma 4x4, 5 = chroma 8x8, 6 = chroma 16x16, 7 = chroma 32x32 */
 310     uint16_t offsetDenoise[MAX_NUM_TR_CATEGORIES][MAX_NUM_TR_COEFFS];
 311     uint32_t residualSum[MAX_NUM_TR_CATEGORIES][MAX_NUM_TR_COEFFS];
 312     uint32_t count[MAX_NUM_TR_CATEGORIES];
 313 };
 314
 315 enum SaoMergeMode
 316 {
 317     SAO_MERGE_NONE,
 318     SAO_MERGE_LEFT,
 319     SAO_MERGE_UP
 320 };
 321
 322 struct SaoCtuParam
 323 {
 324     SaoMergeMode mergeMode;
 325     int  typeIdx;
 326     uint32_t bandPos;    // BO band position
 327     int  offset[SAO_NUM_OFFSET];
 328
 329     void reset()
 330     {
 331         mergeMode = SAO_MERGE_NONE;
 332         typeIdx = -1;
 333         bandPos = 0;
 334         offset[0] = 0;
 335         offset[1] = 0;
 336         offset[2] = 0;
 337         offset[3] = 0;
 338     }
 339 };
 340
 341 struct SAOParam
 342 {
 343     SaoCtuParam* ctuParam[3];
 344     bool         bSaoFlag[2];
 345     int          numCuInWidth;
 346
 347     SAOParam()
 348     {
 349         for (int i = 0; i < 3; i++)
 350             ctuParam[i] = NULL;
 351     }
 352
 353     ~SAOParam()
 354     {
 355         delete[] ctuParam[0];
 356         delete[] ctuParam[1];
 357         delete[] ctuParam[2];
 358     }
 359 };
 360
 361 enum TextType
 362 {
 363     TEXT_LUMA     = 0,  // luma
 364     TEXT_CHROMA_U = 1,  // chroma U
 365     TEXT_CHROMA_V = 2,  // chroma V
 366     MAX_NUM_COMPONENT = 3
 367 };
 368
 369 // coefficient scanning type used in ACS
 370 enum ScanType
 371 {
 372     SCAN_DIAG = 0,     // up-right diagonal scan
 373     SCAN_HOR = 1,      // horizontal first scan
 374     SCAN_VER = 2,      // vertical first scan
 375     NUM_SCAN_TYPE = 3
 376 };
 377
 378 enum SignificanceMapContextType
 379 {
 380     CONTEXT_TYPE_4x4 = 0,
 381     CONTEXT_TYPE_8x8 = 1,
 382     CONTEXT_TYPE_NxN = 2,
 383     CONTEXT_NUMBER_OF_TYPES = 3
 384 };
 385 }
 386
 387 /* outside x265 namespace, but prefixed. defined in common.cpp */
 388 int64_t  x265_mdate(void);
 389 void     x265_log(const x265_param *param, int level, const char *fmt, ...);
 390 int      x265_exp2fix8(double x);
 391
 392 double   x265_ssim2dB(double ssim);
 393 double   x265_qScale2qp(double qScale);
 394 double   x265_qp2qScale(double qp);
 395 uint32_t x265_picturePlaneSize(int csp, int width, int height, int plane);
 396
 397 void*    x265_malloc(size_t size);
 398 void     x265_free(void *ptr);
 399 char*    x265_slurp_file(const char *filename);
 400
 401 #include "constants.h"
 402
 403 #endif // ifndef X265_COMMON_H