/*****************************************************************************
 * Copyright (C) 2013 x265 project
 *
 * Authors: Steve Borho <steve@borho.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
 *
 * This program is also available under a commercial proprietary license.
 * For more information, contact us at license @ x265.com.
 *****************************************************************************/

#ifndef X265_PIXEL_UTIL_H
#define X265_PIXEL_UTIL_H
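
/* Prototypes for the x86 SIMD (SSE2/SSSE3/SSE4/AVX/AVX2) implementations of
 * the pixel utility primitives. Each declaration corresponds to a hand-written
 * assembly routine; the plain-C sketches interspersed below (guarded by #if 0)
 * only illustrate the assumed semantics and are not part of the API. */
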
void x265_calcRecons4_sse2(pixel* pred, int16_t* residual, int16_t* reconqt, pixel* reconipred, int stride, int strideqt, int strideipred);
void x265_calcRecons8_sse2(pixel* pred, int16_t* residual, int16_t* reconqt, pixel* reconipred, int stride, int strideqt, int strideipred);
void x265_calcRecons16_sse2(pixel* pred, int16_t* residual, int16_t* reconqt, pixel* reconipred, int stride, int strideqt, int strideipred);
void x265_calcRecons32_sse2(pixel* pred, int16_t* residual, int16_t* reconqt, pixel* reconipred, int stride, int strideqt, int strideipred);
void x265_calcRecons16_sse4(pixel* pred, int16_t* residual, int16_t* reconqt, pixel* reconipred, int stride, int strideqt, int strideipred);
void x265_calcRecons32_sse4(pixel* pred, int16_t* residual, int16_t* reconqt, pixel* reconipred, int stride, int strideqt, int strideipred);
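
/* A minimal C sketch of the reconstruction these kernels are assumed to
 * perform (ref_calcRecons is a hypothetical name, X265_DEPTH is x265's
 * bit-depth macro): the clipped sum pred + residual is stored twice, as
 * int16_t into reconqt and as pixel into reconipred. */
#if 0
static void ref_calcRecons(pixel* pred, int16_t* residual, int16_t* reconqt,
                           pixel* reconipred, int stride, int strideqt,
                           int strideipred, int blockSize)
{
    const int maxVal = (1 << X265_DEPTH) - 1;

    for (int y = 0; y < blockSize; y++)
    {
        for (int x = 0; x < blockSize; x++)
        {
            int v = pred[x] + residual[x];
            v = v < 0 ? 0 : (v > maxVal ? maxVal : v);
            reconqt[x] = (int16_t)v;
            reconipred[x] = (pixel)v;
        }

        pred += stride;
        residual += stride;
        reconqt += strideqt;
        reconipred += strideipred;
    }
}
#endif
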
void x265_getResidual4_sse2(pixel* fenc, pixel* pred, int16_t* residual, intptr_t stride);
void x265_getResidual8_sse2(pixel* fenc, pixel* pred, int16_t* residual, intptr_t stride);
void x265_getResidual16_sse2(pixel* fenc, pixel* pred, int16_t* residual, intptr_t stride);
void x265_getResidual16_sse4(pixel* fenc, pixel* pred, int16_t* residual, intptr_t stride);
void x265_getResidual32_sse2(pixel* fenc, pixel* pred, int16_t* residual, intptr_t stride);
void x265_getResidual32_sse4(pixel* fenc, pixel* pred, int16_t* residual, intptr_t stride);
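
/* A minimal C sketch of the residual computation (ref_getResidual is a
 * hypothetical name): residual = fenc - pred over an N x N block, all three
 * buffers sharing one stride. */
#if 0
static void ref_getResidual(pixel* fenc, pixel* pred, int16_t* residual,
                            intptr_t stride, int blockSize)
{
    for (int y = 0; y < blockSize; y++)
    {
        for (int x = 0; x < blockSize; x++)
            residual[x] = (int16_t)(fenc[x] - pred[x]);

        fenc += stride;
        pred += stride;
        residual += stride;
    }
}
#endif
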
void x265_transpose4_sse2(pixel* dest, pixel* src, intptr_t stride);
void x265_transpose8_sse2(pixel* dest, pixel* src, intptr_t stride);
void x265_transpose16_sse2(pixel* dest, pixel* src, intptr_t stride);
void x265_transpose32_sse2(pixel* dest, pixel* src, intptr_t stride);
void x265_transpose64_sse2(pixel* dest, pixel* src, intptr_t stride);

void x265_transpose8_avx2(pixel* dest, pixel* src, intptr_t stride);
void x265_transpose16_avx2(pixel* dest, pixel* src, intptr_t stride);
void x265_transpose32_avx2(pixel* dest, pixel* src, intptr_t stride);
void x265_transpose64_avx2(pixel* dest, pixel* src, intptr_t stride);
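
/* A minimal C sketch of the assumed transpose semantics (ref_transpose is a
 * hypothetical name): src is read with the given stride, dest is written as
 * a packed N x N block. */
#if 0
static void ref_transpose(pixel* dest, pixel* src, intptr_t stride, int blockSize)
{
    for (int y = 0; y < blockSize; y++)
        for (int x = 0; x < blockSize; x++)
            dest[x * blockSize + y] = src[y * stride + x];
}
#endif
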
uint32_t x265_quant_sse4(int32_t* coef, int32_t* quantCoeff, int32_t* deltaU, int16_t* qCoef, int qBits, int add, int numCoeff);
uint32_t x265_quant_avx2(int32_t* coef, int32_t* quantCoeff, int32_t* deltaU, int16_t* qCoef, int qBits, int add, int numCoeff);
uint32_t x265_nquant_sse4(int32_t* coef, int32_t* quantCoeff, int16_t* qCoef, int qBits, int add, int numCoeff);
uint32_t x265_nquant_avx2(int32_t* coef, int32_t* quantCoeff, int16_t* qCoef, int qBits, int add, int numCoeff);
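
/* A plain-C sketch of the forward quantization these kernels are assumed to
 * implement, following the usual HEVC formulation (ref_quant is a
 * hypothetical name). The nquant variants do the same work but do not
 * produce the deltaU remainder array. */
#if 0
static uint32_t ref_quant(int32_t* coef, int32_t* quantCoeff, int32_t* deltaU,
                          int16_t* qCoef, int qBits, int add, int numCoeff)
{
    uint32_t numSig = 0;

    for (int i = 0; i < numCoeff; i++)
    {
        int sign = coef[i] < 0 ? -1 : 1;
        int64_t scaled = (int64_t)(coef[i] < 0 ? -coef[i] : coef[i]) * quantCoeff[i];
        int level = (int)((scaled + add) >> qBits);

        /* remainder of the rounding division, kept for sign-bit hiding */
        deltaU[i] = (int32_t)((scaled - ((int64_t)level << qBits)) >> (qBits - 8));

        if (level)
            numSig++;

        qCoef[i] = (int16_t)(sign * level);
    }

    return numSig; /* count of nonzero quantized levels */
}
#endif
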
void x265_dequant_normal_sse4(const int16_t* quantCoef, int32_t* coef, int num, int scale, int shift);
void x265_dequant_normal_avx2(const int16_t* quantCoef, int32_t* coef, int num, int scale, int shift);
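
/* A plain-C sketch of the assumed scalar dequantization (ref_dequant_normal
 * is a hypothetical name; the intermediate clipping required by HEVC is
 * omitted for brevity). */
#if 0
static void ref_dequant_normal(const int16_t* quantCoef, int32_t* coef,
                               int num, int scale, int shift)
{
    int add = 1 << (shift - 1);

    for (int n = 0; n < num; n++)
        coef[n] = (quantCoef[n] * scale + add) >> shift;
}
#endif
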
int x265_count_nonzero_ssse3(const int16_t* quantCoeff, int numCoeff);
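
/* Equivalent plain-C behavior (a sketch; ref_count_nonzero is a
 * hypothetical name): */
#if 0
static int ref_count_nonzero(const int16_t* quantCoeff, int numCoeff)
{
    int count = 0;

    for (int i = 0; i < numCoeff; i++)
        count += (quantCoeff[i] != 0);

    return count;
}
#endif
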
void x265_weight_pp_sse4(pixel* src, pixel* dst, intptr_t stride, int width, int height, int w0, int round, int shift, int offset);
void x265_weight_pp_avx2(pixel* src, pixel* dst, intptr_t stride, int width, int height, int w0, int round, int shift, int offset);
void x265_weight_sp_sse4(int16_t* src, pixel* dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset);
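
/* A minimal C sketch of the assumed pixel-to-pixel weighting for weighted
 * prediction (ref_weight_pp is a hypothetical name; X265_DEPTH is x265's
 * bit-depth macro). weight_sp applies the same w0/round/shift/offset formula
 * to an int16_t source with separate source and destination strides. */
#if 0
static void ref_weight_pp(pixel* src, pixel* dst, intptr_t stride, int width,
                          int height, int w0, int round, int shift, int offset)
{
    const int maxVal = (1 << X265_DEPTH) - 1;

    for (int y = 0; y < height; y++)
    {
        for (int x = 0; x < width; x++)
        {
            int v = ((w0 * src[x] + round) >> shift) + offset;
            dst[x] = (pixel)(v < 0 ? 0 : (v > maxVal ? maxVal : v));
        }

        src += stride;
        dst += stride;
    }
}
#endif
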
void x265_pixel_ssim_4x4x2_core_mmx2(const uint8_t* pix1, intptr_t stride1,
                                     const uint8_t* pix2, intptr_t stride2, int sums[2][4]);
void x265_pixel_ssim_4x4x2_core_sse2(const pixel* pix1, intptr_t stride1,
                                     const pixel* pix2, intptr_t stride2, int sums[2][4]);
void x265_pixel_ssim_4x4x2_core_avx(const pixel* pix1, intptr_t stride1,
                                    const pixel* pix2, intptr_t stride2, int sums[2][4]);
float x265_pixel_ssim_end4_sse2(int sum0[5][4], int sum1[5][4], int width);
float x265_pixel_ssim_end4_avx(int sum0[5][4], int sum1[5][4], int width);
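
/* The 4x4x2 SSIM cores are assumed to accumulate, for two horizontally
 * adjacent 4x4 blocks, the four partial sums SSIM needs (sum of pix1, sum of
 * pix2, sum of squares, sum of cross products) into sums[2][4]; the end4
 * routines then fold up to `width` (at most 4) sets of neighboring sums into
 * a final SSIM contribution. This follows the x264 SSIM decomposition;
 * consult the assembly for the exact layout. */
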
void x265_scale1D_128to64_ssse3(pixel* dst, pixel* src, intptr_t stride);
void x265_scale1D_128to64_avx2(pixel* dst, pixel* src, intptr_t stride);
void x265_scale2D_64to32_ssse3(pixel* dst, pixel* src, intptr_t stride);
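
/* The scale primitives downsample by powers of two: scale1D_128to64 is
 * assumed to average adjacent pixel pairs of a 128-wide line into 64
 * outputs, and scale2D_64to32 to average 2x2 neighborhoods of a 64x64 block
 * into a packed 32x32 block. A minimal C sketch of the 2D case
 * (ref_scale2D_64to32 is a hypothetical name): */
#if 0
static void ref_scale2D_64to32(pixel* dst, pixel* src, intptr_t stride)
{
    for (int y = 0; y < 64; y += 2)
    {
        for (int x = 0; x < 64; x += 2)
        {
            int sum = src[y * stride + x] + src[y * stride + x + 1] +
                      src[(y + 1) * stride + x] + src[(y + 1) * stride + x + 1];

            dst[(y / 2) * 32 + (x / 2)] = (pixel)((sum + 2) >> 2);
        }
    }
}
#endif
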
#define SETUP_CHROMA_PIXELSUB_PS_FUNC(W, H, cpu) \
    void x265_pixel_sub_ps_ ## W ## x ## H ## cpu(int16_t* dest, intptr_t destStride, pixel* src0, pixel* src1, intptr_t srcStride0, intptr_t srcStride1); \
    void x265_pixel_add_ps_ ## W ## x ## H ## cpu(pixel* dest, intptr_t destStride, pixel* src0, int16_t* src1, intptr_t srcStride0, intptr_t srcStride1);

#define CHROMA_PIXELSUB_DEF(cpu) \
    SETUP_CHROMA_PIXELSUB_PS_FUNC(4, 4, cpu); \
    SETUP_CHROMA_PIXELSUB_PS_FUNC(8, 8, cpu); \
    SETUP_CHROMA_PIXELSUB_PS_FUNC(16, 16, cpu); \
    SETUP_CHROMA_PIXELSUB_PS_FUNC(32, 32, cpu);

#define CHROMA_PIXELSUB_DEF_422(cpu) \
    SETUP_CHROMA_PIXELSUB_PS_FUNC(4, 8, cpu); \
    SETUP_CHROMA_PIXELSUB_PS_FUNC(8, 16, cpu); \
    SETUP_CHROMA_PIXELSUB_PS_FUNC(16, 32, cpu); \
    SETUP_CHROMA_PIXELSUB_PS_FUNC(32, 64, cpu);

#define SETUP_LUMA_PIXELSUB_PS_FUNC(W, H, cpu) \
    void x265_pixel_sub_ps_ ## W ## x ## H ## cpu(int16_t* dest, intptr_t destStride, pixel* src0, pixel* src1, intptr_t srcStride0, intptr_t srcStride1); \
    void x265_pixel_add_ps_ ## W ## x ## H ## cpu(pixel* dest, intptr_t destStride, pixel* src0, int16_t* src1, intptr_t srcStride0, intptr_t srcStride1);

#define LUMA_PIXELSUB_DEF(cpu) \
    SETUP_LUMA_PIXELSUB_PS_FUNC(8, 8, cpu); \
    SETUP_LUMA_PIXELSUB_PS_FUNC(16, 16, cpu); \
    SETUP_LUMA_PIXELSUB_PS_FUNC(32, 32, cpu); \
    SETUP_LUMA_PIXELSUB_PS_FUNC(64, 64, cpu);

CHROMA_PIXELSUB_DEF(_sse4);
LUMA_PIXELSUB_DEF(_sse4);
CHROMA_PIXELSUB_DEF(_sse2);
LUMA_PIXELSUB_DEF(_sse2);

CHROMA_PIXELSUB_DEF_422(_sse4);
CHROMA_PIXELSUB_DEF_422(_sse2);
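
/* A minimal C sketch of the block subtract/add pair the macros above declare
 * (the ref_ names are hypothetical): sub_ps produces int16_t differences,
 * add_ps reconstructs clipped pixels from a pixel block plus an int16_t
 * block, each buffer with its own stride. */
#if 0
static void ref_pixel_sub_ps(int16_t* dest, intptr_t destStride, pixel* src0, pixel* src1,
                             intptr_t srcStride0, intptr_t srcStride1, int width, int height)
{
    for (int y = 0; y < height; y++)
    {
        for (int x = 0; x < width; x++)
            dest[x] = (int16_t)(src0[x] - src1[x]);

        dest += destStride;
        src0 += srcStride0;
        src1 += srcStride1;
    }
}

static void ref_pixel_add_ps(pixel* dest, intptr_t destStride, pixel* src0, int16_t* src1,
                             intptr_t srcStride0, intptr_t srcStride1, int width, int height)
{
    const int maxVal = (1 << X265_DEPTH) - 1; /* assumes x265's bit-depth macro */

    for (int y = 0; y < height; y++)
    {
        for (int x = 0; x < width; x++)
        {
            int v = src0[x] + src1[x];
            dest[x] = (pixel)(v < 0 ? 0 : (v > maxVal ? maxVal : v));
        }

        dest += destStride;
        src0 += srcStride0;
        src1 += srcStride1;
    }
}
#endif
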
#define SETUP_LUMA_PIXELVAR_FUNC(W, H, cpu) \
    uint64_t x265_pixel_var_ ## W ## x ## H ## cpu(pixel* pix, intptr_t pixstride);

#define LUMA_PIXELVAR_DEF(cpu) \
    SETUP_LUMA_PIXELVAR_FUNC(8, 8, cpu); \
    SETUP_LUMA_PIXELVAR_FUNC(16, 16, cpu); \
    SETUP_LUMA_PIXELVAR_FUNC(32, 32, cpu); \
    SETUP_LUMA_PIXELVAR_FUNC(64, 64, cpu);

LUMA_PIXELVAR_DEF(_sse2);
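
/* The var kernels return the block's pixel sum and sum of squares packed
 * into one uint64_t; the low/high 32-bit split below follows the x264
 * convention and is an assumption here (ref_pixel_var is hypothetical). */
#if 0
static uint64_t ref_pixel_var(pixel* pix, intptr_t pixstride, int blockSize)
{
    uint32_t sum = 0, sqr = 0;

    for (int y = 0; y < blockSize; y++)
    {
        for (int x = 0; x < blockSize; x++)
        {
            sum += pix[x];
            sqr += pix[x] * pix[x];
        }

        pix += pixstride;
    }

    return sum + ((uint64_t)sqr << 32);
}
#endif
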
#undef CHROMA_PIXELSUB_DEF
#undef CHROMA_PIXELSUB_DEF_422
#undef LUMA_PIXELSUB_DEF
#undef LUMA_PIXELVAR_DEF
#undef SETUP_CHROMA_PIXELSUB_PS_FUNC
#undef SETUP_LUMA_PIXELSUB_PS_FUNC
#undef SETUP_LUMA_PIXELVAR_FUNC

#endif // ifndef X265_PIXEL_UTIL_H