Imported Upstream version 1.4+222+hg5f9f7194267b
[deb_x265.git] / source / common / x86 / pixel-util.h
CommitLineData
72b9787e
JB
/*****************************************************************************
 * Copyright (C) 2013 x265 project
 *
 * Authors: Steve Borho <steve@borho.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
 *
 * This program is also available under a commercial proprietary license.
 * For more information, contact us at license @ x265.com.
 *****************************************************************************/

24#ifndef X265_PIXEL_UTIL_H
25#define X265_PIXEL_UTIL_H
26
b53f7c52
JB
27void x265_getResidual4_sse2(const pixel* fenc, const pixel* pred, int16_t* residual, intptr_t stride);
28void x265_getResidual8_sse2(const pixel* fenc, const pixel* pred, int16_t* residual, intptr_t stride);
29void x265_getResidual16_sse2(const pixel* fenc, const pixel* pred, int16_t* residual, intptr_t stride);
30void x265_getResidual16_sse4(const pixel* fenc, const pixel* pred, int16_t* residual, intptr_t stride);
31void x265_getResidual32_sse2(const pixel* fenc, const pixel* pred, int16_t* residual, intptr_t stride);
32void x265_getResidual32_sse4(const pixel* fenc, const pixel* pred, int16_t* residual, intptr_t stride);
33
34void x265_transpose4_sse2(pixel* dest, const pixel* src, intptr_t stride);
35void x265_transpose8_sse2(pixel* dest, const pixel* src, intptr_t stride);
36void x265_transpose16_sse2(pixel* dest, const pixel* src, intptr_t stride);
37void x265_transpose32_sse2(pixel* dest, const pixel* src, intptr_t stride);
38void x265_transpose64_sse2(pixel* dest, const pixel* src, intptr_t stride);
39
40void x265_transpose8_avx2(pixel* dest, const pixel* src, intptr_t stride);
41void x265_transpose16_avx2(pixel* dest, const pixel* src, intptr_t stride);
42void x265_transpose32_avx2(pixel* dest, const pixel* src, intptr_t stride);
43void x265_transpose64_avx2(pixel* dest, const pixel* src, intptr_t stride);
44
/* Quantisation / dequantisation prototypes (SSE4, AVX2, SSSE3 variants).
 * NOTE(review): the meaning of the uint32_t / int return values is not visible
 * here -- presumably a count of non-zero coefficients; confirm before relying on it. */
uint32_t x265_quant_sse4(const int16_t* coef, const int32_t* quantCoeff, int32_t* deltaU, int16_t* qCoef, int qBits, int add, int numCoeff);
uint32_t x265_quant_avx2(const int16_t* coef, const int32_t* quantCoeff, int32_t* deltaU, int16_t* qCoef, int qBits, int add, int numCoeff);

/* Same parameter list as quant, minus the deltaU output. */
uint32_t x265_nquant_sse4(const int16_t* coef, const int32_t* quantCoeff, int16_t* qCoef, int qBits, int add, int numCoeff);
uint32_t x265_nquant_avx2(const int16_t* coef, const int32_t* quantCoeff, int16_t* qCoef, int qBits, int add, int numCoeff);

void x265_dequant_normal_sse4(const int16_t* quantCoef, int16_t* coef, int num, int scale, int shift);
void x265_dequant_normal_avx2(const int16_t* quantCoef, int16_t* coef, int num, int scale, int shift);

int x265_count_nonzero_ssse3(const int16_t* quantCoeff, int numCoeff);

53void x265_weight_pp_sse4(const pixel* src, pixel* dst, intptr_t stride, int width, int height, int w0, int round, int shift, int offset);
54void x265_weight_pp_avx2(const pixel* src, pixel* dst, intptr_t stride, int width, int height, int w0, int round, int shift, int offset);
55void x265_weight_sp_sse4(const int16_t* src, pixel* dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset);
56
57void x265_pixel_ssim_4x4x2_core_mmx2(const uint8_t* pix1, intptr_t stride1,
58 const uint8_t* pix2, intptr_t stride2, int sums[2][4]);
59void x265_pixel_ssim_4x4x2_core_sse2(const pixel* pix1, intptr_t stride1,
60 const pixel* pix2, intptr_t stride2, int sums[2][4]);
61void x265_pixel_ssim_4x4x2_core_avx(const pixel* pix1, intptr_t stride1,
62 const pixel* pix2, intptr_t stride2, int sums[2][4]);
72b9787e
JB
63float x265_pixel_ssim_end4_sse2(int sum0[5][4], int sum1[5][4], int width);
64float x265_pixel_ssim_end4_avx(int sum0[5][4], int sum1[5][4], int width);
65
b53f7c52
JB
66void x265_scale1D_128to64_ssse3(pixel*, const pixel*, intptr_t);
67void x265_scale1D_128to64_avx2(pixel*, const pixel*, intptr_t);
68void x265_scale2D_64to32_ssse3(pixel*, const pixel*, intptr_t);
72b9787e
JB
69
/* Declares the x265_pixel_sub_ps_<W>x<H><cpu> and x265_pixel_add_ps_<W>x<H><cpu>
 * prototypes for one chroma block size; W, H and cpu are token-pasted into the names.
 * The expansion has no trailing ';' on purpose: the use site writes the semicolon,
 * so no empty declaration is left at file scope. */
#define SETUP_CHROMA_PIXELSUB_PS_FUNC(W, H, cpu) \
    void x265_pixel_sub_ps_ ## W ## x ## H ## cpu(int16_t* dest, intptr_t destride, const pixel* src0, const pixel* src1, intptr_t srcstride0, intptr_t srcstride1); \
    void x265_pixel_add_ps_ ## W ## x ## H ## cpu(pixel* dest, intptr_t destride, const pixel* src0, const int16_t* src1, intptr_t srcStride0, intptr_t srcStride1)

/* Declares the sub_ps/add_ps prototype pairs for the square chroma sizes
 * 4x4, 8x8, 16x16 and 32x32 of one cpu suffix.
 * No trailing ';' in the expansion: invoke as CHROMA_PIXELSUB_DEF(suffix); */
#define CHROMA_PIXELSUB_DEF(cpu) \
    SETUP_CHROMA_PIXELSUB_PS_FUNC(4, 4, cpu); \
    SETUP_CHROMA_PIXELSUB_PS_FUNC(8, 8, cpu); \
    SETUP_CHROMA_PIXELSUB_PS_FUNC(16, 16, cpu); \
    SETUP_CHROMA_PIXELSUB_PS_FUNC(32, 32, cpu)

/* Declares the sub_ps/add_ps prototype pairs for the W x 2W chroma sizes
 * 4x8, 8x16, 16x32 and 32x64 (presumably the 4:2:2 chroma shapes, per the macro name).
 * No trailing ';' in the expansion: invoke as CHROMA_PIXELSUB_DEF_422(suffix); */
#define CHROMA_PIXELSUB_DEF_422(cpu) \
    SETUP_CHROMA_PIXELSUB_PS_FUNC(4, 8, cpu); \
    SETUP_CHROMA_PIXELSUB_PS_FUNC(8, 16, cpu); \
    SETUP_CHROMA_PIXELSUB_PS_FUNC(16, 32, cpu); \
    SETUP_CHROMA_PIXELSUB_PS_FUNC(32, 64, cpu)

/* Declares the x265_pixel_sub_ps_<W>x<H><cpu> and x265_pixel_add_ps_<W>x<H><cpu>
 * prototypes for one luma block size; W, H and cpu are token-pasted into the names.
 * The expansion is the same as SETUP_CHROMA_PIXELSUB_PS_FUNC, so sizes requested
 * through both macros yield identical, compatible redeclarations.
 * No trailing ';' in the expansion: the use site writes the semicolon. */
#define SETUP_LUMA_PIXELSUB_PS_FUNC(W, H, cpu) \
    void x265_pixel_sub_ps_ ## W ## x ## H ## cpu(int16_t* dest, intptr_t destride, const pixel* src0, const pixel* src1, intptr_t srcstride0, intptr_t srcstride1); \
    void x265_pixel_add_ps_ ## W ## x ## H ## cpu(pixel* dest, intptr_t destride, const pixel* src0, const int16_t* src1, intptr_t srcStride0, intptr_t srcStride1)

/* Declares the sub_ps/add_ps prototype pairs for the square luma sizes
 * 8x8, 16x16, 32x32 and 64x64 of one cpu suffix.
 * No trailing ';' in the expansion: invoke as LUMA_PIXELSUB_DEF(suffix); */
#define LUMA_PIXELSUB_DEF(cpu) \
    SETUP_LUMA_PIXELSUB_PS_FUNC(8, 8, cpu); \
    SETUP_LUMA_PIXELSUB_PS_FUNC(16, 16, cpu); \
    SETUP_LUMA_PIXELSUB_PS_FUNC(32, 32, cpu); \
    SETUP_LUMA_PIXELSUB_PS_FUNC(64, 64, cpu)

/* Emit the sub_ps/add_ps prototypes for the _sse4 and _sse2 suffixes.
 * The chroma and luma lists overlap on 8x8, 16x16 and 32x32; those names are
 * simply declared twice with the same signature. */
CHROMA_PIXELSUB_DEF(_sse4);
LUMA_PIXELSUB_DEF(_sse4);
CHROMA_PIXELSUB_DEF(_sse2);
LUMA_PIXELSUB_DEF(_sse2);

/* W x 2W chroma sizes for the same two suffixes. */
CHROMA_PIXELSUB_DEF_422(_sse4);
CHROMA_PIXELSUB_DEF_422(_sse2);

/* Declares uint64_t x265_pixel_var_<W>x<H><cpu>(pix, pixstride) for one block size.
 * NOTE(review): how the uint64_t result is laid out is not visible from this header.
 * No trailing ';' in the expansion: the use site writes the semicolon. */
#define SETUP_LUMA_PIXELVAR_FUNC(W, H, cpu) \
    uint64_t x265_pixel_var_ ## W ## x ## H ## cpu(const pixel* pix, intptr_t pixstride)

/* Declares the pixel_var prototypes for the square luma sizes 8x8, 16x16, 32x32
 * and 64x64 of one cpu suffix.
 * No trailing ';' in the expansion: invoke as LUMA_PIXELVAR_DEF(suffix); */
#define LUMA_PIXELVAR_DEF(cpu) \
    SETUP_LUMA_PIXELVAR_FUNC(8, 8, cpu); \
    SETUP_LUMA_PIXELVAR_FUNC(16, 16, cpu); \
    SETUP_LUMA_PIXELVAR_FUNC(32, 32, cpu); \
    SETUP_LUMA_PIXELVAR_FUNC(64, 64, cpu)

/* Emit the pixel_var prototypes; only the _sse2 suffix is declared here. */
LUMA_PIXELVAR_DEF(_sse2);

/* The declaration-generating macros are private to this header; remove them so
 * they do not leak into files that include it. */
#undef CHROMA_PIXELSUB_DEF
#undef CHROMA_PIXELSUB_DEF_422
#undef LUMA_PIXELSUB_DEF
#undef LUMA_PIXELVAR_DEF
#undef SETUP_CHROMA_PIXELSUB_PS_FUNC
#undef SETUP_LUMA_PIXELSUB_PS_FUNC
#undef SETUP_LUMA_PIXELVAR_FUNC

123#endif // ifndef X265_PIXEL_UTIL_H