Commit | Line | Data |
---|---|---|
72b9787e JB |
1 | /***************************************************************************** |
2 | * Copyright (C) 2013 x265 project | |
3 | * | |
4 | * Authors: Steve Borho <steve@borho.org> | |
5 | * | |
6 | * This program is free software; you can redistribute it and/or modify | |
7 | * it under the terms of the GNU General Public License as published by | |
8 | * the Free Software Foundation; either version 2 of the License, or | |
9 | * (at your option) any later version. | |
10 | * | |
11 | * This program is distributed in the hope that it will be useful, | |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | * GNU General Public License for more details. | |
15 | * | |
16 | * You should have received a copy of the GNU General Public License | |
17 | * along with this program; if not, write to the Free Software | |
18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. | |
19 | * | |
20 | * This program is also available under a commercial proprietary license. | |
21 | * For more information, contact us at license @ x265.com. | |
22 | *****************************************************************************/ | |
23 | ||
24 | #ifndef X265_BLOCKCOPY8_H | |
25 | #define X265_BLOCKCOPY8_H | |
26 | ||
27 | void x265_cvt32to16_shr_sse2(int16_t * dst, int *src, intptr_t, int, int); /* cvt32to16 family: int source, int16_t destination; only prototypes are given here, definitions live elsewhere. The unnamed intptr_t is presumably a stride and the trailing int(s) a shift (and size) -- TODO confirm against the implementation */ |
28 | void x265_cvt32to16_shl_4_sse2(int16_t * dst, int *src, intptr_t, int); /* _shl variants carry 4/8/16/32 in the name (presumably the block size -- TODO confirm) and take one int fewer than _shr */ |
29 | void x265_cvt32to16_shl_8_sse2(int16_t * dst, int *src, intptr_t, int); |
30 | void x265_cvt32to16_shl_16_sse2(int16_t * dst, int *src, intptr_t, int); |
31 | void x265_cvt32to16_shl_32_sse2(int16_t * dst, int *src, intptr_t, int); |
32 | void x265_cvt32to16_shl_4_avx2(int16_t * dst, int *src, intptr_t, int); /* same four _shl signatures with the _avx2 suffix */ |
33 | void x265_cvt32to16_shl_8_avx2(int16_t * dst, int *src, intptr_t, int); |
34 | void x265_cvt32to16_shl_16_avx2(int16_t * dst, int *src, intptr_t, int); |
35 | void x265_cvt32to16_shl_32_avx2(int16_t * dst, int *src, intptr_t, int); |
36 | void x265_cvt16to32_shl_sse4(int32_t * dst, int16_t * src, intptr_t, int32_t, int32_t); /* cvt16to32 family: int16_t source, int32_t destination, all with the _sse4 suffix; meaning of the unnamed intptr_t/int32_t parameters is not visible here -- TODO confirm against the implementation */ |
37 | void x265_cvt16to32_shr_4_sse4(int32_t * dst, int16_t * src, intptr_t, int32_t, int32_t); /* _shr variants carry 4/8/16/32 in the name and share the _shl signature */ |
38 | void x265_cvt16to32_shr_8_sse4(int32_t * dst, int16_t * src, intptr_t, int32_t, int32_t); |
39 | void x265_cvt16to32_shr_16_sse4(int32_t * dst, int16_t * src, intptr_t, int32_t, int32_t); |
40 | void x265_cvt16to32_shr_32_sse4(int32_t * dst, int16_t * src, intptr_t, int32_t, int32_t); |
41 | void x265_copy_shr_sse4(int16_t * dst, int16_t *src, intptr_t, int, int); /* copy_shr/copy_shl family: int16_t source and destination; the unnamed intptr_t is presumably a stride and the int(s) a shift (and size) -- TODO confirm against the implementation */ |
42 | void x265_copy_shl_4_sse2(int16_t * dst, int16_t *src, intptr_t, int); /* _shl variants carry 4/8/16/32 in the name and take one int fewer than _shr */ |
43 | void x265_copy_shl_8_sse2(int16_t * dst, int16_t *src, intptr_t, int); |
44 | void x265_copy_shl_16_sse2(int16_t * dst, int16_t *src, intptr_t, int); |
45 | void x265_copy_shl_32_sse2(int16_t * dst, int16_t *src, intptr_t, int); |
46 | uint32_t x265_copy_cnt_4_sse4(int16_t * dst, int16_t * src, intptr_t); /* copy_cnt family: int16_t source and destination, returns uint32_t (presumably a count derived from the copied data -- TODO confirm what is counted); declared for 4/8/16/32 with the _sse4 suffix */ |
47 | uint32_t x265_copy_cnt_8_sse4(int16_t * dst, int16_t * src, intptr_t); |
48 | uint32_t x265_copy_cnt_16_sse4(int16_t * dst, int16_t * src, intptr_t); |
49 | uint32_t x265_copy_cnt_32_sse4(int16_t * dst, int16_t * src, intptr_t); |
50 | uint32_t x265_copy_cnt_4_avx2(int16_t * dst, int16_t * src, intptr_t); /* same four signatures with the _avx2 suffix */ |
51 | uint32_t x265_copy_cnt_8_avx2(int16_t * dst, int16_t * src, intptr_t); |
52 | uint32_t x265_copy_cnt_16_avx2(int16_t * dst, int16_t * src, intptr_t); |
53 | uint32_t x265_copy_cnt_32_avx2(int16_t * dst, int16_t * src, intptr_t); |
54 | ||
55 | #define SETUP_BLOCKCOPY_FUNC(W, H, cpu) /* token-pastes W, H and cpu into three prototypes: x265_blockcopy_pp_WxH<cpu> (pixel to pixel), _sp_ (int16_t to pixel) and _ss_ (int16_t to int16_t); the expansion already ends in ';' */ \ |
56 | void x265_blockcopy_pp_ ## W ## x ## H ## cpu(pixel * a, intptr_t stridea, pixel * b, intptr_t strideb); \ |
57 | void x265_blockcopy_sp_ ## W ## x ## H ## cpu(pixel * a, intptr_t stridea, int16_t * b, intptr_t strideb); \ |
58 | void x265_blockcopy_ss_ ## W ## x ## H ## cpu(int16_t * a, intptr_t stridea, int16_t * b, intptr_t strideb); |
59 | ||
60 | #define SETUP_BLOCKCOPY_PS(W, H, cpu) /* token-pastes one prototype x265_blockcopy_ps_WxH<cpu>: pixel source, int16_t destination, each with its own stride; the expansion already ends in ';' */ \ |
61 | void x265_blockcopy_ps_ ## W ## x ## H ## cpu(int16_t * dst, intptr_t dstStride, pixel * src, intptr_t srcStride); |
62 | ||
63 | #define SETUP_BLOCKCOPY_SP(W, H, cpu) /* token-pastes one prototype x265_blockcopy_sp_WxH<cpu> with the same signature as the _sp_ line of SETUP_BLOCKCOPY_FUNC; the expansion already ends in ';' */ \ |
64 | void x265_blockcopy_sp_ ## W ## x ## H ## cpu(pixel * a, intptr_t stridea, int16_t * b, intptr_t strideb); |
65 | ||
66 | #define SETUP_BLOCKCOPY_SS_PP(W, H, cpu) /* token-pastes the _pp_ and _ss_ prototypes only (SETUP_BLOCKCOPY_FUNC without its _sp_ line); the expansion already ends in ';' */ \ |
67 | void x265_blockcopy_pp_ ## W ## x ## H ## cpu(pixel * a, intptr_t stridea, pixel * b, intptr_t strideb); \ |
68 | void x265_blockcopy_ss_ ## W ## x ## H ## cpu(int16_t * a, intptr_t stridea, int16_t * b, intptr_t strideb); |
69 | ||
70 | #define BLOCKCOPY_COMMON(cpu) /* declares the pp/sp/ss prototype triple (via SETUP_BLOCKCOPY_FUNC) for every WxH pair listed below, using the given cpu suffix */ \ |
71 | SETUP_BLOCKCOPY_FUNC(4, 4, cpu); \ |
72 | SETUP_BLOCKCOPY_FUNC(4, 2, cpu); \ |
73 | SETUP_BLOCKCOPY_FUNC(8, 8, cpu); \ |
74 | SETUP_BLOCKCOPY_FUNC(8, 4, cpu); \ |
75 | SETUP_BLOCKCOPY_FUNC(4, 8, cpu); \ |
76 | SETUP_BLOCKCOPY_FUNC(8, 6, cpu); \ |
77 | SETUP_BLOCKCOPY_FUNC(8, 2, cpu); \ |
78 | SETUP_BLOCKCOPY_FUNC(16, 16, cpu); \ |
79 | SETUP_BLOCKCOPY_FUNC(16, 8, cpu); \ |
80 | SETUP_BLOCKCOPY_FUNC(8, 16, cpu); \ |
81 | SETUP_BLOCKCOPY_FUNC(16, 12, cpu); \ |
82 | SETUP_BLOCKCOPY_FUNC(12, 16, cpu); \ |
83 | SETUP_BLOCKCOPY_FUNC(16, 4, cpu); \ |
84 | SETUP_BLOCKCOPY_FUNC(4, 16, cpu); \ |
85 | SETUP_BLOCKCOPY_FUNC(32, 32, cpu); \ |
86 | SETUP_BLOCKCOPY_FUNC(32, 16, cpu); \ |
87 | SETUP_BLOCKCOPY_FUNC(16, 32, cpu); \ |
88 | SETUP_BLOCKCOPY_FUNC(32, 24, cpu); \ |
89 | SETUP_BLOCKCOPY_FUNC(24, 32, cpu); \ |
90 | SETUP_BLOCKCOPY_FUNC(32, 8, cpu); \ |
91 | SETUP_BLOCKCOPY_FUNC(8, 32, cpu); \ |
92 | SETUP_BLOCKCOPY_FUNC(64, 64, cpu); \ |
93 | SETUP_BLOCKCOPY_FUNC(64, 32, cpu); \ |
94 | SETUP_BLOCKCOPY_FUNC(32, 64, cpu); \ |
95 | SETUP_BLOCKCOPY_FUNC(64, 48, cpu); \ |
96 | SETUP_BLOCKCOPY_FUNC(48, 64, cpu); \ |
97 | SETUP_BLOCKCOPY_FUNC(64, 16, cpu); \ |
98 | SETUP_BLOCKCOPY_FUNC(16, 64, cpu); |
99 | ||
100 | #define BLOCKCOPY_SP(cpu) /* declares _sp_ prototypes for WxH pairs that do not appear in BLOCKCOPY_COMMON, so using both with the same suffix does not repeat a declaration; the two groups are presumably split by chroma format -- TODO confirm */ \ |
101 | SETUP_BLOCKCOPY_SP(2, 4, cpu); \ |
102 | SETUP_BLOCKCOPY_SP(2, 8, cpu); \ |
103 | SETUP_BLOCKCOPY_SP(6, 8, cpu); \ |
104 | \ |
105 | SETUP_BLOCKCOPY_SP(2, 16, cpu); \ |
106 | SETUP_BLOCKCOPY_SP(4, 32, cpu); \ |
107 | SETUP_BLOCKCOPY_SP(6, 16, cpu); \ |
108 | SETUP_BLOCKCOPY_SP(8, 12, cpu); \ |
109 | SETUP_BLOCKCOPY_SP(8, 64, cpu); \ |
110 | SETUP_BLOCKCOPY_SP(12, 32, cpu); \ |
111 | SETUP_BLOCKCOPY_SP(16, 24, cpu); \ |
112 | SETUP_BLOCKCOPY_SP(24, 64, cpu); \ |
113 | SETUP_BLOCKCOPY_SP(32, 48, cpu); |
114 | ||
115 | #define BLOCKCOPY_SS_PP(cpu) /* declares _pp_ and _ss_ prototypes for the same WxH list that BLOCKCOPY_SP covers, i.e. the sizes absent from BLOCKCOPY_COMMON */ \ |
116 | SETUP_BLOCKCOPY_SS_PP(2, 4, cpu); \ |
117 | SETUP_BLOCKCOPY_SS_PP(2, 8, cpu); \ |
118 | SETUP_BLOCKCOPY_SS_PP(6, 8, cpu); \ |
119 | \ |
120 | SETUP_BLOCKCOPY_SS_PP(2, 16, cpu); \ |
121 | SETUP_BLOCKCOPY_SS_PP(4, 32, cpu); \ |
122 | SETUP_BLOCKCOPY_SS_PP(6, 16, cpu); \ |
123 | SETUP_BLOCKCOPY_SS_PP(8, 12, cpu); \ |
124 | SETUP_BLOCKCOPY_SS_PP(8, 64, cpu); \ |
125 | SETUP_BLOCKCOPY_SS_PP(12, 32, cpu); \ |
126 | SETUP_BLOCKCOPY_SS_PP(16, 24, cpu); \ |
127 | SETUP_BLOCKCOPY_SS_PP(24, 64, cpu); \ |
128 | SETUP_BLOCKCOPY_SS_PP(32, 48, cpu); |
129 | ||
130 | ||
131 | #define BLOCKCOPY_PS(cpu) /* declares _ps_ prototypes for the full size list in one macro: the first group before the blank continuation line, then the same nine extra sizes that end BLOCKCOPY_SP */ \ |
132 | SETUP_BLOCKCOPY_PS(2, 4, cpu); \ |
133 | SETUP_BLOCKCOPY_PS(2, 8, cpu); \ |
134 | SETUP_BLOCKCOPY_PS(4, 2, cpu); \ |
135 | SETUP_BLOCKCOPY_PS(4, 4, cpu); \ |
136 | SETUP_BLOCKCOPY_PS(4, 8, cpu); \ |
137 | SETUP_BLOCKCOPY_PS(4, 16, cpu); \ |
138 | SETUP_BLOCKCOPY_PS(6, 8, cpu); \ |
139 | SETUP_BLOCKCOPY_PS(8, 2, cpu); \ |
140 | SETUP_BLOCKCOPY_PS(8, 4, cpu); \ |
141 | SETUP_BLOCKCOPY_PS(8, 6, cpu); \ |
142 | SETUP_BLOCKCOPY_PS(8, 8, cpu); \ |
143 | SETUP_BLOCKCOPY_PS(8, 16, cpu); \ |
144 | SETUP_BLOCKCOPY_PS(8, 32, cpu); \ |
145 | SETUP_BLOCKCOPY_PS(12, 16, cpu); \ |
146 | SETUP_BLOCKCOPY_PS(16, 4, cpu); \ |
147 | SETUP_BLOCKCOPY_PS(16, 8, cpu); \ |
148 | SETUP_BLOCKCOPY_PS(16, 12, cpu); \ |
149 | SETUP_BLOCKCOPY_PS(16, 16, cpu); \ |
150 | SETUP_BLOCKCOPY_PS(16, 32, cpu); \ |
151 | SETUP_BLOCKCOPY_PS(24, 32, cpu); \ |
152 | SETUP_BLOCKCOPY_PS(32, 8, cpu); \ |
153 | SETUP_BLOCKCOPY_PS(32, 16, cpu); \ |
154 | SETUP_BLOCKCOPY_PS(32, 24, cpu); \ |
155 | SETUP_BLOCKCOPY_PS(32, 32, cpu); \ |
156 | SETUP_BLOCKCOPY_PS(16, 64, cpu); \ |
157 | SETUP_BLOCKCOPY_PS(32, 64, cpu); \ |
158 | SETUP_BLOCKCOPY_PS(48, 64, cpu); \ |
159 | SETUP_BLOCKCOPY_PS(64, 16, cpu); \ |
160 | SETUP_BLOCKCOPY_PS(64, 32, cpu); \ |
161 | SETUP_BLOCKCOPY_PS(64, 48, cpu); \ |
162 | SETUP_BLOCKCOPY_PS(64, 64, cpu); \ |
163 | \ |
164 | SETUP_BLOCKCOPY_PS(2, 16, cpu); \ |
165 | SETUP_BLOCKCOPY_PS(4, 32, cpu); \ |
166 | SETUP_BLOCKCOPY_PS(6, 16, cpu); \ |
167 | SETUP_BLOCKCOPY_PS(8, 12, cpu); \ |
168 | SETUP_BLOCKCOPY_PS(8, 64, cpu); \ |
169 | SETUP_BLOCKCOPY_PS(12, 32, cpu); \ |
170 | SETUP_BLOCKCOPY_PS(16, 24, cpu); \ |
171 | SETUP_BLOCKCOPY_PS(24, 64, cpu); \ |
172 | SETUP_BLOCKCOPY_PS(32, 48, cpu); |
173 | ||
174 | BLOCKCOPY_COMMON(_sse2); /* emit the actual prototypes: pp/sp/ss for the common sizes with the _sse2 suffix. NOTE(review): every helper macro already ends in ';', so the ';' written after each use adds an empty declaration, which strict ISO C modes may warn about */ |
175 | BLOCKCOPY_SS_PP(_sse2); /* pp/ss for the extra sizes, _sse2 suffix */ |
176 | BLOCKCOPY_SP(_sse4); /* sp for the extra sizes only, _sse4 suffix (no _sse4 sp prototypes are declared for the common sizes) */ |
177 | BLOCKCOPY_PS(_sse4); /* ps for the full size list, _sse4 suffix */ |
178 | ||
179 | BLOCKCOPY_SP(_sse2); /* sp for the extra sizes, _sse2 suffix; complements the sp prototypes from BLOCKCOPY_COMMON(_sse2) */ |
180 | ||
181 | void x265_blockfill_s_4x4_sse2(int16_t *dst, intptr_t dstride, int16_t val); /* blockfill_s family (_sse2): int16_t destination with stride dstride and an int16_t value, presumably written to every element of the NxN block -- TODO confirm against the implementation */ |
182 | void x265_blockfill_s_8x8_sse2(int16_t *dst, intptr_t dstride, int16_t val); |
183 | void x265_blockfill_s_16x16_sse2(int16_t *dst, intptr_t dstride, int16_t val); |
184 | void x265_blockfill_s_32x32_sse2(int16_t *dst, intptr_t dstride, int16_t val); |
185 | void x265_blockcopy_ss_16x4_avx(int16_t *dest, intptr_t deststride, int16_t *src, intptr_t srcstride); /* hand-written _avx prototypes for int16_t to int16_t block copies; only widths 16 and 64 are declared here, and parameter names differ from the macro-generated _ss_ prototypes (dest/src vs a/b) while the types match */ |
186 | void x265_blockcopy_ss_16x8_avx(int16_t *dest, intptr_t deststride, int16_t *src, intptr_t srcstride); |
187 | void x265_blockcopy_ss_16x12_avx(int16_t *dest, intptr_t deststride, int16_t *src, intptr_t srcstride); |
188 | void x265_blockcopy_ss_16x16_avx(int16_t *dest, intptr_t deststride, int16_t *src, intptr_t srcstride); |
189 | void x265_blockcopy_ss_16x24_avx(int16_t *dest, intptr_t deststride, int16_t *src, intptr_t srcstride); |
190 | void x265_blockcopy_ss_16x32_avx(int16_t *dest, intptr_t deststride, int16_t *src, intptr_t srcstride); |
191 | void x265_blockcopy_ss_16x64_avx(int16_t *dest, intptr_t deststride, int16_t *src, intptr_t srcstride); |
192 | void x265_blockcopy_ss_64x16_avx(int16_t *dest, intptr_t deststride, int16_t *src, intptr_t srcstride); |
193 | void x265_blockcopy_ss_64x32_avx(int16_t *dest, intptr_t deststride, int16_t *src, intptr_t srcstride); |
194 | void x265_blockcopy_ss_64x48_avx(int16_t *dest, intptr_t deststride, int16_t *src, intptr_t srcstride); |
195 | void x265_blockcopy_ss_64x64_avx(int16_t *dest, intptr_t deststride, int16_t *src, intptr_t srcstride); |
196 | ||
197 | void x265_blockcopy_pp_32x8_avx(pixel * a, intptr_t stridea, pixel * b, intptr_t strideb); /* hand-written _avx prototypes for pixel to pixel block copies; only width 32 is declared here, with the same signature as the macro-generated _pp_ prototypes */ |
198 | void x265_blockcopy_pp_32x16_avx(pixel * a, intptr_t stridea, pixel * b, intptr_t strideb); |
199 | void x265_blockcopy_pp_32x24_avx(pixel * a, intptr_t stridea, pixel * b, intptr_t strideb); |
200 | void x265_blockcopy_pp_32x32_avx(pixel * a, intptr_t stridea, pixel * b, intptr_t strideb); |
201 | void x265_blockcopy_pp_32x48_avx(pixel * a, intptr_t stridea, pixel * b, intptr_t strideb); |
202 | void x265_blockcopy_pp_32x64_avx(pixel * a, intptr_t stridea, pixel * b, intptr_t strideb); |
203 | ||
204 | void x265_blockfill_s_16x16_avx2(int16_t *dst, intptr_t dstride, int16_t val); /* _avx2 counterparts of the 16x16 and 32x32 blockfill_s prototypes; same signature as the _sse2 versions */ |
205 | void x265_blockfill_s_32x32_avx2(int16_t *dst, intptr_t dstride, int16_t val); |
206 | ||
207 | #undef BLOCKCOPY_COMMON /* every helper macro defined in this header is undefined again here so that none of them leak into files that include it */ |
208 | #undef BLOCKCOPY_SS_PP |
209 | #undef BLOCKCOPY_SP |
210 | #undef BLOCKCOPY_PS |
211 | #undef SETUP_BLOCKCOPY_PS |
212 | #undef SETUP_BLOCKCOPY_SP |
213 | #undef SETUP_BLOCKCOPY_SS_PP |
214 | #undef SETUP_BLOCKCOPY_FUNC |
215 | ||
216 | #endif // ifndef X265_BLOCKCOPY8_H