Commit | Line | Data |
---|---|---|
72b9787e JB |
1 | /***************************************************************************** |
2 | * Copyright (C) 2013 x265 project | |
3 | * | |
4 | * Authors: Steve Borho <steve@borho.org> | |
5 | * | |
6 | * This program is free software; you can redistribute it and/or modify | |
7 | * it under the terms of the GNU General Public License as published by | |
8 | * the Free Software Foundation; either version 2 of the License, or | |
9 | * (at your option) any later version. | |
10 | * | |
11 | * This program is distributed in the hope that it will be useful, | |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | * GNU General Public License for more details. | |
15 | * | |
16 | * You should have received a copy of the GNU General Public License | |
17 | * along with this program; if not, write to the Free Software | |
18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. | |
19 | * | |
20 | * This program is also available under a commercial proprietary license. | |
21 | * For more information, contact us at license @ x265.com. | |
22 | *****************************************************************************/ | |
23 | ||
24 | #ifndef X265_BLOCKCOPY8_H | |
25 | #define X265_BLOCKCOPY8_H | |
26 | ||
/* cpy2Dto1D family: SSE2 entry points, one per size suffix (4, 8, 16, 32) and
 * per variant (shl, shr). Each prototype takes a destination pointer, a const
 * source pointer, a stride for the source (srcStride) and a shift amount.
 * Only declarations appear here; no definitions are visible in this header.
 * NOTE(review): the names suggest a strided 2D block being copied into a
 * contiguous buffer with a left/right shift applied -- confirm against the
 * implementation. */
b53f7c52 JB |
27 | void x265_cpy2Dto1D_shl_4_sse2(int16_t* dst, const int16_t* src, intptr_t srcStride, int shift); |
28 | void x265_cpy2Dto1D_shl_8_sse2(int16_t* dst, const int16_t* src, intptr_t srcStride, int shift); | |
29 | void x265_cpy2Dto1D_shl_16_sse2(int16_t* dst, const int16_t* src, intptr_t srcStride, int shift); | |
30 | void x265_cpy2Dto1D_shl_32_sse2(int16_t* dst, const int16_t* src, intptr_t srcStride, int shift); | |
31 | void x265_cpy2Dto1D_shr_4_sse2(int16_t* dst, const int16_t* src, intptr_t srcStride, int shift); | |
32 | void x265_cpy2Dto1D_shr_8_sse2(int16_t* dst, const int16_t* src, intptr_t srcStride, int shift); | |
33 | void x265_cpy2Dto1D_shr_16_sse2(int16_t* dst, const int16_t* src, intptr_t srcStride, int shift); | |
34 | void x265_cpy2Dto1D_shr_32_sse2(int16_t* dst, const int16_t* src, intptr_t srcStride, int shift); | |
/* cpy1Dto2D family: AVX2 and SSE2 entry points for the shl and shr variants,
 * one per size suffix (4, 8, 16, 32). Each prototype takes a destination
 * pointer, a const source pointer, a stride and a shift amount.
 * The stride parameter is named dstStride in every prototype of this family so
 * that the AVX2 and SSE2 declarations agree; parameter names in a prototype do
 * not affect callers or the ABI.
 * NOTE(review): the names suggest a contiguous buffer being copied into a
 * strided 2D block with a left/right shift applied -- confirm against the
 * implementation. */
35 | void x265_cpy1Dto2D_shl_4_avx2(int16_t* dst, const int16_t* src, intptr_t dstStride, int shift); | |
36 | void x265_cpy1Dto2D_shl_8_avx2(int16_t* dst, const int16_t* src, intptr_t dstStride, int shift); | |
37 | void x265_cpy1Dto2D_shl_16_avx2(int16_t* dst, const int16_t* src, intptr_t dstStride, int shift); | |
38 | void x265_cpy1Dto2D_shl_32_avx2(int16_t* dst, const int16_t* src, intptr_t dstStride, int shift); | |
39 | void x265_cpy1Dto2D_shl_4_sse2(int16_t* dst, const int16_t* src, intptr_t dstStride, int shift); | |
40 | void x265_cpy1Dto2D_shl_8_sse2(int16_t* dst, const int16_t* src, intptr_t dstStride, int shift); | |
41 | void x265_cpy1Dto2D_shl_16_sse2(int16_t* dst, const int16_t* src, intptr_t dstStride, int shift); | |
42 | void x265_cpy1Dto2D_shl_32_sse2(int16_t* dst, const int16_t* src, intptr_t dstStride, int shift); | |
43 | void x265_cpy1Dto2D_shr_4_avx2(int16_t* dst, const int16_t* src, intptr_t dstStride, int shift); | |
44 | void x265_cpy1Dto2D_shr_8_avx2(int16_t* dst, const int16_t* src, intptr_t dstStride, int shift); | |
45 | void x265_cpy1Dto2D_shr_16_avx2(int16_t* dst, const int16_t* src, intptr_t dstStride, int shift); | |
46 | void x265_cpy1Dto2D_shr_32_avx2(int16_t* dst, const int16_t* src, intptr_t dstStride, int shift); | |
47 | void x265_cpy1Dto2D_shr_4_sse2(int16_t* dst, const int16_t* src, intptr_t dstStride, int shift); | |
48 | void x265_cpy1Dto2D_shr_8_sse2(int16_t* dst, const int16_t* src, intptr_t dstStride, int shift); | |
49 | void x265_cpy1Dto2D_shr_16_sse2(int16_t* dst, const int16_t* src, intptr_t dstStride, int shift); | |
50 | void x265_cpy1Dto2D_shr_32_sse2(int16_t* dst, const int16_t* src, intptr_t dstStride, int shift); | |
/* copy_cnt family: SSE4 and AVX2 entry points for the size suffixes 4, 8, 16
 * and 32. Each prototype takes a destination pointer, a const source pointer
 * and a source stride, and returns a uint32_t.
 * NOTE(review): the name suggests the return value is a count gathered while
 * copying -- confirm what is counted against the implementation. */
51 | uint32_t x265_copy_cnt_4_sse4(int16_t* dst, const int16_t* src, intptr_t srcStride); | |
52 | uint32_t x265_copy_cnt_8_sse4(int16_t* dst, const int16_t* src, intptr_t srcStride); | |
53 | uint32_t x265_copy_cnt_16_sse4(int16_t* dst, const int16_t* src, intptr_t srcStride); | |
54 | uint32_t x265_copy_cnt_32_sse4(int16_t* dst, const int16_t* src, intptr_t srcStride); | |
55 | uint32_t x265_copy_cnt_4_avx2(int16_t* dst, const int16_t* src, intptr_t srcStride); | |
56 | uint32_t x265_copy_cnt_8_avx2(int16_t* dst, const int16_t* src, intptr_t srcStride); | |
57 | uint32_t x265_copy_cnt_16_avx2(int16_t* dst, const int16_t* src, intptr_t srcStride); | |
58 | uint32_t x265_copy_cnt_32_avx2(int16_t* dst, const int16_t* src, intptr_t srcStride); | |
72b9787e JB |
59 | |
/* Declares three blockcopy prototypes for one block size. W, H and cpu are
 * token-pasted into the function name, giving x265_blockcopy_{pp,sp,ss}_WxH
 * followed by the cpu suffix. Per the signatures: pp copies pixel to pixel,
 * sp takes an int16_t source and a pixel destination, ss copies int16_t to
 * int16_t. The expansion already ends with a semicolon. */
60 | #define SETUP_BLOCKCOPY_FUNC(W, H, cpu) \ | |
b53f7c52 JB |
61 | void x265_blockcopy_pp_ ## W ## x ## H ## cpu(pixel* a, intptr_t stridea, const pixel* b, intptr_t strideb); \ |
62 | void x265_blockcopy_sp_ ## W ## x ## H ## cpu(pixel* a, intptr_t stridea, const int16_t* b, intptr_t strideb); \ | |
63 | void x265_blockcopy_ss_ ## W ## x ## H ## cpu(int16_t* a, intptr_t stridea, const int16_t* b, intptr_t strideb); | |
72b9787e JB |
64 | |
/* Declares the ps blockcopy prototype for one block size: a pixel source
 * copied to an int16_t destination, each with its own stride. W, H and cpu
 * are token-pasted into the function name. */
65 | #define SETUP_BLOCKCOPY_PS(W, H, cpu) \ | |
b53f7c52 | 66 | void x265_blockcopy_ps_ ## W ## x ## H ## cpu(int16_t* dst, intptr_t dstStride, const pixel* src, intptr_t srcStride); |
72b9787e JB |
67 | |
/* Declares the sp blockcopy prototype for one block size: an int16_t source
 * (b) copied to a pixel destination (a), each with its own stride. W, H and
 * cpu are token-pasted into the function name. */
68 | #define SETUP_BLOCKCOPY_SP(W, H, cpu) \ | |
b53f7c52 | 69 | void x265_blockcopy_sp_ ## W ## x ## H ## cpu(pixel* a, intptr_t stridea, const int16_t* b, intptr_t strideb); |
72b9787e JB |
70 | |
/* Declares the pp (pixel to pixel) and ss (int16_t to int16_t) blockcopy
 * prototypes for one block size, without the sp variant. W, H and cpu are
 * token-pasted into the function name. */
71 | #define SETUP_BLOCKCOPY_SS_PP(W, H, cpu) \ | |
b53f7c52 JB |
72 | void x265_blockcopy_pp_ ## W ## x ## H ## cpu(pixel* a, intptr_t stridea, const pixel* b, intptr_t strideb); \ |
73 | void x265_blockcopy_ss_ ## W ## x ## H ## cpu(int16_t* a, intptr_t stridea, const int16_t* b, intptr_t strideb); | |
72b9787e JB |
74 | |
/* Expands SETUP_BLOCKCOPY_FUNC once per block size listed below, so a single
 * BLOCKCOPY_COMMON(cpu) declares the pp, sp and ss prototypes for all of those
 * sizes with the given cpu suffix. */
75 | #define BLOCKCOPY_COMMON(cpu) \ | |
76 | SETUP_BLOCKCOPY_FUNC(4, 4, cpu); \ | |
77 | SETUP_BLOCKCOPY_FUNC(4, 2, cpu); \ | |
78 | SETUP_BLOCKCOPY_FUNC(8, 8, cpu); \ | |
79 | SETUP_BLOCKCOPY_FUNC(8, 4, cpu); \ | |
80 | SETUP_BLOCKCOPY_FUNC(4, 8, cpu); \ | |
81 | SETUP_BLOCKCOPY_FUNC(8, 6, cpu); \ | |
82 | SETUP_BLOCKCOPY_FUNC(8, 2, cpu); \ | |
83 | SETUP_BLOCKCOPY_FUNC(16, 16, cpu); \ | |
84 | SETUP_BLOCKCOPY_FUNC(16, 8, cpu); \ | |
85 | SETUP_BLOCKCOPY_FUNC(8, 16, cpu); \ | |
86 | SETUP_BLOCKCOPY_FUNC(16, 12, cpu); \ | |
87 | SETUP_BLOCKCOPY_FUNC(12, 16, cpu); \ | |
88 | SETUP_BLOCKCOPY_FUNC(16, 4, cpu); \ | |
89 | SETUP_BLOCKCOPY_FUNC(4, 16, cpu); \ | |
90 | SETUP_BLOCKCOPY_FUNC(32, 32, cpu); \ | |
91 | SETUP_BLOCKCOPY_FUNC(32, 16, cpu); \ | |
92 | SETUP_BLOCKCOPY_FUNC(16, 32, cpu); \ | |
93 | SETUP_BLOCKCOPY_FUNC(32, 24, cpu); \ | |
94 | SETUP_BLOCKCOPY_FUNC(24, 32, cpu); \ | |
95 | SETUP_BLOCKCOPY_FUNC(32, 8, cpu); \ | |
96 | SETUP_BLOCKCOPY_FUNC(8, 32, cpu); \ | |
97 | SETUP_BLOCKCOPY_FUNC(64, 64, cpu); \ | |
98 | SETUP_BLOCKCOPY_FUNC(64, 32, cpu); \ | |
99 | SETUP_BLOCKCOPY_FUNC(32, 64, cpu); \ | |
100 | SETUP_BLOCKCOPY_FUNC(64, 48, cpu); \ | |
101 | SETUP_BLOCKCOPY_FUNC(48, 64, cpu); \ | |
102 | SETUP_BLOCKCOPY_FUNC(64, 16, cpu); \ | |
103 | SETUP_BLOCKCOPY_FUNC(16, 64, cpu); | |
104 | ||
/* Expands SETUP_BLOCKCOPY_SP once per block size listed below, declaring the
 * sp prototype for each with the given cpu suffix. None of these sizes appear
 * in BLOCKCOPY_COMMON, so the two lists do not declare the same function
 * twice. */
105 | #define BLOCKCOPY_SP(cpu) \ | |
106 | SETUP_BLOCKCOPY_SP(2, 4, cpu); \ | |
107 | SETUP_BLOCKCOPY_SP(2, 8, cpu); \ | |
108 | SETUP_BLOCKCOPY_SP(6, 8, cpu); \ | |
109 | \ | |
110 | SETUP_BLOCKCOPY_SP(2, 16, cpu); \ | |
111 | SETUP_BLOCKCOPY_SP(4, 32, cpu); \ | |
112 | SETUP_BLOCKCOPY_SP(6, 16, cpu); \ | |
113 | SETUP_BLOCKCOPY_SP(8, 12, cpu); \ | |
114 | SETUP_BLOCKCOPY_SP(8, 64, cpu); \ | |
115 | SETUP_BLOCKCOPY_SP(12, 32, cpu); \ | |
116 | SETUP_BLOCKCOPY_SP(16, 24, cpu); \ | |
117 | SETUP_BLOCKCOPY_SP(24, 64, cpu); \ | |
118 | SETUP_BLOCKCOPY_SP(32, 48, cpu); | |
119 | ||
/* Expands SETUP_BLOCKCOPY_SS_PP once per block size listed below, declaring
 * the pp and ss prototypes for each with the given cpu suffix. The size list
 * is the same as the one in BLOCKCOPY_SP. */
120 | #define BLOCKCOPY_SS_PP(cpu) \ | |
121 | SETUP_BLOCKCOPY_SS_PP(2, 4, cpu); \ | |
122 | SETUP_BLOCKCOPY_SS_PP(2, 8, cpu); \ | |
123 | SETUP_BLOCKCOPY_SS_PP(6, 8, cpu); \ | |
124 | \ | |
125 | SETUP_BLOCKCOPY_SS_PP(2, 16, cpu); \ | |
126 | SETUP_BLOCKCOPY_SS_PP(4, 32, cpu); \ | |
127 | SETUP_BLOCKCOPY_SS_PP(6, 16, cpu); \ | |
128 | SETUP_BLOCKCOPY_SS_PP(8, 12, cpu); \ | |
129 | SETUP_BLOCKCOPY_SS_PP(8, 64, cpu); \ | |
130 | SETUP_BLOCKCOPY_SS_PP(12, 32, cpu); \ | |
131 | SETUP_BLOCKCOPY_SS_PP(16, 24, cpu); \ | |
132 | SETUP_BLOCKCOPY_SS_PP(24, 64, cpu); \ | |
133 | SETUP_BLOCKCOPY_SS_PP(32, 48, cpu); | |
134 | ||
135 | ||
/* Expands SETUP_BLOCKCOPY_PS once per block size listed below, declaring the
 * ps prototype for each with the given cpu suffix. The list is written in two
 * groups separated by an empty continuation line; the second group repeats
 * the sizes of the second group in BLOCKCOPY_SP and BLOCKCOPY_SS_PP. */
136 | #define BLOCKCOPY_PS(cpu) \ | |
137 | SETUP_BLOCKCOPY_PS(2, 4, cpu); \ | |
138 | SETUP_BLOCKCOPY_PS(2, 8, cpu); \ | |
139 | SETUP_BLOCKCOPY_PS(4, 2, cpu); \ | |
140 | SETUP_BLOCKCOPY_PS(4, 4, cpu); \ | |
141 | SETUP_BLOCKCOPY_PS(4, 8, cpu); \ | |
142 | SETUP_BLOCKCOPY_PS(4, 16, cpu); \ | |
143 | SETUP_BLOCKCOPY_PS(6, 8, cpu); \ | |
144 | SETUP_BLOCKCOPY_PS(8, 2, cpu); \ | |
145 | SETUP_BLOCKCOPY_PS(8, 4, cpu); \ | |
146 | SETUP_BLOCKCOPY_PS(8, 6, cpu); \ | |
147 | SETUP_BLOCKCOPY_PS(8, 8, cpu); \ | |
148 | SETUP_BLOCKCOPY_PS(8, 16, cpu); \ | |
149 | SETUP_BLOCKCOPY_PS(8, 32, cpu); \ | |
150 | SETUP_BLOCKCOPY_PS(12, 16, cpu); \ | |
151 | SETUP_BLOCKCOPY_PS(16, 4, cpu); \ | |
152 | SETUP_BLOCKCOPY_PS(16, 8, cpu); \ | |
153 | SETUP_BLOCKCOPY_PS(16, 12, cpu); \ | |
154 | SETUP_BLOCKCOPY_PS(16, 16, cpu); \ | |
155 | SETUP_BLOCKCOPY_PS(16, 32, cpu); \ | |
156 | SETUP_BLOCKCOPY_PS(24, 32, cpu); \ | |
157 | SETUP_BLOCKCOPY_PS(32, 8, cpu); \ | |
158 | SETUP_BLOCKCOPY_PS(32, 16, cpu); \ | |
159 | SETUP_BLOCKCOPY_PS(32, 24, cpu); \ | |
160 | SETUP_BLOCKCOPY_PS(32, 32, cpu); \ | |
161 | SETUP_BLOCKCOPY_PS(16, 64, cpu); \ | |
162 | SETUP_BLOCKCOPY_PS(32, 64, cpu); \ | |
163 | SETUP_BLOCKCOPY_PS(48, 64, cpu); \ | |
164 | SETUP_BLOCKCOPY_PS(64, 16, cpu); \ | |
165 | SETUP_BLOCKCOPY_PS(64, 32, cpu); \ | |
166 | SETUP_BLOCKCOPY_PS(64, 48, cpu); \ | |
167 | SETUP_BLOCKCOPY_PS(64, 64, cpu); \ | |
168 | \ | |
169 | SETUP_BLOCKCOPY_PS(2, 16, cpu); \ | |
170 | SETUP_BLOCKCOPY_PS(4, 32, cpu); \ | |
171 | SETUP_BLOCKCOPY_PS(6, 16, cpu); \ | |
172 | SETUP_BLOCKCOPY_PS(8, 12, cpu); \ | |
173 | SETUP_BLOCKCOPY_PS(8, 64, cpu); \ | |
174 | SETUP_BLOCKCOPY_PS(12, 32, cpu); \ | |
175 | SETUP_BLOCKCOPY_PS(16, 24, cpu); \ | |
176 | SETUP_BLOCKCOPY_PS(24, 64, cpu); \ | |
177 | SETUP_BLOCKCOPY_PS(32, 48, cpu); | |
178 | ||
/* Emit the actual declarations by expanding the list macros with a cpu
 * suffix: pp/sp/ss for the common sizes and pp/ss for the extra sizes with
 * _sse2, sp and ps with _sse4, and finally sp for the extra sizes with _sse2. */
179 | BLOCKCOPY_COMMON(_sse2); | |
180 | BLOCKCOPY_SS_PP(_sse2); | |
181 | BLOCKCOPY_SP(_sse4); | |
182 | BLOCKCOPY_PS(_sse4); | |
183 | ||
184 | BLOCKCOPY_SP(_sse2); | |
185 | ||
/* blockfill_s: SSE2 entry points for the 4x4, 8x8, 16x16 and 32x32 suffixes.
 * Each prototype takes an int16_t destination pointer, a stride (dstride) and
 * an int16_t value.
 * NOTE(review): presumably writes val to every element of the block --
 * confirm against the implementation. */
b53f7c52 JB |
186 | void x265_blockfill_s_4x4_sse2(int16_t* dst, intptr_t dstride, int16_t val); |
187 | void x265_blockfill_s_8x8_sse2(int16_t* dst, intptr_t dstride, int16_t val); | |
188 | void x265_blockfill_s_16x16_sse2(int16_t* dst, intptr_t dstride, int16_t val); | |
189 | void x265_blockfill_s_32x32_sse2(int16_t* dst, intptr_t dstride, int16_t val); | |
/* blockcopy_ss with the _avx suffix, declared explicitly (not through the
 * SETUP_BLOCKCOPY_* macros) for the 16xN and 64xN sizes listed. Source and
 * destination are both int16_t, each with its own stride. These prototypes
 * name their parameters dst/dstStride/src/srcStride rather than the
 * a/stridea/b/strideb used by the macro-generated declarations. */
190 | void x265_blockcopy_ss_16x4_avx(int16_t* dst, intptr_t dstStride, const int16_t* src, intptr_t srcStride); | |
191 | void x265_blockcopy_ss_16x8_avx(int16_t* dst, intptr_t dstStride, const int16_t* src, intptr_t srcStride); | |
192 | void x265_blockcopy_ss_16x12_avx(int16_t* dst, intptr_t dstStride, const int16_t* src, intptr_t srcStride); | |
193 | void x265_blockcopy_ss_16x16_avx(int16_t* dst, intptr_t dstStride, const int16_t* src, intptr_t srcStride); | |
194 | void x265_blockcopy_ss_16x24_avx(int16_t* dst, intptr_t dstStride, const int16_t* src, intptr_t srcStride); | |
195 | void x265_blockcopy_ss_16x32_avx(int16_t* dst, intptr_t dstStride, const int16_t* src, intptr_t srcStride); | |
196 | void x265_blockcopy_ss_16x64_avx(int16_t* dst, intptr_t dstStride, const int16_t* src, intptr_t srcStride); | |
197 | void x265_blockcopy_ss_64x16_avx(int16_t* dst, intptr_t dstStride, const int16_t* src, intptr_t srcStride); | |
198 | void x265_blockcopy_ss_64x32_avx(int16_t* dst, intptr_t dstStride, const int16_t* src, intptr_t srcStride); | |
199 | void x265_blockcopy_ss_64x48_avx(int16_t* dst, intptr_t dstStride, const int16_t* src, intptr_t srcStride); | |
200 | void x265_blockcopy_ss_64x64_avx(int16_t* dst, intptr_t dstStride, const int16_t* src, intptr_t srcStride); | |
201 | ||
/* blockcopy_pp with the _avx suffix, declared explicitly for the 32xN sizes
 * listed. Source (b) and destination (a) are both pixel buffers, each with
 * its own stride. */
202 | void x265_blockcopy_pp_32x8_avx(pixel* a, intptr_t stridea, const pixel* b, intptr_t strideb); | |
203 | void x265_blockcopy_pp_32x16_avx(pixel* a, intptr_t stridea, const pixel* b, intptr_t strideb); | |
204 | void x265_blockcopy_pp_32x24_avx(pixel* a, intptr_t stridea, const pixel* b, intptr_t strideb); | |
205 | void x265_blockcopy_pp_32x32_avx(pixel* a, intptr_t stridea, const pixel* b, intptr_t strideb); | |
206 | void x265_blockcopy_pp_32x48_avx(pixel* a, intptr_t stridea, const pixel* b, intptr_t strideb); | |
207 | void x265_blockcopy_pp_32x64_avx(pixel* a, intptr_t stridea, const pixel* b, intptr_t strideb); | |
208 | ||
/* blockfill_s with the _avx2 suffix for the 16x16 and 32x32 sizes; same
 * parameter list as the _sse2 blockfill_s prototypes (int16_t destination,
 * stride, int16_t value). */
209 | void x265_blockfill_s_16x16_avx2(int16_t* dst, intptr_t dstride, int16_t val); | |
210 | void x265_blockfill_s_32x32_avx2(int16_t* dst, intptr_t dstride, int16_t val); | |
72b9787e JB |
211 | |
/* Remove every helper macro defined above so that none of them remains
 * defined for code that includes this header. */
212 | #undef BLOCKCOPY_COMMON | |
213 | #undef BLOCKCOPY_SS_PP | |
214 | #undef BLOCKCOPY_SP | |
215 | #undef BLOCKCOPY_PS | |
216 | #undef SETUP_BLOCKCOPY_PS | |
217 | #undef SETUP_BLOCKCOPY_SP | |
218 | #undef SETUP_BLOCKCOPY_SS_PP | |
219 | #undef SETUP_BLOCKCOPY_FUNC | |
220 | ||
221 | #endif // ifndef X265_BLOCKCOPY8_H