Commit | Line | Data |
---|---|---|
2ba45a60 DM |
1 | /* |
2 | * This file is part of MPlayer. | |
3 | * | |
4 | * MPlayer is free software; you can redistribute it and/or modify | |
5 | * it under the terms of the GNU General Public License as published by | |
6 | * the Free Software Foundation; either version 2 of the License, or | |
7 | * (at your option) any later version. | |
8 | * | |
9 | * MPlayer is distributed in the hope that it will be useful, | |
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
12 | * GNU General Public License for more details. | |
13 | * | |
14 | * You should have received a copy of the GNU General Public License along | |
15 | * with MPlayer; if not, write to the Free Software Foundation, Inc., | |
16 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | |
17 | */ | |
18 | ||
19 | #include <stdio.h> | |
20 | #include <stdlib.h> | |
21 | #include <string.h> | |
22 | #include <inttypes.h> | |
23 | ||
24 | #include "config.h" | |
25 | #include "mp_msg.h" | |
26 | #include "cpudetect.h" | |
27 | ||
28 | #include "img_format.h" | |
29 | #include "mp_image.h" | |
30 | #include "vf.h" | |
31 | #include "libavutil/attributes.h" | |
32 | #include "libavutil/x86/asm.h" | |
33 | ||
/*
 * Signature shared by every row packer in this file: combine one row of
 * planar luma (y) with one row each of the two chroma planes (u, v) into
 * w pixels of packed output at dst.  us and vs are chroma row offsets that
 * only the interpolating packers read.
 */
typedef void pack_func_t(unsigned char *dst, unsigned char *y,
                         unsigned char *u, unsigned char *v,
                         int w, int us, int vs);

/* Per-instance filter state. */
struct vf_priv_s {
    /* Mode number parsed from the filter arguments (0 = nearest, 1 = linear). */
    int mode;
    /* The two row packers that ilpack() alternates between. */
    pack_func_t *pack[2];
};
41 | ||
/*
 * Nearest-neighbour row packer (plain C).
 *
 * Interleaves one luma row with one chroma row pair as Y U Y V, emitting
 * four output bytes for every two luma samples.  w/2 pixel pairs are
 * processed, so an odd trailing luma sample is not written.
 * The chroma strides are accepted only to match pack_func_t.
 */
static void pack_nn_C(unsigned char *dst, unsigned char *y,
    unsigned char *u, unsigned char *v, int w,
    int us, int vs)
{
    int pairs;

    (void)us;
    (void)vs;

    for (pairs = w / 2; pairs; pairs--) {
        dst[0] = y[0];
        dst[1] = *u++;
        dst[2] = y[1];
        dst[3] = *v++;
        dst += 4;
        y += 2;
    }
}
54 | ||
/*
 * Linear row packer, 7:1 weighting (plain C).
 *
 * Output is Y U Y V like the nearest-neighbour packer, but each chroma byte
 * is (7 * sample at the current position + 1 * sample two strides away) / 8.
 * us / vs are the signed offsets of one chroma row step; the second sample
 * is taken at twice that offset.
 */
static void pack_li_0_C(unsigned char *dst, unsigned char *y,
    unsigned char *u, unsigned char *v, int w, int us, int vs)
{
    const int u_far = us + us;
    const int v_far = vs + vs;
    int pairs;

    for (pairs = w / 2; pairs; pairs--) {
        dst[0] = y[0];
        dst[1] = (u[u_far] + 7 * u[0]) >> 3;
        dst[2] = y[1];
        dst[3] = (v[v_far] + 7 * v[0]) >> 3;
        dst += 4;
        y += 2;
        u++;
        v++;
    }
}
67 | ||
/*
 * Linear row packer, 5:3 weighting (plain C).
 *
 * Output is Y U Y V; each chroma byte is
 * (5 * sample at the current position + 3 * sample two strides away) / 8.
 * us / vs are the signed offsets of one chroma row step; the second sample
 * is taken at twice that offset.
 */
static void pack_li_1_C(unsigned char *dst, unsigned char *y,
    unsigned char *u, unsigned char *v, int w, int us, int vs)
{
    const int u_far = us + us;
    const int v_far = vs + vs;
    int pairs;

    for (pairs = w / 2; pairs; pairs--) {
        dst[0] = y[0];
        dst[1] = (3 * u[u_far] + 5 * u[0]) >> 3;
        dst[2] = y[1];
        dst[3] = (3 * v[v_far] + 5 * v[0]) >> 3;
        dst += 4;
        y += 2;
        u++;
        v++;
    }
}
80 | ||
81 | #if HAVE_MMX | |
82 | static void pack_nn_MMX(unsigned char *dst, unsigned char *y, | |
83 | unsigned char *u, unsigned char *v, int w, | |
84 | int av_unused us, int av_unused vs) | |
85 | { | |
86 | __asm__ volatile ("" | |
87 | ASMALIGN(4) | |
88 | "1: \n\t" | |
89 | "movq (%0), %%mm1 \n\t" | |
90 | "movq (%0), %%mm2 \n\t" | |
91 | "movq (%1), %%mm4 \n\t" | |
92 | "movq (%2), %%mm6 \n\t" | |
93 | "punpcklbw %%mm6, %%mm4 \n\t" | |
94 | "punpcklbw %%mm4, %%mm1 \n\t" | |
95 | "punpckhbw %%mm4, %%mm2 \n\t" | |
96 | ||
97 | "add $8, %0 \n\t" | |
98 | "add $4, %1 \n\t" | |
99 | "add $4, %2 \n\t" | |
100 | "movq %%mm1, (%3) \n\t" | |
101 | "movq %%mm2, 8(%3) \n\t" | |
102 | "add $16, %3 \n\t" | |
103 | "decl %4 \n\t" | |
104 | "jnz 1b \n\t" | |
105 | "emms \n\t" | |
106 | : | |
107 | : "r" (y), "r" (u), "r" (v), "r" (dst), "r" (w/8) | |
108 | : "memory" | |
109 | ); | |
110 | pack_nn_C(dst, y, u, v, (w&7), 0, 0); | |
111 | } | |
112 | ||
113 | #if HAVE_EBX_AVAILABLE | |
114 | static void pack_li_0_MMX(unsigned char *dst, unsigned char *y, | |
115 | unsigned char *u, unsigned char *v, int w, int us, int vs) | |
116 | { | |
117 | __asm__ volatile ("" | |
118 | "push %%"REG_BP" \n\t" | |
119 | #if ARCH_X86_64 | |
120 | "mov %6, %%"REG_BP" \n\t" | |
121 | #else | |
122 | "movl 4(%%"REG_d"), %%"REG_BP" \n\t" | |
123 | "movl (%%"REG_d"), %%"REG_d" \n\t" | |
124 | #endif | |
125 | "pxor %%mm0, %%mm0 \n\t" | |
126 | ||
127 | ASMALIGN(4) | |
128 | "2: \n\t" | |
129 | "movq (%%"REG_S"), %%mm1 \n\t" | |
130 | "movq (%%"REG_S"), %%mm2 \n\t" | |
131 | ||
132 | "movq (%%"REG_a",%%"REG_d",2), %%mm4 \n\t" | |
133 | "movq (%%"REG_b",%%"REG_BP",2), %%mm6 \n\t" | |
134 | "punpcklbw %%mm0, %%mm4 \n\t" | |
135 | "punpcklbw %%mm0, %%mm6 \n\t" | |
136 | "movq (%%"REG_a"), %%mm3 \n\t" | |
137 | "movq (%%"REG_b"), %%mm5 \n\t" | |
138 | "punpcklbw %%mm0, %%mm3 \n\t" | |
139 | "punpcklbw %%mm0, %%mm5 \n\t" | |
140 | "paddw %%mm3, %%mm4 \n\t" | |
141 | "paddw %%mm5, %%mm6 \n\t" | |
142 | "paddw %%mm3, %%mm4 \n\t" | |
143 | "paddw %%mm5, %%mm6 \n\t" | |
144 | "paddw %%mm3, %%mm4 \n\t" | |
145 | "paddw %%mm5, %%mm6 \n\t" | |
146 | "paddw %%mm3, %%mm4 \n\t" | |
147 | "paddw %%mm5, %%mm6 \n\t" | |
148 | "paddw %%mm3, %%mm4 \n\t" | |
149 | "paddw %%mm5, %%mm6 \n\t" | |
150 | "paddw %%mm3, %%mm4 \n\t" | |
151 | "paddw %%mm5, %%mm6 \n\t" | |
152 | "paddw %%mm3, %%mm4 \n\t" | |
153 | "paddw %%mm5, %%mm6 \n\t" | |
154 | "psrlw $3, %%mm4 \n\t" | |
155 | "psrlw $3, %%mm6 \n\t" | |
156 | "packuswb %%mm4, %%mm4 \n\t" | |
157 | "packuswb %%mm6, %%mm6 \n\t" | |
158 | "punpcklbw %%mm6, %%mm4 \n\t" | |
159 | "punpcklbw %%mm4, %%mm1 \n\t" | |
160 | "punpckhbw %%mm4, %%mm2 \n\t" | |
161 | ||
162 | "movq %%mm1, (%%"REG_D") \n\t" | |
163 | "movq %%mm2, 8(%%"REG_D") \n\t" | |
164 | ||
165 | "movq 8(%%"REG_S"), %%mm1 \n\t" | |
166 | "movq 8(%%"REG_S"), %%mm2 \n\t" | |
167 | ||
168 | "movq (%%"REG_a",%%"REG_d",2), %%mm4 \n\t" | |
169 | "movq (%%"REG_b",%%"REG_BP",2), %%mm6 \n\t" | |
170 | "punpckhbw %%mm0, %%mm4 \n\t" | |
171 | "punpckhbw %%mm0, %%mm6 \n\t" | |
172 | "movq (%%"REG_a"), %%mm3 \n\t" | |
173 | "movq (%%"REG_b"), %%mm5 \n\t" | |
174 | "punpckhbw %%mm0, %%mm3 \n\t" | |
175 | "punpckhbw %%mm0, %%mm5 \n\t" | |
176 | "paddw %%mm3, %%mm4 \n\t" | |
177 | "paddw %%mm5, %%mm6 \n\t" | |
178 | "paddw %%mm3, %%mm4 \n\t" | |
179 | "paddw %%mm5, %%mm6 \n\t" | |
180 | "paddw %%mm3, %%mm4 \n\t" | |
181 | "paddw %%mm5, %%mm6 \n\t" | |
182 | "paddw %%mm3, %%mm4 \n\t" | |
183 | "paddw %%mm5, %%mm6 \n\t" | |
184 | "paddw %%mm3, %%mm4 \n\t" | |
185 | "paddw %%mm5, %%mm6 \n\t" | |
186 | "paddw %%mm3, %%mm4 \n\t" | |
187 | "paddw %%mm5, %%mm6 \n\t" | |
188 | "paddw %%mm3, %%mm4 \n\t" | |
189 | "paddw %%mm5, %%mm6 \n\t" | |
190 | "psrlw $3, %%mm4 \n\t" | |
191 | "psrlw $3, %%mm6 \n\t" | |
192 | "packuswb %%mm4, %%mm4 \n\t" | |
193 | "packuswb %%mm6, %%mm6 \n\t" | |
194 | "punpcklbw %%mm6, %%mm4 \n\t" | |
195 | "punpcklbw %%mm4, %%mm1 \n\t" | |
196 | "punpckhbw %%mm4, %%mm2 \n\t" | |
197 | ||
198 | "add $16, %%"REG_S" \n\t" | |
199 | "add $8, %%"REG_a" \n\t" | |
200 | "add $8, %%"REG_b" \n\t" | |
201 | ||
202 | "movq %%mm1, 16(%%"REG_D") \n\t" | |
203 | "movq %%mm2, 24(%%"REG_D") \n\t" | |
204 | "add $32, %%"REG_D" \n\t" | |
205 | ||
206 | "decl %%ecx \n\t" | |
207 | "jnz 2b \n\t" | |
208 | "emms \n\t" | |
209 | "pop %%"REG_BP" \n\t" | |
210 | : | |
211 | : "S" (y), "D" (dst), "a" (u), "b" (v), "c" (w/16), | |
212 | #if ARCH_X86_64 | |
213 | "d" ((x86_reg)us), "r" ((x86_reg)vs) | |
214 | #else | |
215 | "d" (&us) | |
216 | #endif | |
217 | : "memory" | |
218 | ); | |
219 | pack_li_0_C(dst, y, u, v, (w&15), us, vs); | |
220 | } | |
221 | ||
222 | static void pack_li_1_MMX(unsigned char *dst, unsigned char *y, | |
223 | unsigned char *u, unsigned char *v, int w, int us, int vs) | |
224 | { | |
225 | __asm__ volatile ("" | |
226 | "push %%"REG_BP" \n\t" | |
227 | #if ARCH_X86_64 | |
228 | "mov %6, %%"REG_BP" \n\t" | |
229 | #else | |
230 | "movl 4(%%"REG_d"), %%"REG_BP" \n\t" | |
231 | "movl (%%"REG_d"), %%"REG_d" \n\t" | |
232 | #endif | |
233 | "pxor %%mm0, %%mm0 \n\t" | |
234 | ||
235 | ASMALIGN(4) | |
236 | "3: \n\t" | |
237 | "movq (%%"REG_S"), %%mm1 \n\t" | |
238 | "movq (%%"REG_S"), %%mm2 \n\t" | |
239 | ||
240 | "movq (%%"REG_a",%%"REG_d",2), %%mm4 \n\t" | |
241 | "movq (%%"REG_b",%%"REG_BP",2), %%mm6 \n\t" | |
242 | "punpcklbw %%mm0, %%mm4 \n\t" | |
243 | "punpcklbw %%mm0, %%mm6 \n\t" | |
244 | "movq (%%"REG_a"), %%mm3 \n\t" | |
245 | "movq (%%"REG_b"), %%mm5 \n\t" | |
246 | "punpcklbw %%mm0, %%mm3 \n\t" | |
247 | "punpcklbw %%mm0, %%mm5 \n\t" | |
248 | "movq %%mm4, %%mm7 \n\t" | |
249 | "paddw %%mm4, %%mm4 \n\t" | |
250 | "paddw %%mm7, %%mm4 \n\t" | |
251 | "movq %%mm6, %%mm7 \n\t" | |
252 | "paddw %%mm6, %%mm6 \n\t" | |
253 | "paddw %%mm7, %%mm6 \n\t" | |
254 | "paddw %%mm3, %%mm4 \n\t" | |
255 | "paddw %%mm5, %%mm6 \n\t" | |
256 | "paddw %%mm3, %%mm4 \n\t" | |
257 | "paddw %%mm5, %%mm6 \n\t" | |
258 | "paddw %%mm3, %%mm4 \n\t" | |
259 | "paddw %%mm5, %%mm6 \n\t" | |
260 | "paddw %%mm3, %%mm4 \n\t" | |
261 | "paddw %%mm5, %%mm6 \n\t" | |
262 | "paddw %%mm3, %%mm4 \n\t" | |
263 | "paddw %%mm5, %%mm6 \n\t" | |
264 | "psrlw $3, %%mm4 \n\t" | |
265 | "psrlw $3, %%mm6 \n\t" | |
266 | "packuswb %%mm4, %%mm4 \n\t" | |
267 | "packuswb %%mm6, %%mm6 \n\t" | |
268 | "punpcklbw %%mm6, %%mm4 \n\t" | |
269 | "punpcklbw %%mm4, %%mm1 \n\t" | |
270 | "punpckhbw %%mm4, %%mm2 \n\t" | |
271 | ||
272 | "movq %%mm1, (%%"REG_D") \n\t" | |
273 | "movq %%mm2, 8(%%"REG_D") \n\t" | |
274 | ||
275 | "movq 8(%%"REG_S"), %%mm1 \n\t" | |
276 | "movq 8(%%"REG_S"), %%mm2 \n\t" | |
277 | ||
278 | "movq (%%"REG_a",%%"REG_d",2), %%mm4 \n\t" | |
279 | "movq (%%"REG_b",%%"REG_BP",2), %%mm6 \n\t" | |
280 | "punpckhbw %%mm0, %%mm4 \n\t" | |
281 | "punpckhbw %%mm0, %%mm6 \n\t" | |
282 | "movq (%%"REG_a"), %%mm3 \n\t" | |
283 | "movq (%%"REG_b"), %%mm5 \n\t" | |
284 | "punpckhbw %%mm0, %%mm3 \n\t" | |
285 | "punpckhbw %%mm0, %%mm5 \n\t" | |
286 | "movq %%mm4, %%mm7 \n\t" | |
287 | "paddw %%mm4, %%mm4 \n\t" | |
288 | "paddw %%mm7, %%mm4 \n\t" | |
289 | "movq %%mm6, %%mm7 \n\t" | |
290 | "paddw %%mm6, %%mm6 \n\t" | |
291 | "paddw %%mm7, %%mm6 \n\t" | |
292 | "paddw %%mm3, %%mm4 \n\t" | |
293 | "paddw %%mm5, %%mm6 \n\t" | |
294 | "paddw %%mm3, %%mm4 \n\t" | |
295 | "paddw %%mm5, %%mm6 \n\t" | |
296 | "paddw %%mm3, %%mm4 \n\t" | |
297 | "paddw %%mm5, %%mm6 \n\t" | |
298 | "paddw %%mm3, %%mm4 \n\t" | |
299 | "paddw %%mm5, %%mm6 \n\t" | |
300 | "paddw %%mm3, %%mm4 \n\t" | |
301 | "paddw %%mm5, %%mm6 \n\t" | |
302 | "psrlw $3, %%mm4 \n\t" | |
303 | "psrlw $3, %%mm6 \n\t" | |
304 | "packuswb %%mm4, %%mm4 \n\t" | |
305 | "packuswb %%mm6, %%mm6 \n\t" | |
306 | "punpcklbw %%mm6, %%mm4 \n\t" | |
307 | "punpcklbw %%mm4, %%mm1 \n\t" | |
308 | "punpckhbw %%mm4, %%mm2 \n\t" | |
309 | ||
310 | "add $16, %%"REG_S" \n\t" | |
311 | "add $8, %%"REG_a" \n\t" | |
312 | "add $8, %%"REG_b" \n\t" | |
313 | ||
314 | "movq %%mm1, 16(%%"REG_D") \n\t" | |
315 | "movq %%mm2, 24(%%"REG_D") \n\t" | |
316 | "add $32, %%"REG_D" \n\t" | |
317 | ||
318 | "decl %%ecx \n\t" | |
319 | "jnz 3b \n\t" | |
320 | "emms \n\t" | |
321 | "pop %%"REG_BP" \n\t" | |
322 | : | |
323 | : "S" (y), "D" (dst), "a" (u), "b" (v), "c" (w/16), | |
324 | #if ARCH_X86_64 | |
325 | "d" ((x86_reg)us), "r" ((x86_reg)vs) | |
326 | #else | |
327 | "d" (&us) | |
328 | #endif | |
329 | : "memory" | |
330 | ); | |
331 | pack_li_1_C(dst, y, u, v, (w&15), us, vs); | |
332 | } | |
333 | #endif /* HAVE_EBX_AVAILABLE */ | |
334 | #endif | |
335 | ||
336 | static pack_func_t *pack_nn; | |
337 | static pack_func_t *pack_li_0; | |
338 | static pack_func_t *pack_li_1; | |
339 | ||
340 | static void ilpack(unsigned char *dst, unsigned char *src[3], | |
341 | int dststride, int srcstride[3], int w, int h, pack_func_t *pack[2]) | |
342 | { | |
343 | int i; | |
344 | unsigned char *y, *u, *v; | |
345 | int ys = srcstride[0], us = srcstride[1], vs = srcstride[2]; | |
346 | int a, b; | |
347 | ||
348 | y = src[0]; | |
349 | u = src[1]; | |
350 | v = src[2]; | |
351 | ||
352 | pack_nn(dst, y, u, v, w, 0, 0); | |
353 | y += ys; dst += dststride; | |
354 | pack_nn(dst, y, u+us, v+vs, w, 0, 0); | |
355 | y += ys; dst += dststride; | |
356 | for (i=2; i<h-2; i++) { | |
357 | a = (i&2) ? 1 : -1; | |
358 | b = (i&1) ^ ((i&2)>>1); | |
359 | pack[b](dst, y, u, v, w, us*a, vs*a); | |
360 | y += ys; | |
361 | if ((i&3) == 1) { | |
362 | u -= us; | |
363 | v -= vs; | |
364 | } else { | |
365 | u += us; | |
366 | v += vs; | |
367 | } | |
368 | dst += dststride; | |
369 | } | |
370 | pack_nn(dst, y, u, v, w, 0, 0); | |
371 | y += ys; dst += dststride; u += us; v += vs; | |
372 | pack_nn(dst, y, u, v, w, 0, 0); | |
373 | } | |
374 | ||
375 | ||
376 | static int put_image(struct vf_instance *vf, mp_image_t *mpi, double pts) | |
377 | { | |
378 | mp_image_t *dmpi; | |
379 | ||
380 | // hope we'll get DR buffer: | |
381 | dmpi=ff_vf_get_image(vf->next, IMGFMT_YUY2, | |
382 | MP_IMGTYPE_TEMP, MP_IMGFLAG_ACCEPT_STRIDE, | |
383 | mpi->w, mpi->h); | |
384 | ||
385 | ilpack(dmpi->planes[0], mpi->planes, dmpi->stride[0], mpi->stride, mpi->w, mpi->h, vf->priv->pack); | |
386 | ||
387 | return ff_vf_next_put_image(vf,dmpi, pts); | |
388 | } | |
389 | ||
390 | static int config(struct vf_instance *vf, | |
391 | int width, int height, int d_width, int d_height, | |
392 | unsigned int flags, unsigned int outfmt) | |
393 | { | |
394 | /* FIXME - also support UYVY output? */ | |
395 | return ff_vf_next_config(vf, width, height, d_width, d_height, flags, IMGFMT_YUY2); | |
396 | } | |
397 | ||
398 | ||
399 | static int query_format(struct vf_instance *vf, unsigned int fmt) | |
400 | { | |
401 | /* FIXME - really any YUV 4:2:0 input format should work */ | |
402 | switch (fmt) { | |
403 | case IMGFMT_YV12: | |
404 | case IMGFMT_IYUV: | |
405 | case IMGFMT_I420: | |
406 | return ff_vf_next_query_format(vf,IMGFMT_YUY2); | |
407 | } | |
408 | return 0; | |
409 | } | |
410 | ||
411 | static int vf_open(vf_instance_t *vf, char *args) | |
412 | { | |
413 | vf->config=config; | |
414 | vf->query_format=query_format; | |
415 | vf->put_image=put_image; | |
416 | vf->priv = calloc(1, sizeof(struct vf_priv_s)); | |
417 | vf->priv->mode = 1; | |
418 | if (args) sscanf(args, "%d", &vf->priv->mode); | |
419 | ||
420 | pack_nn = pack_nn_C; | |
421 | pack_li_0 = pack_li_0_C; | |
422 | pack_li_1 = pack_li_1_C; | |
423 | #if HAVE_MMX | |
424 | if(ff_gCpuCaps.hasMMX) { | |
425 | pack_nn = pack_nn_MMX; | |
426 | #if HAVE_EBX_AVAILABLE | |
427 | pack_li_0 = pack_li_0_MMX; | |
428 | pack_li_1 = pack_li_1_MMX; | |
429 | #endif | |
430 | } | |
431 | #endif | |
432 | ||
433 | switch(vf->priv->mode) { | |
434 | case 0: | |
435 | vf->priv->pack[0] = vf->priv->pack[1] = pack_nn; | |
436 | break; | |
437 | default: | |
438 | ff_mp_msg(MSGT_VFILTER, MSGL_WARN, | |
439 | "ilpack: unknown mode %d (fallback to linear)\n", | |
440 | vf->priv->mode); | |
441 | /* Fallthrough */ | |
442 | case 1: | |
443 | vf->priv->pack[0] = pack_li_0; | |
444 | vf->priv->pack[1] = pack_li_1; | |
445 | break; | |
446 | } | |
447 | ||
448 | return 1; | |
449 | } | |
450 | ||
451 | const vf_info_t ff_vf_info_ilpack = { | |
452 | "4:2:0 planar -> 4:2:2 packed reinterlacer", | |
453 | "ilpack", | |
454 | "Richard Felker", | |
455 | "", | |
456 | vf_open, | |
457 | NULL | |
458 | }; |