Commit | Line | Data |
---|---|---|
f6fa7814 DM |
1 | /* |
2 | * Copyright (c) 2002 Michael Niedermayer <michaelni@gmx.at> | |
3 | * Copyright (c) 2013 Paul B Mahol | |
4 | * | |
5 | * This file is part of FFmpeg. | |
6 | * | |
7 | * FFmpeg is free software; you can redistribute it and/or | |
8 | * modify it under the terms of the GNU Lesser General Public | |
9 | * License as published by the Free Software Foundation; either | |
10 | * version 2.1 of the License, or (at your option) any later version. | |
11 | * | |
12 | * FFmpeg is distributed in the hope that it will be useful, | |
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
15 | * Lesser General Public License for more details. | |
16 | * | |
17 | * You should have received a copy of the GNU Lesser General Public | |
18 | * License along with FFmpeg; if not, write to the Free Software | |
19 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
20 | */ | |
21 | ||
22 | #include "libavutil/attributes.h" | |
23 | #include "libavutil/x86/cpu.h" | |
24 | #include "libavutil/x86/asm.h" | |
25 | #include "libavfilter/vf_noise.h" | |
26 | ||
27 | #if HAVE_INLINE_ASM | |
28 | static void line_noise_mmx(uint8_t *dst, const uint8_t *src, | |
29 | const int8_t *noise, int len, int shift) | |
30 | { | |
31 | x86_reg mmx_len= len & (~7); | |
32 | noise += shift; | |
33 | ||
34 | __asm__ volatile( | |
35 | "mov %3, %%"REG_a" \n\t" | |
36 | "pcmpeqb %%mm7, %%mm7 \n\t" | |
37 | "psllw $15, %%mm7 \n\t" | |
38 | "packsswb %%mm7, %%mm7 \n\t" | |
39 | ".p2align 4 \n\t" | |
40 | "1: \n\t" | |
41 | "movq (%0, %%"REG_a"), %%mm0 \n\t" | |
42 | "movq (%1, %%"REG_a"), %%mm1 \n\t" | |
43 | "pxor %%mm7, %%mm0 \n\t" | |
44 | "paddsb %%mm1, %%mm0 \n\t" | |
45 | "pxor %%mm7, %%mm0 \n\t" | |
46 | "movq %%mm0, (%2, %%"REG_a") \n\t" | |
47 | "add $8, %%"REG_a" \n\t" | |
48 | " js 1b \n\t" | |
49 | :: "r" (src+mmx_len), "r" (noise+mmx_len), "r" (dst+mmx_len), "g" (-mmx_len) | |
50 | : "%"REG_a | |
51 | ); | |
52 | if (mmx_len != len) | |
53 | ff_line_noise_c(dst+mmx_len, src+mmx_len, noise+mmx_len, len-mmx_len, 0); | |
54 | } | |
55 | ||
56 | #if HAVE_6REGS | |
57 | static void line_noise_avg_mmx(uint8_t *dst, const uint8_t *src, | |
58 | int len, const int8_t * const *shift) | |
59 | { | |
60 | x86_reg mmx_len = len & (~7); | |
61 | ||
62 | __asm__ volatile( | |
63 | "mov %5, %%"REG_a" \n\t" | |
64 | ".p2align 4 \n\t" | |
65 | "1: \n\t" | |
66 | "movq (%1, %%"REG_a"), %%mm1 \n\t" | |
67 | "movq (%0, %%"REG_a"), %%mm0 \n\t" | |
68 | "paddb (%2, %%"REG_a"), %%mm1 \n\t" | |
69 | "paddb (%3, %%"REG_a"), %%mm1 \n\t" | |
70 | "movq %%mm0, %%mm2 \n\t" | |
71 | "movq %%mm1, %%mm3 \n\t" | |
72 | "punpcklbw %%mm0, %%mm0 \n\t" | |
73 | "punpckhbw %%mm2, %%mm2 \n\t" | |
74 | "punpcklbw %%mm1, %%mm1 \n\t" | |
75 | "punpckhbw %%mm3, %%mm3 \n\t" | |
76 | "pmulhw %%mm0, %%mm1 \n\t" | |
77 | "pmulhw %%mm2, %%mm3 \n\t" | |
78 | "paddw %%mm1, %%mm1 \n\t" | |
79 | "paddw %%mm3, %%mm3 \n\t" | |
80 | "paddw %%mm0, %%mm1 \n\t" | |
81 | "paddw %%mm2, %%mm3 \n\t" | |
82 | "psrlw $8, %%mm1 \n\t" | |
83 | "psrlw $8, %%mm3 \n\t" | |
84 | "packuswb %%mm3, %%mm1 \n\t" | |
85 | "movq %%mm1, (%4, %%"REG_a") \n\t" | |
86 | "add $8, %%"REG_a" \n\t" | |
87 | " js 1b \n\t" | |
88 | :: "r" (src+mmx_len), "r" (shift[0]+mmx_len), "r" (shift[1]+mmx_len), "r" (shift[2]+mmx_len), | |
89 | "r" (dst+mmx_len), "g" (-mmx_len) | |
90 | : "%"REG_a | |
91 | ); | |
92 | ||
93 | if (mmx_len != len){ | |
94 | const int8_t *shift2[3] = { shift[0]+mmx_len, shift[1]+mmx_len, shift[2]+mmx_len }; | |
95 | ff_line_noise_avg_c(dst+mmx_len, src+mmx_len, len-mmx_len, shift2); | |
96 | } | |
97 | } | |
98 | #endif /* HAVE_6REGS */ | |
99 | ||
100 | static void line_noise_mmxext(uint8_t *dst, const uint8_t *src, | |
101 | const int8_t *noise, int len, int shift) | |
102 | { | |
103 | x86_reg mmx_len = len & (~7); | |
104 | noise += shift; | |
105 | ||
106 | __asm__ volatile( | |
107 | "mov %3, %%"REG_a" \n\t" | |
108 | "pcmpeqb %%mm7, %%mm7 \n\t" | |
109 | "psllw $15, %%mm7 \n\t" | |
110 | "packsswb %%mm7, %%mm7 \n\t" | |
111 | ".p2align 4 \n\t" | |
112 | "1: \n\t" | |
113 | "movq (%0, %%"REG_a"), %%mm0 \n\t" | |
114 | "movq (%1, %%"REG_a"), %%mm1 \n\t" | |
115 | "pxor %%mm7, %%mm0 \n\t" | |
116 | "paddsb %%mm1, %%mm0 \n\t" | |
117 | "pxor %%mm7, %%mm0 \n\t" | |
118 | "movntq %%mm0, (%2, %%"REG_a") \n\t" | |
119 | "add $8, %%"REG_a" \n\t" | |
120 | " js 1b \n\t" | |
121 | :: "r" (src+mmx_len), "r" (noise+mmx_len), "r" (dst+mmx_len), "g" (-mmx_len) | |
122 | : "%"REG_a | |
123 | ); | |
124 | if (mmx_len != len) | |
125 | ff_line_noise_c(dst+mmx_len, src+mmx_len, noise+mmx_len, len-mmx_len, 0); | |
126 | } | |
127 | #endif /* HAVE_INLINE_ASM */ | |
128 | ||
129 | av_cold void ff_noise_init_x86(NoiseContext *n) | |
130 | { | |
131 | #if HAVE_INLINE_ASM | |
132 | int cpu_flags = av_get_cpu_flags(); | |
133 | ||
134 | if (INLINE_MMX(cpu_flags)) { | |
135 | n->line_noise = line_noise_mmx; | |
136 | #if HAVE_6REGS | |
137 | n->line_noise_avg = line_noise_avg_mmx; | |
138 | #endif | |
139 | } | |
140 | if (INLINE_MMXEXT(cpu_flags)) { | |
141 | n->line_noise = line_noise_mmxext; | |
142 | } | |
143 | #endif | |
144 | } |