Commit | Line | Data |
---|---|---|
2ba45a60 DM |
1 | /* |
2 | * Format Conversion Utils | |
3 | * Copyright (c) 2000, 2001 Fabrice Bellard | |
4 | * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> | |
5 | * | |
6 | * MMX optimization by Nick Kurshev <nickols_k@mail.ru> | |
7 | * | |
8 | * This file is part of FFmpeg. | |
9 | * | |
10 | * FFmpeg is free software; you can redistribute it and/or | |
11 | * modify it under the terms of the GNU Lesser General Public | |
12 | * License as published by the Free Software Foundation; either | |
13 | * version 2.1 of the License, or (at your option) any later version. | |
14 | * | |
15 | * FFmpeg is distributed in the hope that it will be useful, | |
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
18 | * Lesser General Public License for more details. | |
19 | * | |
20 | * You should have received a copy of the GNU Lesser General Public | |
21 | * License along with FFmpeg; if not, write to the Free Software | |
22 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
23 | */ | |
24 | ||
25 | #include "libavutil/attributes.h" | |
26 | #include "libavutil/cpu.h" | |
27 | #include "libavutil/x86/asm.h" | |
28 | #include "libavutil/x86/cpu.h" | |
29 | #include "libavcodec/fmtconvert.h" | |
30 | ||
31 | #if HAVE_YASM | |
32 | ||
33 | void ff_int32_to_float_fmul_scalar_sse (float *dst, const int32_t *src, float mul, int len); | |
34 | void ff_int32_to_float_fmul_scalar_sse2(float *dst, const int32_t *src, float mul, int len); | |
f6fa7814 DM |
35 | void ff_int32_to_float_fmul_array8_sse (FmtConvertContext *c, float *dst, const int32_t *src, |
36 | const float *mul, int len); | |
37 | void ff_int32_to_float_fmul_array8_sse2(FmtConvertContext *c, float *dst, const int32_t *src, | |
38 | const float *mul, int len); | |
2ba45a60 DM |
39 | |
/*
 * float -> int16 helpers, one per instruction-set flavour.
 * Prototypes only: the definitions are not in this file.
 */

/* Single source buffer. */
void ff_float_to_int16_3dnow(int16_t *dst, const float *src, long len);
void ff_float_to_int16_sse  (int16_t *dst, const float *src, long len);
void ff_float_to_int16_sse2 (int16_t *dst, const float *src, long len);

/* Single source buffer with an extra "step" argument. */
void ff_float_to_int16_step_3dnow(int16_t *dst, const float *src,
                                  long len, long step);
void ff_float_to_int16_step_sse  (int16_t *dst, const float *src,
                                  long len, long step);
void ff_float_to_int16_step_sse2 (int16_t *dst, const float *src,
                                  long len, long step);

/* Array of source pointers, two-channel case. */
void ff_float_to_int16_interleave2_3dnow(int16_t *dst, const float **src,
                                         long len);
void ff_float_to_int16_interleave2_sse  (int16_t *dst, const float **src,
                                         long len);
void ff_float_to_int16_interleave2_sse2 (int16_t *dst, const float **src,
                                         long len);

/*
 * Array of source pointers, six-channel case.
 * Note: these take "int len", unlike the "long len" used above.
 */
void ff_float_to_int16_interleave6_sse     (int16_t *dst, const float **src,
                                            int len);
void ff_float_to_int16_interleave6_3dnow   (int16_t *dst, const float **src,
                                            int len);
void ff_float_to_int16_interleave6_3dnowext(int16_t *dst, const float **src,
                                            int len);

/* There is no separate SSE2 six-channel routine; the name maps to the SSE one. */
#define ff_float_to_int16_interleave6_sse2 ff_float_to_int16_interleave6_sse
57 | ||
58 | #define FLOAT_TO_INT16_INTERLEAVE(cpu) \ | |
59 | /* gcc pessimizes register allocation if this is in the same function as float_to_int16_interleave_sse2*/\ | |
60 | static av_noinline void float_to_int16_interleave_misc_##cpu(int16_t *dst, const float **src, long len, int channels){\ | |
61 | int c;\ | |
62 | for(c=0; c<channels; c++){\ | |
63 | ff_float_to_int16_step_##cpu(dst+c, src[c], len, channels);\ | |
64 | }\ | |
65 | }\ | |
66 | \ | |
67 | static void float_to_int16_interleave_##cpu(int16_t *dst, const float **src, long len, int channels){\ | |
68 | if(channels==1)\ | |
69 | ff_float_to_int16_##cpu(dst, src[0], len);\ | |
70 | else if(channels==2){\ | |
71 | ff_float_to_int16_interleave2_##cpu(dst, src, len);\ | |
72 | }else if(channels==6){\ | |
73 | ff_float_to_int16_interleave6_##cpu(dst, src, len);\ | |
74 | }else\ | |
75 | float_to_int16_interleave_misc_##cpu(dst, src, len, channels);\ | |
76 | } | |
77 | ||
78 | FLOAT_TO_INT16_INTERLEAVE(3dnow) | |
79 | FLOAT_TO_INT16_INTERLEAVE(sse) | |
80 | FLOAT_TO_INT16_INTERLEAVE(sse2) | |
81 | ||
/*
 * 3DNow!ext dispatcher: only the six-channel case has a dedicated
 * 3DNow!ext routine; every other channel count is forwarded to the
 * plain 3DNow! dispatcher.
 */
static void float_to_int16_interleave_3dnowext(int16_t *dst, const float **src,
                                               long len, int channels)
{
    if (channels != 6) {
        float_to_int16_interleave_3dnow(dst, src, len, channels);
        return;
    }

    ff_float_to_int16_interleave6_3dnowext(dst, src, len);
}
90 | ||
/*
 * float interleaving helpers for two and six channels.
 * Prototypes only: the definitions are not in this file.
 */
void ff_float_interleave2_mmx(float *dst, const float **src,
                              unsigned int len);
void ff_float_interleave2_sse(float *dst, const float **src,
                              unsigned int len);

void ff_float_interleave6_mmx(float *dst, const float **src,
                              unsigned int len);
void ff_float_interleave6_sse(float *dst, const float **src,
                              unsigned int len);
96 | ||
/*
 * MMX float interleave dispatcher.
 * Two and six channels use the dedicated MMX routines; any other channel
 * count falls back to ff_float_interleave_c().
 */
static void float_interleave_mmx(float *dst, const float **src,
                                 unsigned int len, int channels)
{
    switch (channels) {
    case 2:
        ff_float_interleave2_mmx(dst, src, len);
        break;
    case 6:
        ff_float_interleave6_mmx(dst, src, len);
        break;
    default:
        ff_float_interleave_c(dst, src, len, channels);
        break;
    }
}
107 | ||
/*
 * SSE float interleave dispatcher.
 * Two and six channels use the dedicated SSE routines; any other channel
 * count falls back to ff_float_interleave_c().
 */
static void float_interleave_sse(float *dst, const float **src,
                                 unsigned int len, int channels)
{
    switch (channels) {
    case 2:
        ff_float_interleave2_sse(dst, src, len);
        break;
    case 6:
        ff_float_interleave6_sse(dst, src, len);
        break;
    default:
        ff_float_interleave_c(dst, src, len, channels);
        break;
    }
}
118 | #endif /* HAVE_YASM */ | |
119 | ||
120 | av_cold void ff_fmt_convert_init_x86(FmtConvertContext *c, AVCodecContext *avctx) | |
121 | { | |
122 | #if HAVE_YASM | |
123 | int cpu_flags = av_get_cpu_flags(); | |
124 | ||
125 | if (EXTERNAL_MMX(cpu_flags)) { | |
126 | c->float_interleave = float_interleave_mmx; | |
127 | } | |
128 | if (EXTERNAL_AMD3DNOW(cpu_flags)) { | |
129 | if (!(avctx->flags & CODEC_FLAG_BITEXACT)) { | |
130 | c->float_to_int16 = ff_float_to_int16_3dnow; | |
131 | c->float_to_int16_interleave = float_to_int16_interleave_3dnow; | |
132 | } | |
133 | } | |
134 | if (EXTERNAL_AMD3DNOWEXT(cpu_flags)) { | |
135 | if (!(avctx->flags & CODEC_FLAG_BITEXACT)) { | |
136 | c->float_to_int16_interleave = float_to_int16_interleave_3dnowext; | |
137 | } | |
138 | } | |
139 | if (EXTERNAL_SSE(cpu_flags)) { | |
140 | c->int32_to_float_fmul_scalar = ff_int32_to_float_fmul_scalar_sse; | |
f6fa7814 | 141 | c->int32_to_float_fmul_array8 = ff_int32_to_float_fmul_array8_sse; |
2ba45a60 DM |
142 | c->float_to_int16 = ff_float_to_int16_sse; |
143 | c->float_to_int16_interleave = float_to_int16_interleave_sse; | |
144 | c->float_interleave = float_interleave_sse; | |
145 | } | |
146 | if (EXTERNAL_SSE2(cpu_flags)) { | |
147 | c->int32_to_float_fmul_scalar = ff_int32_to_float_fmul_scalar_sse2; | |
f6fa7814 | 148 | c->int32_to_float_fmul_array8 = ff_int32_to_float_fmul_array8_sse2; |
2ba45a60 DM |
149 | c->float_to_int16 = ff_float_to_int16_sse2; |
150 | c->float_to_int16_interleave = float_to_int16_interleave_sse2; | |
151 | } | |
152 | #endif /* HAVE_YASM */ | |
153 | } |