Commit | Line | Data |
---|---|---|
2ba45a60 DM |
1 | /* |
2 | * Format Conversion Utils | |
3 | * Copyright (c) 2000, 2001 Fabrice Bellard | |
4 | * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> | |
5 | * | |
6 | * MMX optimization by Nick Kurshev <nickols_k@mail.ru> | |
7 | * | |
8 | * This file is part of FFmpeg. | |
9 | * | |
10 | * FFmpeg is free software; you can redistribute it and/or | |
11 | * modify it under the terms of the GNU Lesser General Public | |
12 | * License as published by the Free Software Foundation; either | |
13 | * version 2.1 of the License, or (at your option) any later version. | |
14 | * | |
15 | * FFmpeg is distributed in the hope that it will be useful, | |
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
18 | * Lesser General Public License for more details. | |
19 | * | |
20 | * You should have received a copy of the GNU Lesser General Public | |
21 | * License along with FFmpeg; if not, write to the Free Software | |
22 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
23 | */ | |
24 | ||
25 | #include "libavutil/attributes.h" | |
26 | #include "libavutil/cpu.h" | |
27 | #include "libavutil/x86/asm.h" | |
28 | #include "libavutil/x86/cpu.h" | |
29 | #include "libavcodec/fmtconvert.h" | |
30 | ||
31 | #if HAVE_YASM | |
32 | ||
/*
 * Prototypes for the conversion routines referenced below. Their definitions
 * are not in this file (presumably the yasm sources, given the HAVE_YASM
 * guard -- confirm). They are grouped here by instruction set.
 */

/* 3DNow! */
void ff_float_to_int16_3dnow            (int16_t *dst, const float  *src, long len);
void ff_float_to_int16_step_3dnow       (int16_t *dst, const float  *src, long len, long step);
void ff_float_to_int16_interleave2_3dnow(int16_t *dst, const float **src, long len);
void ff_float_to_int16_interleave6_3dnow(int16_t *dst, const float **src, int len);

/* Extended 3DNow!: only the six-channel interleave has a dedicated version. */
void ff_float_to_int16_interleave6_3dnowext(int16_t *dst, const float **src, int len);

/* SSE */
void ff_int32_to_float_fmul_scalar_sse(float *dst, const int32_t *src, float mul, int len);
void ff_float_to_int16_sse            (int16_t *dst, const float  *src, long len);
void ff_float_to_int16_step_sse       (int16_t *dst, const float  *src, long len, long step);
void ff_float_to_int16_interleave2_sse(int16_t *dst, const float **src, long len);
void ff_float_to_int16_interleave6_sse(int16_t *dst, const float **src, int len);

/* SSE2 */
void ff_int32_to_float_fmul_scalar_sse2(float *dst, const int32_t *src, float mul, int len);
void ff_float_to_int16_sse2            (int16_t *dst, const float  *src, long len);
void ff_float_to_int16_step_sse2       (int16_t *dst, const float  *src, long len, long step);
void ff_float_to_int16_interleave2_sse2(int16_t *dst, const float **src, long len);

/* No SSE2 six-channel interleave is declared; the SSE2 name aliases the SSE one. */
#define ff_float_to_int16_interleave6_sse2 ff_float_to_int16_interleave6_sse
53 | ||
54 | #define FLOAT_TO_INT16_INTERLEAVE(cpu) \ | |
55 | /* gcc pessimizes register allocation if this is in the same function as float_to_int16_interleave_sse2*/\ | |
56 | static av_noinline void float_to_int16_interleave_misc_##cpu(int16_t *dst, const float **src, long len, int channels){\ | |
57 | int c;\ | |
58 | for(c=0; c<channels; c++){\ | |
59 | ff_float_to_int16_step_##cpu(dst+c, src[c], len, channels);\ | |
60 | }\ | |
61 | }\ | |
62 | \ | |
63 | static void float_to_int16_interleave_##cpu(int16_t *dst, const float **src, long len, int channels){\ | |
64 | if(channels==1)\ | |
65 | ff_float_to_int16_##cpu(dst, src[0], len);\ | |
66 | else if(channels==2){\ | |
67 | ff_float_to_int16_interleave2_##cpu(dst, src, len);\ | |
68 | }else if(channels==6){\ | |
69 | ff_float_to_int16_interleave6_##cpu(dst, src, len);\ | |
70 | }else\ | |
71 | float_to_int16_interleave_misc_##cpu(dst, src, len, channels);\ | |
72 | } | |
73 | ||
74 | FLOAT_TO_INT16_INTERLEAVE(3dnow) | |
75 | FLOAT_TO_INT16_INTERLEAVE(sse) | |
76 | FLOAT_TO_INT16_INTERLEAVE(sse2) | |
77 | ||
/*
 * Extended-3DNow! dispatcher for float -> int16 interleaving.
 *
 * Only the six-channel case has a dedicated 3DNowExt routine; every other
 * channel count is handed to the plain 3DNow! dispatcher.
 */
static void float_to_int16_interleave_3dnowext(int16_t *dst, const float **src,
                                               long len, int channels)
{
    if (channels != 6) {
        float_to_int16_interleave_3dnow(dst, src, len, channels);
        return;
    }

    ff_float_to_int16_interleave6_3dnowext(dst, src, len);
}
86 | ||
/*
 * Float interleave routines for two and six channels. Their definitions are
 * not in this file (presumably the yasm sources -- confirm).
 */

/* MMX */
void ff_float_interleave2_mmx(float *dst, const float **src, unsigned int len);
void ff_float_interleave6_mmx(float *dst, const float **src, unsigned int len);

/* SSE */
void ff_float_interleave2_sse(float *dst, const float **src, unsigned int len);
void ff_float_interleave6_sse(float *dst, const float **src, unsigned int len);
92 | ||
/*
 * MMX dispatcher for float interleaving.
 *
 * Two and six channels use the dedicated MMX routines; any other channel
 * count is passed to ff_float_interleave_c().
 */
static void float_interleave_mmx(float *dst, const float **src,
                                 unsigned int len, int channels)
{
    switch (channels) {
    case 2:
        ff_float_interleave2_mmx(dst, src, len);
        break;
    case 6:
        ff_float_interleave6_mmx(dst, src, len);
        break;
    default:
        ff_float_interleave_c(dst, src, len, channels);
        break;
    }
}
103 | ||
/*
 * SSE dispatcher for float interleaving.
 *
 * Two and six channels use the dedicated SSE routines; any other channel
 * count is passed to ff_float_interleave_c().
 */
static void float_interleave_sse(float *dst, const float **src,
                                 unsigned int len, int channels)
{
    switch (channels) {
    case 2:
        ff_float_interleave2_sse(dst, src, len);
        break;
    case 6:
        ff_float_interleave6_sse(dst, src, len);
        break;
    default:
        ff_float_interleave_c(dst, src, len, channels);
        break;
    }
}
114 | #endif /* HAVE_YASM */ | |
115 | ||
116 | av_cold void ff_fmt_convert_init_x86(FmtConvertContext *c, AVCodecContext *avctx) | |
117 | { | |
118 | #if HAVE_YASM | |
119 | int cpu_flags = av_get_cpu_flags(); | |
120 | ||
121 | if (EXTERNAL_MMX(cpu_flags)) { | |
122 | c->float_interleave = float_interleave_mmx; | |
123 | } | |
124 | if (EXTERNAL_AMD3DNOW(cpu_flags)) { | |
125 | if (!(avctx->flags & CODEC_FLAG_BITEXACT)) { | |
126 | c->float_to_int16 = ff_float_to_int16_3dnow; | |
127 | c->float_to_int16_interleave = float_to_int16_interleave_3dnow; | |
128 | } | |
129 | } | |
130 | if (EXTERNAL_AMD3DNOWEXT(cpu_flags)) { | |
131 | if (!(avctx->flags & CODEC_FLAG_BITEXACT)) { | |
132 | c->float_to_int16_interleave = float_to_int16_interleave_3dnowext; | |
133 | } | |
134 | } | |
135 | if (EXTERNAL_SSE(cpu_flags)) { | |
136 | c->int32_to_float_fmul_scalar = ff_int32_to_float_fmul_scalar_sse; | |
137 | c->float_to_int16 = ff_float_to_int16_sse; | |
138 | c->float_to_int16_interleave = float_to_int16_interleave_sse; | |
139 | c->float_interleave = float_interleave_sse; | |
140 | } | |
141 | if (EXTERNAL_SSE2(cpu_flags)) { | |
142 | c->int32_to_float_fmul_scalar = ff_int32_to_float_fmul_scalar_sse2; | |
143 | c->float_to_int16 = ff_float_to_int16_sse2; | |
144 | c->float_to_int16_interleave = float_to_int16_interleave_sse2; | |
145 | } | |
146 | #endif /* HAVE_YASM */ | |
147 | } |