Commit | Line | Data |
---|---|---|
2ba45a60 DM |
1 | /* |
2 | * Copyright (C) 2010 David Conrad | |
3 | * | |
4 | * This file is part of FFmpeg. | |
5 | * | |
6 | * FFmpeg is free software; you can redistribute it and/or | |
7 | * modify it under the terms of the GNU Lesser General Public | |
8 | * License as published by the Free Software Foundation; either | |
9 | * version 2.1 of the License, or (at your option) any later version. | |
10 | * | |
11 | * FFmpeg is distributed in the hope that it will be useful, | |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 | * Lesser General Public License for more details. | |
15 | * | |
16 | * You should have received a copy of the GNU Lesser General Public | |
17 | * License along with FFmpeg; if not, write to the Free Software | |
18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
19 | */ | |
20 | ||
21 | #include "libavutil/x86/cpu.h" | |
22 | #include "diracdsp_mmx.h" | |
23 | #include "fpel.h" | |
24 | ||
/*
 * Prototypes for the put_rect_clamped / put_signed_rect_clamped routines in
 * their MMX and SSE2 flavours. They are only declared here; no definition is
 * visible in this file (presumably provided by external assembly -- confirm
 * against the build).
 */
void ff_put_rect_clamped_mmx(uint8_t *dst, int dst_stride,
                             const int16_t *src, int src_stride,
                             int width, int height);
void ff_put_rect_clamped_sse2(uint8_t *dst, int dst_stride,
                              const int16_t *src, int src_stride,
                              int width, int height);
void ff_put_signed_rect_clamped_mmx(uint8_t *dst, int dst_stride,
                                    const int16_t *src, int src_stride,
                                    int width, int height);
void ff_put_signed_rect_clamped_sse2(uint8_t *dst, int dst_stride,
                                     const int16_t *src, int src_stride,
                                     int width, int height);
29 | ||
/*
 * HPEL_FILTER(MMSIZE, EXT) declares the external row filters
 * ff_dirac_hpel_filter_v_EXT / ff_dirac_hpel_filter_h_EXT and defines the
 * static wrapper dirac_hpel_filter_EXT(), which processes `height` rows.
 * For every row it:
 *   1. calls the _v_ filter on (dstv - MMSIZE, src - MMSIZE) with `stride`
 *      and width + MMSIZE + 5 as the last argument,
 *   2. calls the _h_ filter from src into dsth,
 *   3. calls the _h_ filter from the dstv row into dstc,
 * and then advances all four pointers by `stride`.
 */
#define HPEL_FILTER(MMSIZE, EXT) \
void ff_dirac_hpel_filter_v_ ## EXT(uint8_t *, const uint8_t *, int, int); \
void ff_dirac_hpel_filter_h_ ## EXT(uint8_t *, const uint8_t *, int); \
\
static void dirac_hpel_filter_ ## EXT(uint8_t *dsth, uint8_t *dstv, \
                                      uint8_t *dstc, const uint8_t *src, \
                                      int stride, int width, int height) \
{ \
    for (; height; height--) { \
        /* The dstc pass below reads dstv, so the _v_ call has to come first. */ \
        ff_dirac_hpel_filter_v_ ## EXT(dstv - MMSIZE, src - MMSIZE, \
                                       stride, width + MMSIZE + 5); \
        ff_dirac_hpel_filter_h_ ## EXT(dsth, src, width); \
        ff_dirac_hpel_filter_h_ ## EXT(dstc, dstv, width); \
\
        /* Step every plane down to the next row. */ \
        src  += stride; \
        dstc += stride; \
        dstv += stride; \
        dsth += stride; \
    } \
}

/* The MMX wrapper is only instantiated when ARCH_X86_64 is false. */
#if !ARCH_X86_64
HPEL_FILTER(8, mmx)
#endif
HPEL_FILTER(16, sse2)
54 | ||
/*
 * PIXFUNC(PFX, IDX, EXT) stores the ff_PFX_dirac_pixels16_EXT and
 * ff_PFX_dirac_pixels32_EXT functions into rows [1] and [2] of
 * c->PFX_dirac_pixels_tab at column IDX. Row [0] (the pixels8 variant) is
 * deliberately not touched; the disabled assignment is kept below, tagged
 * MMXDISABLED.
 *
 * Expects a pointer named `c` in the calling scope.
 *
 * The body is wrapped in do { } while (0) so that the expansion is a single
 * statement: without it, using the macro under an unbraced if/else would make
 * only the first assignment conditional. Invoke it with a trailing semicolon.
 */
#define PIXFUNC(PFX, IDX, EXT) \
    do { \
        /*MMXDISABLEDc->PFX ## _dirac_pixels_tab[0][IDX] = ff_ ## PFX ## _dirac_pixels8_ ## EXT;*/ \
        c->PFX ## _dirac_pixels_tab[1][IDX] = ff_ ## PFX ## _dirac_pixels16_ ## EXT; \
        c->PFX ## _dirac_pixels_tab[2][IDX] = ff_ ## PFX ## _dirac_pixels32_ ## EXT; \
    } while (0)
59 | ||
/*
 * DIRAC_PIXOP(OPNAME2, OPNAME, EXT) defines three wrappers,
 * ff_OPNAME2_dirac_pixels{8,16,32}_EXT(dst, src, stride, h).
 *
 * Each wrapper hands the call to the matching ff_OPNAME2_dirac_pixelsN_c
 * function (with the whole src array) whenever h is not a multiple of 4.
 * Otherwise it calls OPNAME_pixelsN_EXT with src[0] only; the 32-wide
 * wrapper does this as two OPNAME_pixels16_EXT calls at offsets 0 and 16.
 * NOTE(review): the h & 3 test presumably reflects a row-count requirement
 * of the OPNAME_pixels* routines -- confirm against their implementation.
 */
#define DIRAC_PIXOP(OPNAME2, OPNAME, EXT) \
void ff_ ## OPNAME2 ## _dirac_pixels8_ ## EXT(uint8_t *dst, const uint8_t *src[5], \
                                              int stride, int h) \
{ \
    if (!(h & 3)) { \
        OPNAME ## _pixels8_ ## EXT(dst, src[0], stride, h); \
        return; \
    } \
    ff_ ## OPNAME2 ## _dirac_pixels8_c(dst, src, stride, h); \
} \
void ff_ ## OPNAME2 ## _dirac_pixels16_ ## EXT(uint8_t *dst, const uint8_t *src[5], \
                                               int stride, int h) \
{ \
    if (!(h & 3)) { \
        OPNAME ## _pixels16_ ## EXT(dst, src[0], stride, h); \
        return; \
    } \
    ff_ ## OPNAME2 ## _dirac_pixels16_c(dst, src, stride, h); \
} \
void ff_ ## OPNAME2 ## _dirac_pixels32_ ## EXT(uint8_t *dst, const uint8_t *src[5], \
                                               int stride, int h) \
{ \
    if (h & 3) { \
        ff_ ## OPNAME2 ## _dirac_pixels32_c(dst, src, stride, h); \
        return; \
    } \
    /* Two 16-wide calls side by side cover the 32-wide block. */ \
    OPNAME ## _pixels16_ ## EXT(dst,      src[0],      stride, h); \
    OPNAME ## _pixels16_ ## EXT(dst + 16, src[0] + 16, stride, h); \
}

/* Instantiate: put/avg for mmx, and avg again for mmxext. */
DIRAC_PIXOP(put, ff_put, mmx)
DIRAC_PIXOP(avg, ff_avg, mmx)
DIRAC_PIXOP(avg, ff_avg, mmxext)
88 | ||
/**
 * SSE2 wrapper for the 16-wide "put" operation.
 *
 * When h is a multiple of 4, calls ff_put_pixels16_sse2() with src[0];
 * otherwise forwards the whole src array to ff_put_dirac_pixels16_c().
 * NOTE(review): the multiple-of-4 test presumably mirrors a requirement of
 * ff_put_pixels16_sse2 -- confirm.
 */
void ff_put_dirac_pixels16_sse2(uint8_t *dst, const uint8_t *src[5], int stride, int h)
{
    if ((h & 3) == 0) {
        ff_put_pixels16_sse2(dst, src[0], stride, h);
        return;
    }
    ff_put_dirac_pixels16_c(dst, src, stride, h);
}
/**
 * SSE2 wrapper for the 16-wide "avg" operation.
 *
 * When h is a multiple of 4, calls ff_avg_pixels16_sse2() with src[0];
 * otherwise forwards the whole src array to ff_avg_dirac_pixels16_c().
 * NOTE(review): the multiple-of-4 test presumably mirrors a requirement of
 * ff_avg_pixels16_sse2 -- confirm.
 */
void ff_avg_dirac_pixels16_sse2(uint8_t *dst, const uint8_t *src[5], int stride, int h)
{
    if ((h & 3) == 0) {
        ff_avg_pixels16_sse2(dst, src[0], stride, h);
        return;
    }
    ff_avg_dirac_pixels16_c(dst, src, stride, h);
}
/**
 * SSE2 wrapper for the 32-wide "put" operation.
 *
 * If h is not a multiple of 4 the call is forwarded, with the whole src
 * array, to ff_put_dirac_pixels32_c(). Otherwise the block is handled as two
 * ff_put_pixels16_sse2() calls on src[0], at byte offsets 0 and 16.
 */
void ff_put_dirac_pixels32_sse2(uint8_t *dst, const uint8_t *src[5], int stride, int h)
{
    if (h & 3) {
        ff_put_dirac_pixels32_c(dst, src, stride, h);
        return;
    }

    /* Left 16 columns, then right 16 columns. */
    ff_put_pixels16_sse2(dst,      src[0],      stride, h);
    ff_put_pixels16_sse2(dst + 16, src[0] + 16, stride, h);
}
/**
 * SSE2 wrapper for the 32-wide "avg" operation.
 *
 * If h is not a multiple of 4 the call is forwarded, with the whole src
 * array, to ff_avg_dirac_pixels32_c(). Otherwise the block is handled as two
 * ff_avg_pixels16_sse2() calls on src[0], at byte offsets 0 and 16.
 */
void ff_avg_dirac_pixels32_sse2(uint8_t *dst, const uint8_t *src[5], int stride, int h)
{
    if (h & 3) {
        ff_avg_dirac_pixels32_c(dst, src, stride, h);
        return;
    }

    /* Left 16 columns, then right 16 columns. */
    ff_avg_pixels16_sse2(dst,      src[0],      stride, h);
    ff_avg_pixels16_sse2(dst + 16, src[0] + 16, stride, h);
}
121 | ||
122 | void ff_diracdsp_init_mmx(DiracDSPContext* c) | |
123 | { | |
124 | int mm_flags = av_get_cpu_flags(); | |
125 | ||
126 | if (EXTERNAL_MMX(mm_flags)) { | |
127 | c->add_dirac_obmc[0] = ff_add_dirac_obmc8_mmx; | |
128 | #if !ARCH_X86_64 | |
129 | c->add_dirac_obmc[1] = ff_add_dirac_obmc16_mmx; | |
130 | c->add_dirac_obmc[2] = ff_add_dirac_obmc32_mmx; | |
131 | c->dirac_hpel_filter = dirac_hpel_filter_mmx; | |
132 | c->add_rect_clamped = ff_add_rect_clamped_mmx; | |
133 | c->put_signed_rect_clamped = ff_put_signed_rect_clamped_mmx; | |
134 | #endif | |
135 | PIXFUNC(put, 0, mmx); | |
136 | PIXFUNC(avg, 0, mmx); | |
137 | } | |
138 | ||
139 | if (EXTERNAL_MMXEXT(mm_flags)) { | |
140 | PIXFUNC(avg, 0, mmxext); | |
141 | } | |
142 | ||
143 | if (EXTERNAL_SSE2(mm_flags)) { | |
144 | c->dirac_hpel_filter = dirac_hpel_filter_sse2; | |
145 | c->add_rect_clamped = ff_add_rect_clamped_sse2; | |
146 | c->put_signed_rect_clamped = ff_put_signed_rect_clamped_sse2; | |
147 | ||
148 | c->add_dirac_obmc[1] = ff_add_dirac_obmc16_sse2; | |
149 | c->add_dirac_obmc[2] = ff_add_dirac_obmc32_sse2; | |
150 | ||
151 | c->put_dirac_pixels_tab[1][0] = ff_put_dirac_pixels16_sse2; | |
152 | c->avg_dirac_pixels_tab[1][0] = ff_avg_dirac_pixels16_sse2; | |
153 | c->put_dirac_pixels_tab[2][0] = ff_put_dirac_pixels32_sse2; | |
154 | c->avg_dirac_pixels_tab[2][0] = ff_avg_dirac_pixels32_sse2; | |
155 | } | |
156 | } |