Commit | Line | Data |
---|---|---|
2ba45a60 DM |
1 | /* |
2 | * This file is part of FFmpeg. | |
3 | * | |
4 | * FFmpeg is free software; you can redistribute it and/or | |
5 | * modify it under the terms of the GNU Lesser General Public | |
6 | * License as published by the Free Software Foundation; either | |
7 | * version 2.1 of the License, or (at your option) any later version. | |
8 | * | |
9 | * FFmpeg is distributed in the hope that it will be useful, | |
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
12 | * Lesser General Public License for more details. | |
13 | * | |
14 | * You should have received a copy of the GNU Lesser General Public | |
15 | * License along with FFmpeg; if not, write to the Free Software | |
16 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
17 | */ | |
18 | ||
19 | #include "config.h" | |
20 | #include <stdint.h> | |
21 | #if HAVE_ALTIVEC_H | |
22 | #include <altivec.h> | |
23 | #endif | |
24 | ||
25 | #include "libavutil/attributes.h" | |
26 | #include "libavutil/cpu.h" | |
27 | #include "libavutil/ppc/cpu.h" | |
28 | #include "libavutil/ppc/types_altivec.h" | |
29 | #include "libavutil/ppc/util_altivec.h" | |
30 | #include "libavcodec/mpegvideoencdsp.h" | |
31 | ||
32 | #if HAVE_ALTIVEC | |
33 | ||
f6fa7814 DM |
34 | #if HAVE_VSX |
/**
 * Sum of the squared pixel values of a 16x16 block (VSX variant).
 *
 * @param pix       pointer to the top-left pixel of the block; rows are
 *                  fetched with vec_vsx_ld, so pix may be unaligned
 * @param line_size byte distance between the starts of consecutive rows
 * @return the sum of pix[x + y * line_size]^2 over 0 <= x, y < 16
 */
static int pix_norm1_altivec(uint8_t *pix, int line_size)
{
    int i, s = 0;
    const vector unsigned int zero =
        (const vector unsigned int) vec_splat_u32(0);
    vector unsigned int sv = (vector unsigned int) vec_splat_u32(0);
    vector signed int sum;

    for (i = 0; i < 16; i++) {
        /* Read the potentially unaligned pixels. */
        vector unsigned char pixv = vec_vsx_ld(0, pix);

        /* Square the values, and add them to our sum. */
        sv = vec_msum(pixv, pixv, sv);

        pix += line_size;
    }
    /* Sum up the four partial sums, and put the result into s. */
    sum = vec_sums((vector signed int) sv, (vector signed int) zero);
    sum = vec_splat(sum, 3);
    /* Store a single 32-bit element: a full-vector store (vec_vsx_st) would
     * write 16 bytes over the 4-byte s and corrupt the stack. The total has
     * been splatted to every lane, so whichever lane vec_ste picks is right. */
    vec_ste(sum, 0, &s);
    return s;
}
61 | #else | |
2ba45a60 DM |
/**
 * Sum of the squared pixel values of a 16x16 block (plain AltiVec variant).
 *
 * Each row is assembled from two aligned loads and a permute. The permute
 * vector is derived once from the initial pix, so every row is realigned
 * with the shift of the first row.
 *
 * @param pix       pointer to the top-left pixel of the block
 * @param line_size byte distance between the starts of consecutive rows
 * @return the accumulated sum of squares of the 256 pixels
 */
static int pix_norm1_altivec(uint8_t *pix, int line_size)
{
    const vector unsigned int zero_v =
        (const vector unsigned int) vec_splat_u32(0);
    /* Realignment shuffle, computed from the first row only. */
    const vector unsigned char align_perm = vec_lvsl(0, pix);
    vector unsigned int acc = (vector unsigned int) vec_splat_u32(0);
    vector signed int total;
    int result = 0;
    int rows_left = 16;

    while (rows_left-- > 0) {
        /* Two aligned loads cover the 16 possibly unaligned pixels. */
        vector unsigned char lo  = vec_ld(0, pix);
        vector unsigned char hi  = vec_ld(15, pix);
        vector unsigned char row = vec_perm(lo, hi, align_perm);

        /* Multiply each pixel by itself and accumulate per 32-bit lane. */
        acc = vec_msum(row, row, acc);

        pix += line_size;
    }

    /* Fold the four lane sums together and broadcast the total so that
     * the single-element store below picks it up from any lane. */
    total = vec_sums((vector signed int) acc, (vector signed int) zero_v);
    total = vec_splat(total, 3);
    vec_ste(total, 0, &result);

    return result;
}
f6fa7814 DM |
89 | #endif /* HAVE_VSX */ |
90 | ||
91 | #if HAVE_VSX | |
/**
 * Sum of the pixel values of a 16x16 block (VSX variant).
 *
 * @param pix       pointer to the top-left pixel of the block; rows are
 *                  fetched with vec_vsx_ld, so pix may be unaligned
 * @param line_size byte distance between the starts of consecutive rows
 * @return the sum of pix[x + y * line_size] over 0 <= x, y < 16
 */
static int pix_sum_altivec(uint8_t *pix, int line_size)
{
    int i, s = 0;
    const vector unsigned int zero =
        (const vector unsigned int) vec_splat_u32(0);
    vector unsigned int sad = (vector unsigned int) vec_splat_u32(0);
    vector signed int sumdiffs;

    for (i = 0; i < 16; i++) {
        /* Read the potentially unaligned 16 pixels into t1. */
        vector unsigned char t1 = vec_vsx_ld(0, pix);

        /* Add each 4 pixel group together and put 4 results into sad. */
        sad = vec_sum4s(t1, sad);

        pix += line_size;
    }

    /* Sum up the four partial sums, and put the result into s. */
    sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);
    sumdiffs = vec_splat(sumdiffs, 3);
    /* Store a single 32-bit element: a full-vector store (vec_vsx_st) would
     * write 16 bytes over the 4-byte s and corrupt the stack. The total has
     * been splatted to every lane, so whichever lane vec_ste picks is right. */
    vec_ste(sumdiffs, 0, &s);
    return s;
}
119 | #else | |
2ba45a60 DM |
/**
 * Sum of the pixel values of a 16x16 block (plain AltiVec variant).
 *
 * Each row is assembled from two aligned loads and a permute. The permute
 * vector is derived once from the initial pix, so every row is realigned
 * with the shift of the first row.
 *
 * @param pix       pointer to the top-left pixel of the block
 * @param line_size byte distance between the starts of consecutive rows
 * @return the accumulated sum of the 256 pixels
 */
static int pix_sum_altivec(uint8_t *pix, int line_size)
{
    const vector unsigned int zero_v =
        (const vector unsigned int) vec_splat_u32(0);
    /* Realignment shuffle, computed from the first row only. */
    const vector unsigned char align_perm = vec_lvsl(0, pix);
    vector unsigned int lane_sums = (vector unsigned int) vec_splat_u32(0);
    vector signed int total;
    int result; /* written by the vec_ste below */
    int row;

    for (row = 16; row > 0; row--) {
        /* Two aligned loads cover the 16 possibly unaligned pixels. */
        vector unsigned char lo     = vec_ld(0, pix);
        vector unsigned char hi     = vec_ld(15, pix);
        vector unsigned char pixels = vec_perm(lo, hi, align_perm);

        /* Each 32-bit lane gains the sum of its four pixels. */
        lane_sums = vec_sum4s(pixels, lane_sums);

        pix += line_size;
    }

    /* Fold the four lane sums together and broadcast the total so that
     * the single-element store below picks it up from any lane. */
    total = vec_sums((vector signed int) lane_sums,
                     (vector signed int) zero_v);
    total = vec_splat(total, 3);
    vec_ste(total, 0, &result);

    return result;
}
148 | ||
f6fa7814 DM |
149 | #endif /* HAVE_VSX */ |
150 | ||
2ba45a60 DM |
151 | #endif /* HAVE_ALTIVEC */ |
152 | ||
153 | av_cold void ff_mpegvideoencdsp_init_ppc(MpegvideoEncDSPContext *c, | |
154 | AVCodecContext *avctx) | |
155 | { | |
156 | #if HAVE_ALTIVEC | |
157 | if (!PPC_ALTIVEC(av_get_cpu_flags())) | |
158 | return; | |
159 | ||
160 | c->pix_norm1 = pix_norm1_altivec; | |
161 | c->pix_sum = pix_sum_altivec; | |
162 | #endif /* HAVE_ALTIVEC */ | |
163 | } |