Commit | Line | Data |
---|---|---|
2ba45a60 DM |
1 | /* |
2 | * This file is part of FFmpeg. | |
3 | * | |
4 | * FFmpeg is free software; you can redistribute it and/or | |
5 | * modify it under the terms of the GNU Lesser General Public | |
6 | * License as published by the Free Software Foundation; either | |
7 | * version 2.1 of the License, or (at your option) any later version. | |
8 | * | |
9 | * FFmpeg is distributed in the hope that it will be useful, | |
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
12 | * Lesser General Public License for more details. | |
13 | * | |
14 | * You should have received a copy of the GNU Lesser General Public | |
15 | * License along with FFmpeg; if not, write to the Free Software | |
16 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
17 | */ | |
18 | ||
19 | #include "config.h" | |
20 | #include <stdint.h> | |
21 | #if HAVE_ALTIVEC_H | |
22 | #include <altivec.h> | |
23 | #endif | |
24 | ||
25 | #include "libavutil/attributes.h" | |
26 | #include "libavutil/cpu.h" | |
27 | #include "libavutil/ppc/cpu.h" | |
28 | #include "libavutil/ppc/types_altivec.h" | |
29 | #include "libavutil/ppc/util_altivec.h" | |
30 | #include "libavcodec/mpegvideoencdsp.h" | |
31 | ||
32 | #if HAVE_ALTIVEC | |
33 | ||
/**
 * Sum of squared pixel values over a 16x16 block, AltiVec version.
 *
 * @param pix       pointer to the first pixel of the block; it does not
 *                  have to be 16-byte aligned
 * @param line_size byte offset added to pix to step from one row to the next
 * @return sum of pix[x] * pix[x] over 16 rows of 16 bytes each
 */
static int pix_norm1_altivec(uint8_t *pix, int line_size)
{
    int result = 0;
    int row;
    const vector unsigned int vzero =
        (const vector unsigned int) vec_splat_u32(0);
    vector unsigned int squares = (vector unsigned int) vec_splat_u32(0);
    vector signed int total;
    /* Alignment permute, derived once from the initial pointer and reused
     * for every row.
     * NOTE(review): this is only right for the later rows if line_size is a
     * multiple of 16 -- confirm that callers guarantee it. */
    const vector unsigned char align = vec_lvsl(0, pix);

    for (row = 0; row < 16; row++, pix += line_size) {
        /* Fetch the two aligned quadwords that straddle the row and
         * extract the 16 pixels that start at pix. */
        const vector unsigned char rowpix =
            vec_perm(vec_ld(0, pix), vec_ld(15, pix), align);

        /* Multiply every byte by itself; each group of four products is
         * added into the matching 32-bit lane of the accumulator. */
        squares = vec_msum(rowpix, rowpix, squares);
    }

    /* Fold the four lanes into element 3, replicate that element across
     * the vector and store one copy of it into result. */
    total = vec_sums((vector signed int) squares, (vector signed int) vzero);
    total = vec_splat(total, 3);
    vec_ste(total, 0, &result);

    return result;
}
61 | ||
/**
 * Sum of all pixel values of a 16x16 block, AltiVec version.
 *
 * @param pix       pointer to the first pixel of the block; it does not
 *                  have to be 16-byte aligned
 * @param line_size byte offset added to pix to step from one row to the next
 * @return sum of the 16 bytes of each of the 16 rows
 */
static int pix_sum_altivec(uint8_t *pix, int line_size)
{
    int result;
    int row;
    const vector unsigned int vzero =
        (const vector unsigned int) vec_splat_u32(0);
    vector unsigned int partial = (vector unsigned int) vec_splat_u32(0);
    vector signed int total;
    /* Alignment permute, derived once from the initial pointer and reused
     * for every row.
     * NOTE(review): this is only right for the later rows if line_size is a
     * multiple of 16 -- confirm that callers guarantee it. */
    const vector unsigned char align = vec_lvsl(0, pix);

    for (row = 0; row < 16; row++, pix += line_size) {
        /* Fetch the two aligned quadwords that straddle the row and
         * extract the 16 pixels that start at pix. */
        const vector unsigned char rowpix =
            vec_perm(vec_ld(0, pix), vec_ld(15, pix), align);

        /* Add each group of four bytes into the matching 32-bit lane. */
        partial = vec_sum4s(rowpix, partial);
    }

    /* Fold the four lanes into element 3, replicate that element across
     * the vector and store one copy of it into result. */
    total = vec_sums((vector signed int) partial, (vector signed int) vzero);
    total = vec_splat(total, 3);
    vec_ste(total, 0, &result);

    return result;
}
90 | ||
91 | #endif /* HAVE_ALTIVEC */ | |
92 | ||
93 | av_cold void ff_mpegvideoencdsp_init_ppc(MpegvideoEncDSPContext *c, | |
94 | AVCodecContext *avctx) | |
95 | { | |
96 | #if HAVE_ALTIVEC | |
97 | if (!PPC_ALTIVEC(av_get_cpu_flags())) | |
98 | return; | |
99 | ||
100 | c->pix_norm1 = pix_norm1_altivec; | |
101 | c->pix_sum = pix_sum_altivec; | |
102 | #endif /* HAVE_ALTIVEC */ | |
103 | } |