Commit | Line | Data |
---|---|---|
2ba45a60 DM |
1 | /* |
2 | * Copyright (c) 2002 Brian Foley | |
3 | * Copyright (c) 2002 Dieter Shirley | |
4 | * Copyright (c) 2003-2004 Romain Dolbeau <romain@dolbeau.org> | |
5 | * | |
6 | * This file is part of FFmpeg. | |
7 | * | |
8 | * FFmpeg is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU Lesser General Public | |
10 | * License as published by the Free Software Foundation; either | |
11 | * version 2.1 of the License, or (at your option) any later version. | |
12 | * | |
13 | * FFmpeg is distributed in the hope that it will be useful, | |
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
16 | * Lesser General Public License for more details. | |
17 | * | |
18 | * You should have received a copy of the GNU Lesser General Public | |
19 | * License along with FFmpeg; if not, write to the Free Software | |
20 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
21 | */ | |
22 | ||
23 | #include "config.h" | |
24 | #if HAVE_ALTIVEC_H | |
25 | #include <altivec.h> | |
26 | #endif | |
27 | ||
28 | #include "libavutil/attributes.h" | |
29 | #include "libavutil/cpu.h" | |
30 | #include "libavutil/ppc/cpu.h" | |
31 | #include "libavutil/ppc/types_altivec.h" | |
32 | #include "libavutil/ppc/util_altivec.h" | |
33 | #include "libavcodec/avcodec.h" | |
34 | #include "libavcodec/pixblockdsp.h" | |
35 | ||
36 | #if HAVE_ALTIVEC | |
37 | ||
38 | static void get_pixels_altivec(int16_t *restrict block, const uint8_t *pixels, | |
39 | ptrdiff_t line_size) | |
40 | { | |
41 | int i; | |
42 | vector unsigned char perm = vec_lvsl(0, pixels); | |
43 | const vector unsigned char zero = | |
44 | (const vector unsigned char) vec_splat_u8(0); | |
45 | ||
46 | for (i = 0; i < 8; i++) { | |
47 | /* Read potentially unaligned pixels. | |
48 | * We're reading 16 pixels, and actually only want 8, | |
49 | * but we simply ignore the extras. */ | |
50 | vector unsigned char pixl = vec_ld(0, pixels); | |
51 | vector unsigned char pixr = vec_ld(7, pixels); | |
52 | vector unsigned char bytes = vec_perm(pixl, pixr, perm); | |
53 | ||
54 | // Convert the bytes into shorts. | |
55 | vector signed short shorts = (vector signed short) vec_mergeh(zero, | |
56 | bytes); | |
57 | ||
58 | // Save the data to the block, we assume the block is 16-byte aligned. | |
59 | vec_st(shorts, i * 16, (vector signed short *) block); | |
60 | ||
61 | pixels += line_size; | |
62 | } | |
63 | } | |
64 | ||
65 | static void diff_pixels_altivec(int16_t *restrict block, const uint8_t *s1, | |
66 | const uint8_t *s2, int stride) | |
67 | { | |
68 | int i; | |
69 | vector unsigned char perm1 = vec_lvsl(0, s1); | |
70 | vector unsigned char perm2 = vec_lvsl(0, s2); | |
71 | const vector unsigned char zero = | |
72 | (const vector unsigned char) vec_splat_u8(0); | |
73 | vector signed short shorts1, shorts2; | |
74 | ||
75 | for (i = 0; i < 4; i++) { | |
76 | /* Read potentially unaligned pixels. | |
77 | * We're reading 16 pixels, and actually only want 8, | |
78 | * but we simply ignore the extras. */ | |
79 | vector unsigned char pixl = vec_ld(0, s1); | |
80 | vector unsigned char pixr = vec_ld(15, s1); | |
81 | vector unsigned char bytes = vec_perm(pixl, pixr, perm1); | |
82 | ||
83 | // Convert the bytes into shorts. | |
84 | shorts1 = (vector signed short) vec_mergeh(zero, bytes); | |
85 | ||
86 | // Do the same for the second block of pixels. | |
87 | pixl = vec_ld(0, s2); | |
88 | pixr = vec_ld(15, s2); | |
89 | bytes = vec_perm(pixl, pixr, perm2); | |
90 | ||
91 | // Convert the bytes into shorts. | |
92 | shorts2 = (vector signed short) vec_mergeh(zero, bytes); | |
93 | ||
94 | // Do the subtraction. | |
95 | shorts1 = vec_sub(shorts1, shorts2); | |
96 | ||
97 | // Save the data to the block, we assume the block is 16-byte aligned. | |
98 | vec_st(shorts1, 0, (vector signed short *) block); | |
99 | ||
100 | s1 += stride; | |
101 | s2 += stride; | |
102 | block += 8; | |
103 | ||
104 | /* The code below is a copy of the code above... | |
105 | * This is a manual unroll. */ | |
106 | ||
107 | /* Read potentially unaligned pixels. | |
108 | * We're reading 16 pixels, and actually only want 8, | |
109 | * but we simply ignore the extras. */ | |
110 | pixl = vec_ld(0, s1); | |
111 | pixr = vec_ld(15, s1); | |
112 | bytes = vec_perm(pixl, pixr, perm1); | |
113 | ||
114 | // Convert the bytes into shorts. | |
115 | shorts1 = (vector signed short) vec_mergeh(zero, bytes); | |
116 | ||
117 | // Do the same for the second block of pixels. | |
118 | pixl = vec_ld(0, s2); | |
119 | pixr = vec_ld(15, s2); | |
120 | bytes = vec_perm(pixl, pixr, perm2); | |
121 | ||
122 | // Convert the bytes into shorts. | |
123 | shorts2 = (vector signed short) vec_mergeh(zero, bytes); | |
124 | ||
125 | // Do the subtraction. | |
126 | shorts1 = vec_sub(shorts1, shorts2); | |
127 | ||
128 | // Save the data to the block, we assume the block is 16-byte aligned. | |
129 | vec_st(shorts1, 0, (vector signed short *) block); | |
130 | ||
131 | s1 += stride; | |
132 | s2 += stride; | |
133 | block += 8; | |
134 | } | |
135 | } | |
136 | ||
137 | #endif /* HAVE_ALTIVEC */ | |
138 | ||
139 | av_cold void ff_pixblockdsp_init_ppc(PixblockDSPContext *c, | |
140 | AVCodecContext *avctx, | |
141 | unsigned high_bit_depth) | |
142 | { | |
143 | #if HAVE_ALTIVEC | |
144 | if (!PPC_ALTIVEC(av_get_cpu_flags())) | |
145 | return; | |
146 | ||
147 | c->diff_pixels = diff_pixels_altivec; | |
148 | ||
149 | if (!high_bit_depth) { | |
150 | c->get_pixels = get_pixels_altivec; | |
151 | } | |
152 | #endif /* HAVE_ALTIVEC */ | |
153 | } |