Commit | Line | Data |
---|---|---|
2ba45a60 DM |
1 | /* |
2 | * Alpha optimized DSP utils | |
3 | * Copyright (c) 2002 Falk Hueffner <falk@debian.org> | |
4 | * | |
5 | * This file is part of FFmpeg. | |
6 | * | |
7 | * FFmpeg is free software; you can redistribute it and/or | |
8 | * modify it under the terms of the GNU Lesser General Public | |
9 | * License as published by the Free Software Foundation; either | |
10 | * version 2.1 of the License, or (at your option) any later version. | |
11 | * | |
12 | * FFmpeg is distributed in the hope that it will be useful, | |
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
15 | * Lesser General Public License for more details. | |
16 | * | |
17 | * You should have received a copy of the GNU Lesser General Public | |
18 | * License along with FFmpeg; if not, write to the Free Software | |
19 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
20 | */ | |
21 | ||
22 | #ifndef AVCODEC_ALPHA_ASM_H | |
23 | #define AVCODEC_ALPHA_ASM_H | |
24 | ||
25 | #include <inttypes.h> | |
26 | ||
27 | #include "libavutil/common.h" | |
28 | ||
29 | #if AV_GCC_VERSION_AT_LEAST(2,96) | |
30 | # define likely(x) __builtin_expect((x) != 0, 1) | |
31 | # define unlikely(x) __builtin_expect((x) != 0, 0) | |
32 | #else | |
33 | # define likely(x) (x) | |
34 | # define unlikely(x) (x) | |
35 | #endif | |
36 | ||
/*
 * Single-bit feature masks (bits 0, 1, 2 and 8).
 * NOTE(review): presumably meant to be tested against the result of amask();
 * the names match the Alpha BWX/FIX/CIX/MVI extensions -- confirm against the
 * architecture manual.
 */
#define AMASK_BWX 0x001
#define AMASK_FIX 0x002
#define AMASK_CIX 0x004
#define AMASK_MVI 0x100
41 | ||
/**
 * Spread x over all eight byte lanes of a 64-bit word.
 *
 * x is OR-ed with itself shifted left by 8, then 16, then 32 bits.  For a
 * value that fits in 8 bits the result holds that value in every byte; wider
 * inputs are not masked, so their upper bits bleed into neighbouring lanes.
 */
static inline uint64_t BYTE_VEC(uint64_t x)
{
    int shift;

    /* Each pass doubles the populated width: 8 -> 16 -> 32 -> 64 bits. */
    for (shift = 8; shift <= 32; shift *= 2) {
        x |= x << shift;
    }
    return x;
}
/**
 * Spread x over all four 16-bit lanes of a 64-bit word.
 *
 * x is OR-ed with itself shifted left by 16 bits, and that pair is OR-ed with
 * itself shifted left by 32 bits.  For a value that fits in 16 bits the result
 * holds that value in every lane; wider inputs are not masked.
 */
static inline uint64_t WORD_VEC(uint64_t x)
{
    const uint64_t pair = x | (x << 16);

    return pair | (pair << 32);
}
55 | ||
/* Convert x to int16_t; the name suggests "sign-extend word", i.e. keep the low 16 bits as a signed value. */
56 | #define sextw(x) ((int16_t) (x)) | |
57 | ||
58 | #ifdef __GNUC__ | |
/*
 * ldq(p) / ldl(p): read a uint64_t / int32_t through a pointer of any type.
 *
 * The pointer is cast to a pointer to a const union that overlays the integer
 * being read with an array of the pointee type holding
 * sizeof(integer) / sizeof(*p) elements; the integer member is then read.
 */
#define ldq(p)                                                              \
    (((const union {                                                        \
        uint64_t __quad;                                                    \
        __typeof__(*(p)) __elems[sizeof(uint64_t) / sizeof(*(p))];          \
    } *) (p))->__quad)
#define ldl(p)                                                              \
    (((const union {                                                        \
        int32_t __word;                                                     \
        __typeof__(*(p)) __elems[sizeof(int32_t) / sizeof(*(p))];           \
    } *) (p))->__word)
/*
 * stq(l, p) / stl(l, p): store the value l as a uint64_t / int32_t through a
 * pointer p of any type.
 *
 * The pointer is cast to a pointer to a union that overlays the integer being
 * written with an array of the pointee type holding
 * sizeof(integer) / sizeof(*p) elements; the integer member is then assigned.
 * The value argument is parenthesized in the expansion, matching the
 * definitions used for the DEC C compiler further down.
 */
#define stq(l, p)                                                           \
    do {                                                                    \
        (((union {                                                          \
            uint64_t __l;                                                   \
            __typeof__(*(p)) __s[sizeof (uint64_t) / sizeof *(p)];          \
        } *) (p))->__l) = (l);                                              \
    } while (0)
#define stl(l, p)                                                           \
    do {                                                                    \
        (((union {                                                          \
            int32_t __l;                                                    \
            __typeof__(*(p)) __s[sizeof (int32_t) / sizeof *(p)];           \
        } *) (p))->__l) = (l);                                              \
    } while (0)
/* Packed wrapper so that the compiler assumes no alignment for the 64-bit member. */
struct unaligned_long { uint64_t l; } __attribute__((packed));
/*
 * ldq_u(p): read the aligned 64-bit word that contains address p, i.e. the
 * address is rounded down to a multiple of 8 before the load.  The pointer is
 * converted through uintptr_t and masked with a constant of the same width.
 */
#define ldq_u(p) (*(const uint64_t *) (((uintptr_t) (p)) & ~(uintptr_t) 7))
/* uldq(a): read a 64-bit value from address a through the packed wrapper struct. */
#define uldq(a) (((const struct unaligned_long *) (a))->l)
86 | ||
/*
 * Prefetch hints and single-instruction wrappers.
 * When AV_GCC_VERSION_AT_LEAST(3,3) holds they map onto compiler builtins;
 * otherwise each one is spelled out as inline asm.
 */
87 | #if AV_GCC_VERSION_AT_LEAST(3,3) | |
/*
 * __builtin_prefetch(addr, rw, locality): the four variants differ only in
 * the rw argument (0 or 1) and the locality argument (1 or 0).
 */
88 | #define prefetch(p) __builtin_prefetch((p), 0, 1) | |
89 | #define prefetch_en(p) __builtin_prefetch((p), 0, 0) | |
90 | #define prefetch_m(p) __builtin_prefetch((p), 1, 1) | |
91 | #define prefetch_men(p) __builtin_prefetch((p), 1, 0) | |
92 | #define cmpbge __builtin_alpha_cmpbge | |
/* The ext* wrappers cast their second operand to uint64_t before calling the builtin. */
93 | /* Avoid warnings. */ | |
94 | #define extql(a, b) __builtin_alpha_extql(a, (uint64_t) (b)) | |
95 | #define extwl(a, b) __builtin_alpha_extwl(a, (uint64_t) (b)) | |
96 | #define extqh(a, b) __builtin_alpha_extqh(a, (uint64_t) (b)) | |
97 | #define zap __builtin_alpha_zap | |
98 | #define zapnot __builtin_alpha_zapnot | |
99 | #define amask __builtin_alpha_amask | |
100 | #define implver __builtin_alpha_implver | |
101 | #define rpcc __builtin_alpha_rpcc | |
102 | #else | |
/*
 * Fallback prefetches: loads of the byte at p into register $31 / $f31, with
 * a "memory" clobber.
 * NOTE(review): presumably relies on loads into $31/$f31 acting as prefetch
 * hints on Alpha -- confirm against the architecture manual.
 */
103 | #define prefetch(p) __asm__ volatile("ldl $31,%0" : : "m"(*(const char *) (p)) : "memory") | |
104 | #define prefetch_en(p) __asm__ volatile("ldq $31,%0" : : "m"(*(const char *) (p)) : "memory") | |
105 | #define prefetch_m(p) __asm__ volatile("lds $f31,%0" : : "m"(*(const char *) (p)) : "memory") | |
106 | #define prefetch_men(p) __asm__ volatile("ldt $f31,%0" : : "m"(*(const char *) (p)) : "memory") | |
/*
 * Each wrapper below is a statement expression that emits one instruction
 * and evaluates to its uint64_t result register.
 */
107 | #define cmpbge(a, b) ({ uint64_t __r; __asm__ ("cmpbge %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; }) | |
108 | #define extql(a, b) ({ uint64_t __r; __asm__ ("extql %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; }) | |
109 | #define extwl(a, b) ({ uint64_t __r; __asm__ ("extwl %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; }) | |
110 | #define extqh(a, b) ({ uint64_t __r; __asm__ ("extqh %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; }) | |
111 | #define zap(a, b) ({ uint64_t __r; __asm__ ("zap %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; }) | |
112 | #define zapnot(a, b) ({ uint64_t __r; __asm__ ("zapnot %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; }) | |
113 | #define amask(a) ({ uint64_t __r; __asm__ ("amask %1,%0" : "=r" (__r) : "rI" (a)); __r; }) | |
114 | #define implver() ({ uint64_t __r; __asm__ ("implver %0" : "=r" (__r)); __r; }) | |
/* rpcc is the only wrapper in this group whose asm is marked volatile. */
115 | #define rpcc() ({ uint64_t __r; __asm__ volatile ("rpcc %0" : "=r" (__r)); __r; }) | |
116 | #endif | |
/*
 * Emit the wh64 instruction on the address held in p; the asm is volatile
 * and declares a "memory" clobber.
 * NOTE(review): wh64 is presumably Alpha's 64-byte write hint -- confirm
 * against the architecture manual.
 */
117 | #define wh64(p) __asm__ volatile("wh64 (%0)" : : "r"(p) : "memory") | |
118 | ||
/*
 * Packed min/max, perr and pack/unpack wrappers.
 * When AV_GCC_VERSION_AT_LEAST(3,3) holds and __alpha_max__ is defined they
 * map onto compiler builtins; otherwise each one is emitted as inline asm
 * whose template starts with a ".arch ev6" assembler directive.
 */
119 | #if AV_GCC_VERSION_AT_LEAST(3,3) && defined(__alpha_max__) | |
120 | #define minub8 __builtin_alpha_minub8 | |
121 | #define minsb8 __builtin_alpha_minsb8 | |
122 | #define minuw4 __builtin_alpha_minuw4 | |
123 | #define minsw4 __builtin_alpha_minsw4 | |
124 | #define maxub8 __builtin_alpha_maxub8 | |
125 | #define maxsb8 __builtin_alpha_maxsb8 | |
126 | #define maxuw4 __builtin_alpha_maxuw4 | |
127 | #define maxsw4 __builtin_alpha_maxsw4 | |
128 | #define perr __builtin_alpha_perr | |
129 | #define pklb __builtin_alpha_pklb | |
130 | #define pkwb __builtin_alpha_pkwb | |
131 | #define unpkbl __builtin_alpha_unpkbl | |
132 | #define unpkbw __builtin_alpha_unpkbw | |
133 | #else | |
/*
 * Two-operand forms: the first input constraint carries a leading '%'
 * (GCC's commutative-operand modifier).  Each wrapper is a statement
 * expression that evaluates to the uint64_t result register.
 */
134 | #define minub8(a, b) ({ uint64_t __r; __asm__ (".arch ev6; minub8 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) | |
135 | #define minsb8(a, b) ({ uint64_t __r; __asm__ (".arch ev6; minsb8 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) | |
136 | #define minuw4(a, b) ({ uint64_t __r; __asm__ (".arch ev6; minuw4 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) | |
137 | #define minsw4(a, b) ({ uint64_t __r; __asm__ (".arch ev6; minsw4 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) | |
138 | #define maxub8(a, b) ({ uint64_t __r; __asm__ (".arch ev6; maxub8 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) | |
139 | #define maxsb8(a, b) ({ uint64_t __r; __asm__ (".arch ev6; maxsb8 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) | |
140 | #define maxuw4(a, b) ({ uint64_t __r; __asm__ (".arch ev6; maxuw4 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) | |
141 | #define maxsw4(a, b) ({ uint64_t __r; __asm__ (".arch ev6; maxsw4 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) | |
/* perr differs from the min/max forms: its second operand uses %r2 with an "rJ" constraint. */
142 | #define perr(a, b) ({ uint64_t __r; __asm__ (".arch ev6; perr %r1,%r2,%0" : "=r" (__r) : "%rJ" (a), "rJ" (b)); __r; }) | |
/* One-operand pack/unpack forms. */
143 | #define pklb(a) ({ uint64_t __r; __asm__ (".arch ev6; pklb %r1,%0" : "=r" (__r) : "rJ" (a)); __r; }) | |
144 | #define pkwb(a) ({ uint64_t __r; __asm__ (".arch ev6; pkwb %r1,%0" : "=r" (__r) : "rJ" (a)); __r; }) | |
145 | #define unpkbl(a) ({ uint64_t __r; __asm__ (".arch ev6; unpkbl %r1,%0" : "=r" (__r) : "rJ" (a)); __r; }) | |
146 | #define unpkbw(a) ({ uint64_t __r; __asm__ (".arch ev6; unpkbw %r1,%0" : "=r" (__r) : "rJ" (a)); __r; }) | |
147 | #endif | |
148 | ||
149 | #elif defined(__DECC) /* Digital/Compaq/hp "ccc" compiler */ | |
150 | ||
151 | #include <c_asm.h> | |
/*
 * Definitions for the __DECC compiler branch.
 * Loads and stores are plain pointer casts, uldq uses the __unaligned
 * qualifier, and every instruction wrapper goes through asm(), presumably
 * the intrinsic provided by <c_asm.h>.
 * Note: this branch defines no likely-aligned prefetch*() macros, unlike the
 * __GNUC__ branch.
 */
152 | #define ldq(p) (*(const uint64_t *) (p)) | |
153 | #define ldl(p) (*(const int32_t *) (p)) | |
154 | #define stq(l, p) do { *(uint64_t *) (p) = (l); } while (0) | |
155 | #define stl(l, p) do { *(int32_t *) (p) = (l); } while (0) | |
156 | #define ldq_u(a) asm ("ldq_u %v0,0(%a0)", a) | |
157 | #define uldq(a) (*(const __unaligned uint64_t *) (a)) | |
/* Instruction wrappers: %a0/%a1 name the arguments and %v0 the result in the asm() template. */
158 | #define cmpbge(a, b) asm ("cmpbge %a0,%a1,%v0", a, b) | |
159 | #define extql(a, b) asm ("extql %a0,%a1,%v0", a, b) | |
160 | #define extwl(a, b) asm ("extwl %a0,%a1,%v0", a, b) | |
161 | #define extqh(a, b) asm ("extqh %a0,%a1,%v0", a, b) | |
162 | #define zap(a, b) asm ("zap %a0,%a1,%v0", a, b) | |
163 | #define zapnot(a, b) asm ("zapnot %a0,%a1,%v0", a, b) | |
164 | #define amask(a) asm ("amask %a0,%v0", a) | |
165 | #define implver() asm ("implver %v0") | |
166 | #define rpcc() asm ("rpcc %v0") | |
167 | #define minub8(a, b) asm ("minub8 %a0,%a1,%v0", a, b) | |
168 | #define minsb8(a, b) asm ("minsb8 %a0,%a1,%v0", a, b) | |
169 | #define minuw4(a, b) asm ("minuw4 %a0,%a1,%v0", a, b) | |
170 | #define minsw4(a, b) asm ("minsw4 %a0,%a1,%v0", a, b) | |
171 | #define maxub8(a, b) asm ("maxub8 %a0,%a1,%v0", a, b) | |
172 | #define maxsb8(a, b) asm ("maxsb8 %a0,%a1,%v0", a, b) | |
173 | #define maxuw4(a, b) asm ("maxuw4 %a0,%a1,%v0", a, b) | |
174 | #define maxsw4(a, b) asm ("maxsw4 %a0,%a1,%v0", a, b) | |
175 | #define perr(a, b) asm ("perr %a0,%a1,%v0", a, b) | |
176 | #define pklb(a) asm ("pklb %a0,%v0", a) | |
177 | #define pkwb(a) asm ("pkwb %a0,%v0", a) | |
178 | #define unpkbl(a) asm ("unpkbl %a0,%v0", a) | |
179 | #define unpkbw(a) asm ("unpkbw %a0,%v0", a) | |
180 | #define wh64(a) asm ("wh64 %a0", a) | |
181 | ||
182 | #else | |
183 | #error "Unknown compiler!" | |
184 | #endif | |
185 | ||
186 | #endif /* AVCODEC_ALPHA_ASM_H */ |