Commit | Line | Data |
---|---|---|
2ba45a60 DM |
1 | /* |
2 | * Copyright (c) 2002 Dieter Shirley | |
3 | * | |
4 | * dct_unquantize_h263_altivec: | |
5 | * Copyright (c) 2003 Romain Dolbeau <romain@dolbeau.org> | |
6 | * | |
7 | * This file is part of FFmpeg. | |
8 | * | |
9 | * FFmpeg is free software; you can redistribute it and/or | |
10 | * modify it under the terms of the GNU Lesser General Public | |
11 | * License as published by the Free Software Foundation; either | |
12 | * version 2.1 of the License, or (at your option) any later version. | |
13 | * | |
14 | * FFmpeg is distributed in the hope that it will be useful, | |
15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
17 | * Lesser General Public License for more details. | |
18 | * | |
19 | * You should have received a copy of the GNU Lesser General Public | |
20 | * License along with FFmpeg; if not, write to the Free Software | |
21 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
22 | */ | |
23 | ||
24 | #include <stdlib.h> | |
25 | #include <stdio.h> | |
26 | ||
27 | #include "config.h" | |
28 | #include "libavutil/attributes.h" | |
29 | #include "libavutil/cpu.h" | |
30 | #include "libavutil/ppc/cpu.h" | |
31 | #include "libavutil/ppc/types_altivec.h" | |
32 | #include "libavutil/ppc/util_altivec.h" | |
33 | #include "libavcodec/mpegvideo.h" | |
34 | ||
35 | #if HAVE_ALTIVEC | |
36 | ||
37 | /* AltiVec version of dct_unquantize_h263 | |
38 | this code assumes `block' is 16 bytes-aligned */ | |
39 | static void dct_unquantize_h263_altivec(MpegEncContext *s, | |
40 | int16_t *block, int n, int qscale) | |
41 | { | |
42 | int i, level, qmul, qadd; | |
43 | int nCoeffs; | |
44 | ||
45 | assert(s->block_last_index[n]>=0); | |
46 | ||
47 | qadd = (qscale - 1) | 1; | |
48 | qmul = qscale << 1; | |
49 | ||
50 | if (s->mb_intra) { | |
51 | if (!s->h263_aic) { | |
52 | if (n < 4) | |
53 | block[0] = block[0] * s->y_dc_scale; | |
54 | else | |
55 | block[0] = block[0] * s->c_dc_scale; | |
56 | }else | |
57 | qadd = 0; | |
58 | i = 1; | |
59 | nCoeffs= 63; //does not always use zigzag table | |
60 | } else { | |
61 | i = 0; | |
62 | nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]; | |
63 | } | |
64 | ||
65 | { | |
66 | register const vector signed short vczero = (const vector signed short)vec_splat_s16(0); | |
67 | DECLARE_ALIGNED(16, short, qmul8) = qmul; | |
68 | DECLARE_ALIGNED(16, short, qadd8) = qadd; | |
69 | register vector signed short blockv, qmulv, qaddv, nqaddv, temp1; | |
70 | register vector bool short blockv_null, blockv_neg; | |
71 | register short backup_0 = block[0]; | |
72 | register int j = 0; | |
73 | ||
74 | qmulv = vec_splat((vec_s16)vec_lde(0, &qmul8), 0); | |
75 | qaddv = vec_splat((vec_s16)vec_lde(0, &qadd8), 0); | |
76 | nqaddv = vec_sub(vczero, qaddv); | |
77 | ||
78 | // vectorize all the 16 bytes-aligned blocks | |
79 | // of 8 elements | |
80 | for(; (j + 7) <= nCoeffs ; j+=8) { | |
81 | blockv = vec_ld(j << 1, block); | |
82 | blockv_neg = vec_cmplt(blockv, vczero); | |
83 | blockv_null = vec_cmpeq(blockv, vczero); | |
84 | // choose between +qadd or -qadd as the third operand | |
85 | temp1 = vec_sel(qaddv, nqaddv, blockv_neg); | |
86 | // multiply & add (block{i,i+7} * qmul [+-] qadd) | |
87 | temp1 = vec_mladd(blockv, qmulv, temp1); | |
88 | // put 0 where block[{i,i+7} used to have 0 | |
89 | blockv = vec_sel(temp1, blockv, blockv_null); | |
90 | vec_st(blockv, j << 1, block); | |
91 | } | |
92 | ||
93 | // if nCoeffs isn't a multiple of 8, finish the job | |
94 | // using good old scalar units. | |
95 | // (we could do it using a truncated vector, | |
96 | // but I'm not sure it's worth the hassle) | |
97 | for(; j <= nCoeffs ; j++) { | |
98 | level = block[j]; | |
99 | if (level) { | |
100 | if (level < 0) { | |
101 | level = level * qmul - qadd; | |
102 | } else { | |
103 | level = level * qmul + qadd; | |
104 | } | |
105 | block[j] = level; | |
106 | } | |
107 | } | |
108 | ||
109 | if (i == 1) { | |
110 | // cheat. this avoid special-casing the first iteration | |
111 | block[0] = backup_0; | |
112 | } | |
113 | } | |
114 | } | |
115 | ||
116 | #endif /* HAVE_ALTIVEC */ | |
117 | ||
118 | av_cold void ff_mpv_common_init_ppc(MpegEncContext *s) | |
119 | { | |
120 | #if HAVE_ALTIVEC | |
121 | if (!PPC_ALTIVEC(av_get_cpu_flags())) | |
122 | return; | |
123 | ||
124 | if ((s->avctx->dct_algo == FF_DCT_AUTO) || | |
125 | (s->avctx->dct_algo == FF_DCT_ALTIVEC)) { | |
126 | s->dct_unquantize_h263_intra = dct_unquantize_h263_altivec; | |
127 | s->dct_unquantize_h263_inter = dct_unquantize_h263_altivec; | |
128 | } | |
129 | #endif /* HAVE_ALTIVEC */ | |
130 | } |