Commit | Line | Data |
---|---|---|
2ba45a60 DM |
1 | /* |
2 | * Alpha optimized DSP utils | |
3 | * Copyright (c) 2002 Falk Hueffner <falk@debian.org> | |
4 | * | |
5 | * This file is part of FFmpeg. | |
6 | * | |
7 | * FFmpeg is free software; you can redistribute it and/or | |
8 | * modify it under the terms of the GNU Lesser General Public | |
9 | * License as published by the Free Software Foundation; either | |
10 | * version 2.1 of the License, or (at your option) any later version. | |
11 | * | |
12 | * FFmpeg is distributed in the hope that it will be useful, | |
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
15 | * Lesser General Public License for more details. | |
16 | * | |
17 | * You should have received a copy of the GNU Lesser General Public | |
18 | * License along with FFmpeg; if not, write to the Free Software | |
19 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
20 | */ | |
21 | ||
22 | #include "libavutil/attributes.h" | |
23 | #include "libavcodec/mpegvideo.h" | |
24 | #include "asm.h" | |
25 | ||
/*
 * Shared H.263 dequantization kernel working on four 16-bit coefficients
 * per iteration, packed into one 64-bit quadword.
 *
 * block    - coefficients, rewritten in place one quadword at a time via
 *            ldq()/stq() (presumably needs the alignment those helpers
 *            expect - confirm against asm.h).
 * n_coeffs - index of the last coefficient to process; the loop keeps
 *            going while its position is <= n_coeffs, in steps of four.
 * qscale   - quantizer scale; every level is multiplied by 2 * qscale.
 * qadd     - rounding term, replicated into every 16-bit word. It is added
 *            to positive levels and subtracted from negative ones. The
 *            zero-level handling below relies on qadd <= 255.
 *
 * Quadwords that are entirely zero are skipped without being stored.
 */
static void dct_unquantize_h263_axp(int16_t *block, int n_coeffs,
                                    uint64_t qscale, uint64_t qadd)
{
    const uint64_t mul       = qscale << 1;
    const uint64_t carry_fix = WORD_VEC(mul * 255 >> 8);
    const uint64_t qadd_vec  = WORD_VEC(qadd);
    int pos = 0;

    while (pos <= n_coeffs) {
        uint64_t quad = ldq(block);

        if (quad != 0) {
            uint64_t sign_mask, zero_bytes, pos_add, neg_sub;

            /* Build a per-word mask: 0xffff for negative levels, 0x0000
               for the rest. */
#ifdef __alpha_max__
            /* The MVI variant is chosen at compile time; the gain was not
               considered worth a runtime check. */
            sign_mask = maxsw4(quad, -1);       /* negative words -> -1 */
            sign_mask = minsw4(sign_mask, 0);   /* positive words ->  0 */
#else
            sign_mask  = cmpbge(WORD_VEC(0x7fff), quad);
            sign_mask &= (sign_mask >> 1) | (1 << 7);
            sign_mask  = zap(-1, sign_mask);
#endif

            /* Byte mask locating zero levels. It is not widened to cover
               the upper byte of each word (zero_bytes |= zero_bytes << 1)
               because qadd <= 255 lives entirely in the lower byte. */
            zero_bytes  = cmpbge(0, quad);
            zero_bytes &= zero_bytes >> 1;

            /* One 64-bit multiply scales all four words; the correction is
               then subtracted from each word sitting directly above a
               negative one. */
            quad *= mul;
            quad -= carry_fix & (sign_mask << 16);

            pos_add = qadd_vec & ~sign_mask;
            neg_sub = qadd_vec & sign_mask;
            /* Levels equal to zero must not receive qadd. */
            pos_add = zap(pos_add, zero_bytes);
            quad += pos_add;
            quad -= neg_sub;

            stq(quad, block);
        }

        block += 4;
        pos   += 4;
    }
}
71 | ||
72 | static void dct_unquantize_h263_intra_axp(MpegEncContext *s, int16_t *block, | |
73 | int n, int qscale) | |
74 | { | |
75 | int n_coeffs; | |
76 | uint64_t qadd; | |
77 | int16_t block0 = block[0]; | |
78 | ||
79 | if (!s->h263_aic) { | |
80 | if (n < 4) | |
81 | block0 *= s->y_dc_scale; | |
82 | else | |
83 | block0 *= s->c_dc_scale; | |
84 | qadd = (qscale - 1) | 1; | |
85 | } else { | |
86 | qadd = 0; | |
87 | } | |
88 | ||
89 | if(s->ac_pred) | |
90 | n_coeffs = 63; | |
91 | else | |
92 | n_coeffs = s->inter_scantable.raster_end[s->block_last_index[n]]; | |
93 | ||
94 | dct_unquantize_h263_axp(block, n_coeffs, qscale, qadd); | |
95 | ||
96 | block[0] = block0; | |
97 | } | |
98 | ||
99 | static void dct_unquantize_h263_inter_axp(MpegEncContext *s, int16_t *block, | |
100 | int n, int qscale) | |
101 | { | |
102 | int n_coeffs = s->inter_scantable.raster_end[s->block_last_index[n]]; | |
103 | dct_unquantize_h263_axp(block, n_coeffs, qscale, (qscale - 1) | 1); | |
104 | } | |
105 | ||
106 | av_cold void ff_mpv_common_init_axp(MpegEncContext *s) | |
107 | { | |
108 | s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_axp; | |
109 | s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_axp; | |
110 | } |