Commit | Line | Data |
---|---|---|
2ba45a60 DM |
1 | ;****************************************************************************** |
2 | ;* Copyright (c) 2012 Loren Merritt | |
3 | ;* | |
4 | ;* This file is part of FFmpeg. | |
5 | ;* | |
6 | ;* FFmpeg is free software; you can redistribute it and/or | |
7 | ;* modify it under the terms of the GNU Lesser General Public | |
8 | ;* License as published by the Free Software Foundation; either | |
9 | ;* version 2.1 of the License, or (at your option) any later version. | |
10 | ;* | |
11 | ;* FFmpeg is distributed in the hope that it will be useful, | |
12 | ;* but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 | ;* Lesser General Public License for more details. | |
15 | ;* | |
16 | ;* You should have received a copy of the GNU Lesser General Public | |
17 | ;* License along with FFmpeg; if not, write to the Free Software | |
18 | ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
19 | ;****************************************************************************** | |
20 | ||
21 | %include "libavutil/x86/x86util.asm" | |
22 | ||
23 | SECTION .text | |
24 | ||
; LOWPASS prev, cur, lut
;
; Replaces prev with  cur + lut[(prev - cur) >> (8 - lut_bits)].
;
;   %1  prev  register name without size suffix; overwritten with the result
;   %2  cur   register name without size suffix; left unchanged
;   %3  lut   register name without size suffix; table of signed 16-bit
;             entries, indexed with a signed offset (so entries for negative
;             differences are expected below the pointer -- confirm against
;             the code that builds the table)
;
; lut_bits is an assembly-time constant that must be assigned before the
; macro is expanded. Clobbers flags.
%macro LOWPASS 3 ; prevsample, cursample, lut
    sub    %1q, %2q                 ; signed difference prev - cur
%if lut_bits != 8
    sar    %1q, 8-lut_bits          ; arithmetic shift: the index keeps its sign
%endif
    ; Load and add at full register width. With a 32-bit destination the
    ; result is zero-extended on x86-64, so a slightly negative sum turns into
    ; a value close to 2^32; the next LOWPASS subtracts and shifts at full
    ; width and would then index far outside the table. Keeping the sign in
    ; the whole register avoids that. The low 16/32 bits are the same either
    ; way. (On x86-32 the q and d names presumably refer to the same register
    ; -- x86inc convention, not visible in this file.)
    movsx  %1q, word [%3q+%1q*2]
    add    %1q, %2q
%endmacro
33 | ||
; LOAD dst, x, bitdepth
;
; Zero-extending load of the sample at element index x relative to srcq,
; normalised to a 16-bit scale.
;
;   %1  dst       destination register, including its size suffix
;   %2  x         element index expression (scaled by 2 for word samples)
;   %3  bitdepth  bits per sample; 8 selects byte loads, anything else word loads
;
; For depths other than 16 the sample is shifted up by (16 - bitdepth) and
; (1 << (15 - bitdepth)) - 1 is added, i.e. one less than half an input step.
; Clobbers flags in that case.
%macro LOAD 3 ; dstreg, x, bitdepth
%if %3 != 8
    movzx  %1, word [srcq+(%2)*2]   ; samples wider than a byte are stored as words
%else
    movzx  %1, byte [srcq+%2]
%endif
%if %3 != 16
    shl    %1, 16-%3                ; move the sample into the top bits of a 16-bit value
    add    %1, (1<<(15-%3))-1       ; fill the low bits with the bias
%endif
%endmacro
45 | ||
; HQDN3D_ROW bitdepth
;
; Emits the function declared as hqdn3d_row_<bitdepth>_x86, which filters one
; row of `width` samples.
;
; Arguments in cglobal order:
;   src, dst            sample rows; 1 byte per sample at depth 8, else 2 bytes
;   lineant, frameant   rows of 16-bit words, read and rewritten in place
;   width               number of samples in the row
;   spatial, temporal   lookup tables handed to LOWPASS
;
; Per sample x:
;   lineant[x]  = LOWPASS(lineant[x], pixelant, spatial)
;   pixelant    = LOWPASS(pixelant, src[x+1], spatial)
;   frameant[x] = LOWPASS(frameant[x], lineant[x], temporal)
;   dst[x]      = frameant[x] >> (16 - bitdepth)
;
; On x86-64 pixelant/t0/t1 get registers of their own (10 GPRs requested).
; On x86-32 only 7 are requested, so the four row pointers are written back to
; their argument slots and reloaded through r0 when needed.
;
; NOTE(review): with width == 1 the first pass still enters through the top
; label and performs the x+1 load -- confirm callers never rely on that being
; skipped.
%macro HQDN3D_ROW 1 ; bitdepth
%if ARCH_X86_64
cglobal hqdn3d_row_%1_x86, 7,10,0, src, dst, lineant, frameant, width, spatial, temporal, pixelant, t0, t1
%else
cglobal hqdn3d_row_%1_x86, 7,7,0, src, dst, lineant, frameant, width, spatial, temporal
%endif
    %assign lut_bits  4+4*(%1/16)   ; 8 at depth 16, 4 at the other instantiated depths
    %assign bytedepth (%1+7)>>3     ; bytes per src/dst sample

    ; Point every row at its last element and let the index run from
    ; -(width-1) up to 0, so the loop can branch on the flags of `inc`.
    dec    widthq
    lea    lineantq,  [lineantq+widthq*2]
    lea    frameantq, [frameantq+widthq*2]
    lea    dstq,      [dstq+widthq*bytedepth]
    lea    srcq,      [srcq+widthq*bytedepth]
    neg    widthq
    %define xq widthq
%if ARCH_X86_32
    ; Park the adjusted pointers in their argument slots, then reuse the
    ; registers: r0 for whichever pointer is live, r1 for pixelant, r2/r3 as
    ; temporaries.
    mov    lineantmp,  lineantq
    mov    frameantmp, frameantq
    mov    srcmp,      srcq
    mov    dstmp,      dstq
    %define lineantq  r0
    %define frameantq r0
    %define dstq      r0
    %define pixelantd r1d
    %define pixelantq r1
    DECLARE_REG_TMP 2,3
%endif
    LOAD   pixelantd, xq, %1        ; first sample of the row

ALIGN 16
.next_sample:
    movifnidn srcq, srcmp
    LOAD      t0d, xq+1, %1         ; bypassed on the final pass so nothing past the row is read
.filter:
    ; spatial pass against the line above
    movifnidn lineantq, lineantmp
    movzx     t1d, word [lineantq+xq*2]
    LOWPASS   t1, pixelant, spatial
    mov       [lineantq+xq*2], t1w
    ; horizontal pass: prepare pixelant for the following sample
    LOWPASS   pixelant, t0, spatial
    ; temporal pass against the previous frame
    movifnidn frameantq, frameantmp
    movzx     t0d, word [frameantq+xq*2]
    LOWPASS   t0, t1, temporal
    mov       [frameantq+xq*2], t0w
%if %1 != 16
    shr       t0d, 16-%1            ; could be avoided by storing from t0h, but only with some constraints on register allocation
%endif
    movifnidn dstq, dstmp
%if %1 != 8
    mov       [dstq+xq*2], t0w
%else
    mov       [dstq+xq], t0b
%endif
    inc    xq
    jl     .next_sample             ; more than one sample left: fetch the next one
    je     .filter                  ; last sample: filter it without fetching x+1
    REP_RET
%endmacro ; HQDN3D_ROW
102 | ||
; One expansion per sample depth: 8, 9, 10 and 16 bits. Each emits the
; function that HQDN3D_ROW declares as hqdn3d_row_<depth>_x86.
HQDN3D_ROW  8
HQDN3D_ROW  9
HQDN3D_ROW 10
HQDN3D_ROW 16