Commit | Line | Data |
---|---|---|
2ba45a60 DM |
1 | ;****************************************************************************** |
2 | ;* SIMD-optimized clear block functions | |
3 | ;* Copyright (c) 2002 Michael Niedermayer | |
4 | ;* Copyright (c) 2008 Loren Merritt | |
5 | ;* Copyright (c) 2009 Fiona Glaser | |
6 | ;* | |
7 | ;* This file is part of FFmpeg. | |
8 | ;* | |
9 | ;* FFmpeg is free software; you can redistribute it and/or | |
10 | ;* modify it under the terms of the GNU Lesser General Public | |
11 | ;* License as published by the Free Software Foundation; either | |
12 | ;* version 2.1 of the License, or (at your option) any later version. | |
13 | ;* | |
14 | ;* FFmpeg is distributed in the hope that it will be useful, | |
15 | ;* but WITHOUT ANY WARRANTY; without even the implied warranty of | |
16 | ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
17 | ;* Lesser General Public License for more details. | |
18 | ;* | |
19 | ;* You should have received a copy of the GNU Lesser General Public | |
20 | ;* License along with FFmpeg; if not, write to the Free Software | |
21 | ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
22 | ;****************************************************************************** | |
23 | ||
24 | %include "libavutil/x86/x86util.asm" | |
25 | ||
26 | SECTION_TEXT | |
27 | ||
28 | ;---------------------------------------- | |
29 | ; void ff_clear_block(int16_t *blocks); | |
30 | ;---------------------------------------- | |
31 | ; Writes zeros to the mmsize*8*%2 bytes starting at blocks. | |
32 | ; Both instantiations in this file choose %2 so that this is 128 bytes, | |
33 | ; assuming x86inc's mmsize of 8 for INIT_MMX and 16 for INIT_XMM. | |
34 | ; | |
35 | ; %1 = number of xmm registers used | |
36 | ; %2 = number of groups of 8 stores to emit (fully unrolled, no runtime loop) | |
37 | ; | |
38 | ; ZERO must be %defined beforehand to a two-operand xor mnemonic that is | |
39 | ; valid for the active register type; it is used to clear m0. | |
40 | %macro CLEAR_BLOCK 2 | |
41 | cglobal clear_block, 1, 1, %1, blocks | |
42 | ZERO m0, m0 ; m0 = 0, the value written by every store | |
43 | %assign %%offset 0 | |
44 | %rep 8*%2 | |
45 | mova [blocksq+%%offset], m0 ; one full-register store per mmsize bytes | |
46 | %assign %%offset %%offset+mmsize | |
47 | %endrep | |
48 | RET | |
49 | %endmacro | |
50 | ||
51 | %define ZERO pxor | |
52 | INIT_MMX mmx ; MMX variant: m0 is cleared with pxor | |
53 | CLEAR_BLOCK 0, 2 ; no xmm registers, 2 groups of 8 stores | |
54 | %define ZERO xorps | |
55 | INIT_XMM sse ; SSE variant: m0 is cleared with xorps | |
56 | CLEAR_BLOCK 1, 1 ; 1 xmm register, 1 group of 8 stores | |
57 | ||
58 | ;----------------------------------------- | |
59 | ; void ff_clear_blocks(int16_t *blocks); | |
60 | ;----------------------------------------- | |
61 | ; Writes zeros to the 768 bytes at blocks (768 = 6 * 128), walking a | |
62 | ; negative offset from the end of the region up to zero. | |
63 | ; %1 = number of xmm registers used | |
64 | ; ZERO must be %defined beforehand to a two-operand xor mnemonic. | |
65 | %macro CLEAR_BLOCKS 1 | |
66 | cglobal clear_blocks, 1, 2, %1, blocks, len | |
67 | add blocksq, 768 ; blocksq = end of the region to clear | |
68 | mov lenq, -768 ; lenq = offset from that end, counts up to 0 | |
69 | ZERO m0, m0 ; m0 = 0, the value written by every store | |
70 | .loop: ; explicit colon: a bare label draws NASM's orphan-labels warning | |
71 | %assign %%i 0 | |
72 | %rep 8 | |
73 | mova [blocksq+lenq+mmsize*%%i], m0 | |
74 | %assign %%i %%i+1 | |
75 | %endrep | |
76 | add lenq, mmsize*8 ; step past the 8 stores just written | |
77 | js .loop ; sign flag from the add: offset still negative | |
78 | RET | |
79 | %endmacro | |
80 | ||
81 | %define ZERO pxor | |
82 | INIT_MMX mmx ; MMX variant: m0 is cleared with pxor | |
83 | CLEAR_BLOCKS 0 ; no xmm registers | |
84 | %define ZERO xorps | |
85 | INIT_XMM sse ; SSE variant: m0 is cleared with xorps | |
86 | CLEAR_BLOCKS 1 ; 1 xmm register | |