Commit | Line | Data |
---|---|---|
2ba45a60 DM |
1 | ;************************************************************************ |
2 | ;* VC3/DNxHD SIMD functions | |
3 | ;* Copyright (c) 2007 Baptiste Coudurier <baptiste dot coudurier at smartjog dot com> | |
4 | ;* Copyright (c) 2014 Tiancheng "Timothy" Gu <timothygu99@gmail.com> | |
5 | ;* | |
6 | ;* This file is part of FFmpeg. | |
7 | ;* | |
8 | ;* FFmpeg is free software; you can redistribute it and/or | |
9 | ;* modify it under the terms of the GNU Lesser General Public | |
10 | ;* License as published by the Free Software Foundation; either | |
11 | ;* version 2.1 of the License, or (at your option) any later version. | |
12 | ;* | |
13 | ;* FFmpeg is distributed in the hope that it will be useful, | |
14 | ;* but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
16 | ;* Lesser General Public License for more details. | |
17 | ;* | |
18 | ;* You should have received a copy of the GNU Lesser General Public | |
19 | ;* License along with FFmpeg; if not, write to the Free Software | |
20 | ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
21 | ;****************************************************************************** | |
22 | ||
23 | %include "libavutil/x86/x86util.asm" | |
24 | ||
25 | section .text | |
26 | ||
27 | ; void get_pixels_8x4_sym_sse2(int16_t *block, const uint8_t *pixels, ptrdiff_t line_size) | |
28 | ; Reads 4 rows of 8 bytes from pixels, zero-extends each byte to a 16-bit word, and writes 8 rows of 16 bytes to block: rows 0..3 followed by their vertical mirror (rows 3..0). | |
29 | INIT_XMM sse2 | |
30 | cglobal get_pixels_8x4_sym, 3,3,5, block, pixels, linesize ; three named arguments; m0-m4 are the five vector registers used below (NOTE(review): 3,3,5 read as args, GPRs, vector regs per x86inc convention - confirm) | |
31 | pxor m4, m4 ; m4 = 0: supplies the zero high byte for the widening below | |
32 | movq m0, [pixelsq] ; row 0: 8 source bytes | |
33 | add pixelsq, linesizeq ; step to row 1 so that rows 2 and 3 are reachable as +1x and +2x stride (x86 addressing has no 3x scale) | |
34 | movq m1, [pixelsq] ; row 1 | |
35 | movq m2, [pixelsq+linesizeq] ; row 2 | |
36 | movq m3, [pixelsq+linesizeq*2] ; row 3 | |
37 | punpcklbw m0, m4 ; interleave each low byte with a zero byte: 8 bytes become 8 zero-extended 16-bit words | |
38 | punpcklbw m1, m4 ; same widening for row 1 | |
39 | punpcklbw m2, m4 ; same widening for row 2 | |
40 | punpcklbw m3, m4 ; same widening for row 3 | |
41 | mova [blockq ], m0 ; output row 0 (each output row is 16 bytes); NOTE(review): mova is presumably an aligned store, so block would need 16-byte alignment - confirm | |
42 | mova [blockq+16 ], m1 ; output row 1 | |
43 | mova [blockq+32 ], m2 ; output row 2 | |
44 | mova [blockq+48 ], m3 ; output row 3 | |
45 | mova [blockq+64 ], m3 ; output row 4 = source row 3 (mirrored half starts here) | |
46 | mova [blockq+80 ], m2 ; output row 5 = source row 2 | |
47 | mova [blockq+96 ], m1 ; output row 6 = source row 1 | |
48 | mova [blockq+112], m0 ; output row 7 = source row 0 | |
49 | RET |