Imported Debian version 2.5.0~trusty1.1
[deb_ffmpeg.git] / ffmpeg / libavcodec / x86 / pixblockdsp.asm
CommitLineData
2ba45a60
DM
1;*****************************************************************************
2;* SIMD-optimized pixel operations
3;*****************************************************************************
4;* Copyright (c) 2000, 2001 Fabrice Bellard
5;* Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
6;*
7;* This file is part of FFmpeg.
8;*
9;* FFmpeg is free software; you can redistribute it and/or
10;* modify it under the terms of the GNU Lesser General Public
11;* License as published by the Free Software Foundation; either
12;* version 2.1 of the License, or (at your option) any later version.
13;*
14;* FFmpeg is distributed in the hope that it will be useful,
15;* but WITHOUT ANY WARRANTY; without even the implied warranty of
16;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17;* Lesser General Public License for more details.
18;*
19;* You should have received a copy of the GNU Lesser General Public
20;* License along with FFmpeg; if not, write to the Free Software
21;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22;*****************************************************************************
23
24%include "libavutil/x86/x86util.asm"
25
26SECTION .text
27
INIT_MMX mmx
; void ff_get_pixels_mmx(int16_t *block, const uint8_t *pixels, ptrdiff_t line_size)
;
; Widens unsigned bytes read from pixels into 16-bit words stored in block.
; Every loop pass reads two source rows of 8 bytes each and writes 32 bytes
; of output; the 128-byte output range is filled in four passes (8 rows).
;
;   r0 = block, biased by +128 so that r0+r3 walks block+0 .. block+127
;   r1 = pixels (current row pair)
;   r2 = line_size
;   r3 = output offset, counting from -128 up to 0
;   m7 = all zero, interleaved in as the high byte of every output word
cglobal get_pixels, 3,4
    pxor         m7, m7
    mov          r3, -128
    add          r0, 128
.next_row_pair:
    ; first row: split its 8 bytes into a low and a high group of 4 words
    mova         m0, [r1]
    mova         m1, m0
    punpcklbw    m0, m7
    punpckhbw    m1, m7
    ; second row, one line_size further down
    mova         m2, [r1+r2]
    mova         m3, m2
    punpcklbw    m2, m7
    punpckhbw    m3, m7
    ; all loads are done before the first store of this pass
    mova [r0+r3+ 0], m0
    mova [r0+r3+ 8], m1
    mova [r0+r3+16], m2
    mova [r0+r3+24], m3
    lea          r1, [r1+r2*2]      ; step two rows; lea leaves the flags alone
    add          r3, 32
    js .next_row_pair               ; keep going while the offset is negative
    REP_RET
51
INIT_XMM sse2
; SSE2 variant of get_pixels, fully unrolled (no loop).
;
; Reads 8 rows of 8 bytes starting at r1, zero-extends every byte to a
; 16-bit word and stores each row as 16 bytes at r0 + 16*row.
;
;   r0 = destination block; written with mova, so it is presumably expected
;        to be 16-byte aligned -- confirm against the callers
;   r1 = source pixels
;   r2 = distance in bytes between source rows
;   r3 = 3 * r2, used to address the fourth row of each group
;   m4 = all zero, interleaved in as the high byte of every output word
cglobal get_pixels, 3, 4, 5
    pxor         m4, m4
    lea          r3, [r2*3]

    ; rows 0..3: load and widen, then store
    movh         m0, [r1]
    punpcklbw    m0, m4
    movh         m1, [r1+r2]
    punpcklbw    m1, m4
    movh         m2, [r1+r2*2]
    punpcklbw    m2, m4
    movh         m3, [r1+r3]
    punpcklbw    m3, m4
    mova       [r0+ 0], m0
    mova       [r0+16], m1
    mova       [r0+32], m2
    mova       [r0+48], m3

    lea          r1, [r1+r2*4]      ; move on to row 4

    ; rows 4..7: same pattern, output offsets 64..112
    movh         m0, [r1]
    punpcklbw    m0, m4
    movh         m1, [r1+r2]
    punpcklbw    m1, m4
    movh         m2, [r1+r2*2]
    punpcklbw    m2, m4
    movh         m3, [r1+r3]
    punpcklbw    m3, m4
    mova       [r0+ 64], m0
    mova       [r0+ 80], m1
    mova       [r0+ 96], m2
    mova       [r0+112], m3
    RET
82
INIT_MMX mmx
; void ff_diff_pixels_mmx(int16_t *block, const uint8_t *s1, const uint8_t *s2,
;                         int stride);
;
; For each of 8 rows, widens 8 bytes from s1 and 8 bytes from s2 to 16-bit
; words and stores the word-wise difference s1 - s2 into block (16 bytes
; of output per row, 128 bytes in total).
;
;   r0 = block, biased by +128 so that r0+r4 walks block+0 .. block+127
;   r1 = s1, r2 = s2 (both advanced by stride after every row)
;   r3 = stride
;   r4 = output offset, counting from -128 up to 0
;   m7 = all zero, interleaved in as the high byte of every word
cglobal diff_pixels, 4,5
    movsxdifnidn r3, r3d            ; stride is an int: widen it for addressing
    mov          r4, -128
    add          r0, 128
    pxor         m7, m7
.next_row:
    ; s1 row -> m0 (low 4 pixels), m1 (high 4 pixels)
    mova         m0, [r1]
    mova         m1, m0
    punpcklbw    m0, m7
    punpckhbw    m1, m7
    ; s2 row -> m2 (low 4 pixels), m3 (high 4 pixels)
    mova         m2, [r2]
    mova         m3, m2
    punpcklbw    m2, m7
    punpckhbw    m3, m7
    ; block = s1 - s2
    psubw        m0, m2
    psubw        m1, m3
    mova  [r0+r4+0], m0
    mova  [r0+r4+8], m1
    add          r1, r3
    add          r2, r3
    add          r4, 16             ; must stay last: its flags drive the jump
    jne .next_row                   ; done once the offset reaches zero
    REP_RET
109
INIT_XMM sse2
; SSE2 variant of diff_pixels.
;
; Handles two rows per loop pass: 8 bytes per row are read from each of the
; two sources, widened to 16-bit words, and the word-wise difference
; (first source minus second source) is stored as 16 bytes per row.
; Four passes fill the 128-byte output range.
;
;   r0 = destination block, biased by +128 so that r0+r4 walks +0 .. +127;
;        written with mova, so it is presumably expected to be 16-byte
;        aligned -- confirm against the callers
;   r1 = first source, r2 = second source
;   r3 = stride (int, widened below)
;   r4 = output offset, counting from -128 up to 0
;   m4 = all zero, interleaved in as the high byte of every word
cglobal diff_pixels, 4, 5, 5
    movsxdifnidn r3, r3d
    mov          r4, -128
    add          r0, 128
    pxor         m4, m4
.next_row_pair:
    ; first source: current row and the row below it
    movh         m0, [r1]
    movh         m1, [r1+r3]
    ; second source: the matching two rows
    movh         m2, [r2]
    movh         m3, [r2+r3]
    punpcklbw    m0, m4
    punpcklbw    m2, m4
    punpcklbw    m1, m4
    punpcklbw    m3, m4
    psubw        m0, m2             ; row n   difference
    psubw        m1, m3             ; row n+1 difference
    mova [r0+r4+ 0], m0
    mova [r0+r4+16], m1
    lea          r1, [r1+r3*2]
    lea          r2, [r2+r3*2]
    add          r4, 32             ; flags from this add drive the jump
    jne .next_row_pair              ; done once the offset reaches zero
    RET
133 RET