; NOTE(review): the two lines below are git-blame extraction residue, kept as a
; comment so they cannot break assembly.
; Commit: 2ba45a60 DM
1 | ;****************************************************************************** |
2 | ;* Pixel utilities SIMD | |
3 | ;* | |
4 | ;* Copyright (C) 2002-2004 Michael Niedermayer <michaelni@gmx.at> | |
5 | ;* Copyright (C) 2014 Clément Bœsch <u pkh me> | |
6 | ;* | |
7 | ;* This file is part of FFmpeg. | |
8 | ;* | |
9 | ;* FFmpeg is free software; you can redistribute it and/or | |
10 | ;* modify it under the terms of the GNU Lesser General Public | |
11 | ;* License as published by the Free Software Foundation; either | |
12 | ;* version 2.1 of the License, or (at your option) any later version. | |
13 | ;* | |
14 | ;* FFmpeg is distributed in the hope that it will be useful, | |
15 | ;* but WITHOUT ANY WARRANTY; without even the implied warranty of | |
16 | ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
17 | ;* Lesser General Public License for more details. | |
18 | ;* | |
19 | ;* You should have received a copy of the GNU Lesser General Public | |
20 | ;* License along with FFmpeg; if not, write to the Free Software | |
21 | ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
22 | ;****************************************************************************** | |
23 | ||
24 | %include "x86util.asm" | |
25 | ||
26 | SECTION_TEXT | |
27 | ||
;-------------------------------------------------------------------------------
; int ff_pixelutils_sad_8x8_mmx(const uint8_t *src1, ptrdiff_t stride1,
;                               const uint8_t *src2, ptrdiff_t stride2);
;
; Sum of absolute differences of an 8x8 block, plain MMX (no psadbw).
; In:   src1q/src2q = block pointers, stride1q/stride2q = row strides in bytes
; Out:  eax = SAD (max 8*8*255 = 16320, fits in 16 bits)
;-------------------------------------------------------------------------------
INIT_MMX mmx
cglobal pixelutils_sad_8x8, 4,4,0, src1, stride1, src2, stride2
    pxor         m7, m7                 ; m7 = 0, zero source for byte->word unpack
    pxor         m6, m6                 ; m6 = SAD accumulator (4 word lanes)
%rep 4
    ; Two rows per iteration; 4 iterations cover all 8 rows.
    mova         m0, [src1q]
    mova         m2, [src1q + stride1q]
    mova         m1, [src2q]
    mova         m3, [src2q + stride2q]
    ; |a - b| without an abs instruction:
    ; (a -us b) | (b -us a) -- one of the saturating subtractions is 0.
    ; The 3-operand forms are non-destructive macro expansions
    ; (presumably provided by x86util.asm -- confirm against that header).
    psubusb      m4, m0, m1
    psubusb      m5, m2, m3
    psubusb      m1, m0
    psubusb      m3, m2
    por          m1, m4                 ; m1 = |row0 src1 - row0 src2| per byte
    por          m3, m5                 ; m3 = |row1 src1 - row1 src2| per byte
    ; Widen the byte differences to words and add them all into m6.
    punpcklbw    m0, m1, m7
    punpcklbw    m2, m3, m7
    punpckhbw    m1, m7
    punpckhbw    m3, m7
    paddw        m0, m1
    paddw        m2, m3
    paddw        m0, m2
    paddw        m6, m0
    lea          src1q, [src1q + 2*stride1q]
    lea          src2q, [src2q + 2*stride2q]
%endrep
    ; Horizontal sum of the 4 word lanes of m6 into the low word.
    psrlq        m0, m6, 32
    paddw        m6, m0
    psrlq        m0, m6, 16
    paddw        m6, m0
    movd         eax, m6
    movzx        eax, ax                ; upper lanes hold partial-sum garbage
    RET
65 | ||
;-------------------------------------------------------------------------------
; int ff_pixelutils_sad_8x8_mmxext(const uint8_t *src1, ptrdiff_t stride1,
;                                  const uint8_t *src2, ptrdiff_t stride2);
;
; 8x8 SAD using the MMXEXT psadbw instruction, which sums the absolute
; byte differences of 8 bytes into the low word of the destination.
; Out: eax = SAD (max 8*8*255 = 16320, fits in 16 bits)
;-------------------------------------------------------------------------------
INIT_MMX mmxext
cglobal pixelutils_sad_8x8, 4,4,0, src1, stride1, src2, stride2
    pxor         m2, m2                 ; m2 = SAD accumulator
%rep 4
    ; Two rows per iteration; 4 iterations cover all 8 rows.
    mova         m0, [src1q]
    mova         m1, [src1q + stride1q]
    psadbw       m0, [src2q]            ; m0.low = SAD of row 0
    psadbw       m1, [src2q + stride2q] ; m1.low = SAD of row 1
    paddw        m2, m0
    paddw        m2, m1
    lea          src1q, [src1q + 2*stride1q]
    lea          src2q, [src2q + 2*stride2q]
%endrep
    movd         eax, m2                ; total fits in the low 16 bits
    RET
85 | ||
;-------------------------------------------------------------------------------
; int ff_pixelutils_sad_16x16_mmxext(const uint8_t *src1, ptrdiff_t stride1,
;                                    const uint8_t *src2, ptrdiff_t stride2);
;
; 16x16 SAD with MMX registers: each 16-byte row is processed as two
; 8-byte halves via psadbw.
; Out: eax = SAD (max 16*16*255 = 65280, still fits in 16 bits)
;-------------------------------------------------------------------------------
INIT_MMX mmxext
cglobal pixelutils_sad_16x16, 4,4,0, src1, stride1, src2, stride2
    pxor         m2, m2                 ; m2 = SAD accumulator
%rep 16
    ; One row per iteration, split into low/high 8-byte halves.
    mova         m0, [src1q]
    mova         m1, [src1q + 8]
    psadbw       m0, [src2q]
    psadbw       m1, [src2q + 8]
    paddw        m2, m0
    paddw        m2, m1
    add          src1q, stride1q
    add          src2q, stride2q
%endrep
    movd         eax, m2
    RET
105 | ||
;-------------------------------------------------------------------------------
; int ff_pixelutils_sad_16x16_sse2(const uint8_t *src1, ptrdiff_t stride1,
;                                  const uint8_t *src2, ptrdiff_t stride2);
;
; 16x16 SAD, SSE2, unaligned loads (movu) for both sources.
; XMM psadbw produces two partial sums (low/high qword); they are combined
; with movhlps + paddw at the end.
; (Header previously said "_sse"; the code is INIT_XMM sse2.)
;-------------------------------------------------------------------------------
INIT_XMM sse2
cglobal pixelutils_sad_16x16, 4,4,5, src1, stride1, src2, stride2
    ; First two rows are peeled so m4 starts as a real partial sum
    ; instead of a zeroed accumulator.
    movu         m4, [src1q]
    movu         m2, [src2q]
    movu         m1, [src1q + stride1q]
    movu         m3, [src2q + stride2q]
    psadbw       m4, m2
    psadbw       m1, m3
    paddw        m4, m1
%rep 7
    ; Two rows per iteration; 7 iterations + the peeled pair = 16 rows.
    lea          src1q, [src1q + 2*stride1q]
    lea          src2q, [src2q + 2*stride2q]
    movu         m0, [src1q]
    movu         m2, [src2q]
    movu         m1, [src1q + stride1q]
    movu         m3, [src2q + stride2q]
    psadbw       m0, m2
    psadbw       m1, m3
    paddw        m4, m0
    paddw        m4, m1
%endrep
    ; Fold the high-qword partial sum onto the low qword.
    movhlps      m0, m4
    paddw        m4, m0
    movd         eax, m4
    RET
135 | ||
;-------------------------------------------------------------------------------
; int ff_pixelutils_sad_[au]_16x16_sse2(const uint8_t *src1, ptrdiff_t stride1,
;                                       const uint8_t *src2, ptrdiff_t stride2);
;
; %1 selects the load flavor for src2: "a" -> mova (16-byte aligned src2),
; "u" -> movu (unaligned). src1 is always used as a psadbw memory operand,
; so alignment requirements on src1 follow psadbw's (aligned for xmm).
; (Header previously said "_sse"; the macro emits INIT_XMM sse2 code.)
;-------------------------------------------------------------------------------
%macro SAD_XMM_16x16 1
INIT_XMM sse2
cglobal pixelutils_sad_%1_16x16, 4,4,3, src1, stride1, src2, stride2
    ; First two rows peeled to seed the accumulator m2.
    mov%1        m2, [src2q]
    psadbw       m2, [src1q]
    mov%1        m1, [src2q + stride2q]
    psadbw       m1, [src1q + stride1q]
    paddw        m2, m1
%rep 7
    ; Two rows per iteration; 7 iterations + the peeled pair = 16 rows.
    lea          src1q, [src1q + 2*stride1q]
    lea          src2q, [src2q + 2*stride2q]
    mov%1        m0, [src2q]
    psadbw       m0, [src1q]
    mov%1        m1, [src2q + stride2q]
    psadbw       m1, [src1q + stride1q]
    paddw        m2, m0
    paddw        m2, m1
%endrep
    ; Combine the two qword partial sums from psadbw.
    movhlps      m0, m2
    paddw        m2, m0
    movd         eax, m2
    RET
%endmacro

SAD_XMM_16x16 a                         ; aligned-src2 variant
SAD_XMM_16x16 u                         ; unaligned-src2 variant