Commit | Line | Data |
---|---|---|
72b9787e JB |
1 | ;***************************************************************************** |
2 | ;* checkasm-a.asm: assembly check tool | |
3 | ;***************************************************************************** | |
4 | ;* Copyright (C) 2008-2014 x264 project | |
5 | ;* | |
6 | ;* Authors: Loren Merritt <lorenm@u.washington.edu> | |
7 | ;* Henrik Gramner <henrik@gramner.com> | |
8 | ;* | |
9 | ;* This program is free software; you can redistribute it and/or modify | |
10 | ;* it under the terms of the GNU General Public License as published by | |
11 | ;* the Free Software Foundation; either version 2 of the License, or | |
12 | ;* (at your option) any later version. | |
13 | ;* | |
14 | ;* This program is distributed in the hope that it will be useful, | |
15 | ;* but WITHOUT ANY WARRANTY; without even the implied warranty of | |
16 | ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
17 | ;* GNU General Public License for more details. | |
18 | ;* | |
19 | ;* You should have received a copy of the GNU General Public License | |
20 | ;* along with this program; if not, write to the Free Software | |
21 | ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. | |
22 | ;* | |
23 | ;* This program is also available under a commercial proprietary license. | |
24 | ;* For more information, contact us at license @ x265.com. | |
25 | ;***************************************************************************** | |
26 | ||
27 | %include "../common/x86/x86inc.asm" | |
28 | ||
; Read-only data used by the checkasm_call routines in this file.
29 | SECTION_RODATA | |
30 | ||
; NUL-terminated text handed to puts when a register check fails.
31 | error_message: db "failed to preserve register", 0 | |
32 | ||
33 | %if ARCH_X86_64 | |
34 | ; just random numbers to reduce the chance of incidental match | |
35 | ALIGN 16 | |
; x6..x15: 128-bit patterns. On WIN64, checkasm_call loads them into m6..m15
; before calling the function under test and XORs them back in afterwards.
36 | x6: ddq 0x79445c159ce790641a1b2550a612b48c | |
37 | x7: ddq 0x86b2536fcd8cf6362eed899d5a28ddcd | |
38 | x8: ddq 0x3f2bf84fc0fcca4eb0856806085e7943 | |
39 | x9: ddq 0xd229e1f5b281303facbd382dcf5b8de2 | |
40 | x10: ddq 0xab63e2e11fa38ed971aeaff20b095fd9 | |
41 | x11: ddq 0x77d410d5c42c882d89b0c0765892729a | |
42 | x12: ddq 0x24b3c1d2a024048bc45ea11a955d8dd5 | |
43 | x13: ddq 0xdd7b8919edd427862e8ec680de14b47c | |
44 | x14: ddq 0x11e53e2b2ac655ef135ce6888fa02cbf | |
45 | x15: ddq 0x6de8f4c914c334d5011ff554472a7a10 | |
; n7..n14: 64-bit patterns. checkasm_call loads n<i> into r<i> before the call
; and XORs it back in afterwards; free_regs decides how many of them are used.
46 | n7: dq 0x21f86d66c8ca00ce | |
47 | n8: dq 0x75b6ba21077c48ad | |
48 | n9: dq 0xed56bb2dcb3c7736 | |
49 | n10: dq 0x8bda43d3fd1a7e06 | |
50 | n11: dq 0xb64a9c9e5d318408 | |
51 | n12: dq 0xdf9a54b303f1d3a3 | |
52 | n13: dq 0x4a75479abd64e097 | |
53 | n14: dq 0x249214109d5d1c88 | |
54 | %endif | |
55 | ||
56 | SECTION .text | |
57 | ||
; puts is called by checkasm_call to print error_message.
; NOTE(review): cextern_naked is not defined in this file; presumably it comes
; from the included x86inc.asm and declares the symbol without a name prefix.
58 | cextern_naked puts | |
59 | ||
60 | ; max number of args used by any x265 asm function. | |
61 | ; (max_args % 4) must equal 3 for stack alignment | |
; max_args also sizes the scratch area in checkasm_stack_clobber and the
; argument copy loops / local stack area in checkasm_call.
62 | %define max_args 15 | |
63 | ||
64 | %if ARCH_X86_64 | |
65 | ||
66 | ;----------------------------------------------------------------------------- | |
67 | ; void x265_checkasm_stack_clobber( uint64_t clobber, ... ) | |
68 | ;----------------------------------------------------------------------------- | |
; Overwrites the (max_args+6)*8 bytes directly below the incoming stack pointer
; with the 64-bit value held in r0, then puts rsp back where it was.
; Registers: r0 = fill value (the `clobber` argument per the prototype above),
;            r1 = byte offset into the scratch area, counted down by 8.
; NOTE(review): the meaning of the "1,2" cglobal operands and of the upper-case
; SUB/ADD/RET macros comes from x86inc.asm, which is not visible here.
69 | cglobal checkasm_stack_clobber, 1,2 | |
70 | ; Clobber the stack with junk below the stack pointer | |
71 | %define size (max_args+6)*8 | |
; Reserve the scratch area so the stores below land inside it.
72 | SUB rsp, size | |
; Start at the highest 8-byte slot of the area.
73 | mov r1, size-8 | |
74 | .loop: | |
75 | mov [rsp+r1], r0 | |
; Step down one slot; the loop continues while the offset is still >= 0,
; so the slot at offset 0 is written too.
76 | sub r1, 8 | |
77 | jge .loop | |
; Release the scratch area; the junk stays behind below rsp.
78 | ADD rsp, size | |
79 | RET | |
80 | ||
; free_regs = number of low-numbered registers that checkasm_call does not
; verify. The fill/verify loops below run 15-free_regs times counting down from
; r14, i.e. r14..r7 are checked on WIN64 and r14..r9 otherwise.
81 | %if WIN64 | |
82 | %assign free_regs 7 | |
83 | %else | |
84 | %assign free_regs 9 | |
85 | %endif | |
86 | ||
87 | ;----------------------------------------------------------------------------- | |
88 | ; intptr_t x265_checkasm_call( intptr_t (*func)(), int *ok, ... ) | |
89 | ;----------------------------------------------------------------------------- | |
; 64-bit version. Forwards the variadic arguments to func, and around the call
; fills a set of registers with known patterns and checks they come back
; unchanged. On a mismatch it prints error_message via puts and stores 0 to *ok.
; rax is saved across puts and restored before returning.
;
; checkasm_call_float is declared immediately before checkasm_call with no
; instructions in between, so both names appear to label the same code.
; NOTE(review): the operand list "2,15,16,max_args*8+8" is interpreted by
; x86inc's cglobal (not visible here); presumably args, GPRs, XMM registers and
; local stack bytes. The code below uses [rsp+0 .. rsp+max_args*8+7].
90 | cglobal checkasm_call_float |
91 | INIT_XMM |
92 | cglobal checkasm_call, 2,15,16,max_args*8+8 | |
; Keep the function pointer in r6: r0 is about to be reloaded with the first
; forwarded argument. The call below goes through r6.
93 | mov r6, r0 | |
; Park the `ok` pointer in the topmost 8 bytes of the local area; it is read
; back on the failure path.
94 | mov [rsp+max_args*8], r1 | |
95 | ||
96 | ; All arguments have been pushed on the stack instead of registers in order to | |
97 | ; test for incorrect assumptions that 32-bit ints are zero-extended to 64-bit. | |
; NOTE(review): r6mp..r11mp are x86inc names; presumably the caller's stack
; slots for parameters 6..11 -- confirm against x86inc.asm.
98 | mov r0, r6mp | |
99 | mov r1, r7mp | |
100 | mov r2, r8mp | |
101 | mov r3, r9mp | |
102 | %if UNIX64 | |
103 | mov r4, r10mp | |
104 | mov r5, r11mp | |
; Copy the remaining max_args-6 values from the caller's stack into the local
; area starting at [rsp+0], using r9 as scratch.
105 | %assign i 6 | |
106 | %rep max_args-6 | |
107 | mov r9, [rsp+stack_offset+(i+1)*8] | |
108 | mov [rsp+(i-6)*8], r9 | |
109 | %assign i i+1 | |
110 | %endrep | |
111 | %else | |
; Non-UNIX64: only r0..r3 carry register arguments. Copy the remaining
; max_args-4 values to [rsp+4*8] onwards, leaving the first four 8-byte slots
; untouched (NOTE(review): presumably the Win64 shadow space).
112 | %assign i 4 | |
113 | %rep max_args-4 | |
114 | mov r9, [rsp+stack_offset+(i+7)*8] | |
115 | mov [rsp+i*8], r9 | |
116 | %assign i i+1 | |
117 | %endrep | |
118 | %endif | |
119 | ||
120 | %if WIN64 | |
; Fill m6..m15 with the x6..x15 patterns before the call.
121 | %assign i 6 | |
122 | %rep 16-6 | |
123 | mova m %+ i, [x %+ i] | |
124 | %assign i i+1 | |
125 | %endrep | |
126 | %endif | |
127 | ||
; Fill r14 down to r<free_regs> with their n<i> patterns.
128 | %assign i 14 | |
129 | %rep 15-free_regs | |
130 | mov r %+ i, [n %+ i] | |
131 | %assign i i-1 | |
132 | %endrep | |
133 | call r6 | |
; XOR each checked register with its pattern (zero if it was preserved) and
; accumulate the results in r14. The first iteration handles r14 itself, so
; r14 ends up nonzero if any checked GPR changed.
134 | %assign i 14 | |
135 | %rep 15-free_regs | |
136 | xor r %+ i, [n %+ i] | |
137 | or r14, r %+ i | |
138 | %assign i i-1 | |
139 | %endrep | |
140 | ||
141 | %if WIN64 | |
; Same check for m6..m15, accumulated in m6.
142 | %assign i 6 | |
143 | %rep 16-6 | |
144 | pxor m %+ i, [x %+ i] | |
145 | por m6, m %+ i | |
146 | %assign i i+1 | |
147 | %endrep | |
; Narrow the 128-bit accumulator to 64 bits (signed saturation keeps every
; nonzero word nonzero) and merge it into the GPR accumulator.
148 | packsswb m6, m6 | |
149 | movq r5, m6 | |
150 | or r14, r5 | |
151 | %endif | |
152 | ||
; The flags tested here come from the last `or r14, ...` above: ZF set means
; every checked register matched its pattern.
153 | jz .ok | |
; Failure path: hold func's return value in r9 while puts runs.
; NOTE(review): this relies on r9 surviving the call to puts; that depends on
; which physical register x86inc maps r9 to -- confirm.
154 | mov r9, rax | |
155 | lea r0, [error_message] | |
156 | call puts | |
; Reload the `ok` pointer stashed at entry and clear *ok.
157 | mov r1, [rsp+max_args*8] | |
158 | mov dword [r1], 0 | |
159 | mov rax, r9 | |
160 | .ok: | |
161 | RET | |
162 | ||
163 | %else | |
164 | ||
165 | ; just random numbers to reduce the chance of incidental match | |
; n3..n6 are immediate operands here (not memory constants as in the 64-bit
; build); they are loaded into r3..r6 and XORed back in after the call.
166 | %define n3 dword 0x6549315c | |
167 | %define n4 dword 0xe02f3e23 | |
168 | %define n5 dword 0xb78d0d1d | |
169 | %define n6 dword 0x33627ba7 | |
170 | ||
171 | ;----------------------------------------------------------------------------- | |
172 | ; intptr_t x265_checkasm_call( intptr_t (*func)(), int *ok, ... ) | |
173 | ;----------------------------------------------------------------------------- | |
; 32-bit version. Re-pushes max_args dwords of caller arguments, calls func
; through r0 with r3..r6 preloaded with patterns, and checks that the four
; registers are unchanged afterwards. On a mismatch it prints error_message via
; puts and stores 0 to *ok. eax is saved across puts and restored.
;
; checkasm_call_float is declared immediately before checkasm_call with no
; instructions in between, so both names appear to label the same code.
; NOTE(review): the "1,7" cglobal operands and REP_RET come from x86inc.asm,
; which is not visible here.
174 | cglobal checkasm_call_float | |
175 | cglobal checkasm_call, 1,7 | |
176 | mov r3, n3 | |
177 | mov r4, n4 | |
178 | mov r5, n5 | |
179 | mov r6, n6 | |
; Each push lowers esp by 4, so the same displacement reads the next-lower
; dword every iteration: max_args consecutive dwords are copied, highest
; address first, which keeps their order on the new stack top.
; NOTE(review): the constant 24 depends on how many registers cglobal pushed in
; its prologue -- confirm against x86inc.asm.
180 | %rep max_args | |
181 | push dword [esp+24+max_args*4] | |
182 | %endrep | |
183 | call r0 | |
; Drop the copied arguments again.
184 | add esp, max_args*4 | |
; XOR each register with its pattern (zero if preserved), then OR everything
; into r3; the final `or` sets ZF only if all four registers matched.
185 | xor r3, n3 | |
186 | xor r4, n4 | |
187 | xor r5, n5 | |
188 | xor r6, n6 | |
189 | or r3, r4 | |
190 | or r5, r6 | |
191 | or r3, r5 | |
192 | jz .ok | |
; Failure path: hold func's return value in r3 while puts runs.
; NOTE(review): this relies on r3 surviving the call to puts -- confirm r3's
; register mapping in x86inc.asm.
193 | mov r3, eax | |
194 | lea r1, [error_message] | |
195 | push r1 | |
196 | call puts | |
; Remove the puts argument from the stack.
197 | add esp, 4 | |
; NOTE(review): r1m is presumably x86inc's name for the stack slot of the
; second parameter (`ok` in the prototype above); *ok is cleared here.
198 | mov r1, r1m | |
199 | mov dword [r1], 0 | |
200 | mov eax, r3 | |
201 | .ok: | |
202 | REP_RET | |
203 | ||
204 | %endif ; ARCH_X86_64 | |
205 | ||
206 | ;----------------------------------------------------------------------------- | |
207 | ; int x265_stack_pagealign( int (*func)(), int align ) | |
208 | ;----------------------------------------------------------------------------- | |
; Calls func (in r0) with the stack pointer rounded down to a 4096-byte
; boundary and then lowered by a further r1 bytes (the `align` argument per the
; prototype above). The original stack pointer is restored through rbp/leave
; before returning.
; NOTE(review): movsxdifnidn is an x86inc macro not visible here; presumably it
; sign-extends r1d into r1 where the two differ in width.
209 | cglobal stack_pagealign, 2,2 | |
210 | movsxdifnidn r1, r1d | |
; Remember the incoming rsp in rbp so that `leave` can undo the realignment.
211 | push rbp | |
212 | mov rbp, rsp | |
213 | %if WIN64 | |
214 | sub rsp, 32 ; shadow space | |
215 | %endif | |
; Clear the low 12 bits: rsp now sits on a 4096-byte boundary at or below its
; previous value.
216 | and rsp, ~0xfff | |
; Offset the stack from that boundary by the requested amount.
217 | sub rsp, r1 | |
218 | call r0 | |
; leave = mov rsp, rbp / pop rbp: restores both registers saved above.
219 | leave | |
220 | RET | |
221 |