Imported Upstream version 1.4
[deb_x265.git] / source / test / checkasm-a.asm
1 ;*****************************************************************************
2 ;* checkasm-a.asm: assembly check tool
3 ;*****************************************************************************
4 ;* Copyright (C) 2008-2014 x264 project
5 ;*
6 ;* Authors: Loren Merritt <lorenm@u.washington.edu>
7 ;* Henrik Gramner <henrik@gramner.com>
8 ;*
9 ;* This program is free software; you can redistribute it and/or modify
10 ;* it under the terms of the GNU General Public License as published by
11 ;* the Free Software Foundation; either version 2 of the License, or
12 ;* (at your option) any later version.
13 ;*
14 ;* This program is distributed in the hope that it will be useful,
15 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
16 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 ;* GNU General Public License for more details.
18 ;*
19 ;* You should have received a copy of the GNU General Public License
20 ;* along with this program; if not, write to the Free Software
21 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
22 ;*
23 ;* This program is also available under a commercial proprietary license.
24 ;* For more information, contact us at license @ x265.com.
25 ;*****************************************************************************
26
27 %include "../common/x86/x86inc.asm"
28
29 SECTION_RODATA
30
; NUL-terminated text handed to puts by checkasm_call when a register check fails.
31 error_message: db "failed to preserve register", 0
32
33 %if ARCH_X86_64
34 ; just random numbers to reduce the chance of incidental match
; x6..x15: 128-bit patterns that checkasm_call loads into m6..m15 before the
;          call and XORs against afterwards (only inside its %if WIN64 blocks).
; n7..n14: 64-bit patterns that checkasm_call loads into the general-purpose
;          registers r(free_regs)..r14, so n7/n8 are referenced only when
;          free_regs is 7.
; ALIGN 16 puts x6 on a 16-byte boundary; the x entries are 128 bits each, so
; the following x labels keep that alignment for the mova/pxor memory operands.
35 ALIGN 16
36 x6: ddq 0x79445c159ce790641a1b2550a612b48c
37 x7: ddq 0x86b2536fcd8cf6362eed899d5a28ddcd
38 x8: ddq 0x3f2bf84fc0fcca4eb0856806085e7943
39 x9: ddq 0xd229e1f5b281303facbd382dcf5b8de2
40 x10: ddq 0xab63e2e11fa38ed971aeaff20b095fd9
41 x11: ddq 0x77d410d5c42c882d89b0c0765892729a
42 x12: ddq 0x24b3c1d2a024048bc45ea11a955d8dd5
43 x13: ddq 0xdd7b8919edd427862e8ec680de14b47c
44 x14: ddq 0x11e53e2b2ac655ef135ce6888fa02cbf
45 x15: ddq 0x6de8f4c914c334d5011ff554472a7a10
46 n7: dq 0x21f86d66c8ca00ce
47 n8: dq 0x75b6ba21077c48ad
48 n9: dq 0xed56bb2dcb3c7736
49 n10: dq 0x8bda43d3fd1a7e06
50 n11: dq 0xb64a9c9e5d318408
51 n12: dq 0xdf9a54b303f1d3a3
52 n13: dq 0x4a75479abd64e097
53 n14: dq 0x249214109d5d1c88
54 %endif
55
56 SECTION .text
57
; puts is called on the failure path of both checkasm_call variants below.
; NOTE(review): cextern_naked comes from x86inc.asm; presumably it declares the
; external symbol without the project's name prefix -- confirm there.
58 cextern_naked puts
59
60 ; max number of args used by any x265 asm function.
61 ; (max_args % 4) must equal 3 for stack alignment
; Within this file max_args sizes the region filled by checkasm_stack_clobber
; and the argument area that checkasm_call rebuilds for the function under test.
62 %define max_args 15
63
64 %if ARCH_X86_64
65
66 ;-----------------------------------------------------------------------------
67 ; void x265_checkasm_stack_clobber( uint64_t clobber, ... )
; Overwrites the (max_args+6)*8 bytes directly below the stack pointer (as it
; is after the cglobal prologue) with the value held in r0, the `clobber`
; argument, then restores rsp. Only r0 is read by the instructions below.
; NOTE(review): "1,2" presumably follows x86inc's cglobal convention (1 argument
; loaded, 2 GPRs used), and SUB/ADD/RET are x86inc macros -- confirm there.
68 ;-----------------------------------------------------------------------------
69 cglobal checkasm_stack_clobber, 1,2
70 ; Clobber the stack with junk below the stack pointer
; size = number of bytes that get overwritten.
71 %define size (max_args+6)*8
72 SUB rsp, size ; reserve the region that will be overwritten
73 mov r1, size-8 ; r1 = byte offset of the highest 8-byte slot in the region
74 .loop:
75 mov [rsp+r1], r0 ; write the junk value into the current slot
76 sub r1, 8 ; step down one slot; this sets the flags tested by jge
77 jge .loop ; keep going while the offset is still >= 0 (slot 0 is written last)
78 ADD rsp, size ; release the region; the junk stays behind below rsp
79 RET
80
; free_regs = count of low-numbered x86inc registers (r0..r(free_regs-1)) that
; checkasm_call leaves unchecked; it seeds and verifies r(free_regs)..r14.
; NOTE(review): 7 vs 9 presumably matches how many of x86inc's r-registers are
; caller-saved under each ABI -- confirm against x86inc.asm's register mapping.
81 %if WIN64
82 %assign free_regs 7
83 %else
84 %assign free_regs 9
85 %endif
86
87 ;-----------------------------------------------------------------------------
88 ; intptr_t x265_checkasm_call( intptr_t (*func)(), int *ok, ... )
; x86-64 variant. Calls func with arguments taken from this function's own
; stack argument slots, after seeding r(free_regs)..r14 (and m6..m15 on WIN64)
; with the n*/x* patterns. After func returns, every seeded register is XORed
; with its pattern; if any bit differs, error_message is printed with puts and
; *ok is set to 0. On that failure path rax is saved around the puts call and
; restored, so func's result is still what gets returned.
89 ;-----------------------------------------------------------------------------
; No instructions separate the two cglobal lines, so checkasm_call_float
; appears to be a second entry symbol for the same code -- TODO confirm that
; cglobal/INIT_XMM emit nothing in between.
90 cglobal checkasm_call_float
91 INIT_XMM
92 cglobal checkasm_call, 2,15,16,max_args*8+8
93 mov r6, r0 ; keep func in r6; r0 is reloaded with an argument below
94 mov [rsp+max_args*8], r1 ; park the ok pointer in the top 8 bytes of the max_args*8+8 frame
95
96 ; All arguments have been pushed on the stack instead of registers in order to
97 ; test for incorrect assumptions that 32-bit ints are zero-extended to 64-bit.
; NOTE(review): rNmp presumably names the incoming stack slot of argument N
; (x86inc) -- confirm there.
98 mov r0, r6mp
99 mov r1, r7mp
100 mov r2, r8mp
101 mov r3, r9mp
102 %if UNIX64
103 mov r4, r10mp
104 mov r5, r11mp
; Copy max_args-6 further incoming stack slots to [rsp+0], [rsp+8], ... using
; r9 as scratch (r9 is overwritten with its pattern before the call).
105 %assign i 6
106 %rep max_args-6
107 mov r9, [rsp+stack_offset+(i+1)*8]
108 mov [rsp+(i-6)*8], r9
109 %assign i i+1
110 %endrep
111 %else
; Copy max_args-4 further incoming stack slots to [rsp+32], [rsp+40], ...;
; the first 32 bytes at rsp are left unwritten (presumably the WIN64 shadow
; space for func -- TODO confirm).
112 %assign i 4
113 %rep max_args-4
114 mov r9, [rsp+stack_offset+(i+7)*8]
115 mov [rsp+i*8], r9
116 %assign i i+1
117 %endrep
118 %endif
119
; WIN64 only: seed m6..m15 with the x6..x15 patterns.
120 %if WIN64
121 %assign i 6
122 %rep 16-6
123 mova m %+ i, [x %+ i]
124 %assign i i+1
125 %endrep
126 %endif
127
; Seed r14 down to r(free_regs) with the matching n patterns.
128 %assign i 14
129 %rep 15-free_regs
130 mov r %+ i, [n %+ i]
131 %assign i i-1
132 %endrep
133 call r6 ; invoke the function under test
; XOR each seeded register with its pattern (0 if preserved) and OR every
; result into r14; the first iteration handles r14 itself.
134 %assign i 14
135 %rep 15-free_regs
136 xor r %+ i, [n %+ i]
137 or r14, r %+ i
138 %assign i i-1
139 %endrep
140
; WIN64 only: same XOR/OR reduction for m6..m15, accumulated in m6.
141 %if WIN64
142 %assign i 6
143 %rep 16-6
144 pxor m %+ i, [x %+ i]
145 por m6, m %+ i
146 %assign i i+1
147 %endrep
148 packsswb m6, m6 ; saturating word->byte pack: a non-zero word stays non-zero, so the low 64 bits cover all 128
149 movq r5, m6 ; move the packed result into a general-purpose register
150 or r14, r5 ; fold the vector result into the r14 accumulator (sets the flags used by jz)
151 %endif
152
153 jz .ok ; flags come from the last `or r14, ...` above: zero means every pattern survived
154 mov r9, rax ; save func's return value across the puts call
; NOTE(review): this relies on r9 surviving puts -- confirm its x86inc mapping
; is a callee-saved register under both ABIs.
155 lea r0, [error_message] ; first argument for puts
156 call puts
157 mov r1, [rsp+max_args*8] ; reload the ok pointer parked at entry
158 mov dword [r1], 0 ; *ok = 0
159 mov rax, r9 ; restore func's return value
160 .ok:
161 RET
162
163 %else
164
165 ; just random numbers to reduce the chance of incidental match
; n3..n6 expand to dword immediates; the 32-bit checkasm_call below moves them
; into r3..r6 before the call and XORs them back afterwards.
166 %define n3 dword 0x6549315c
167 %define n4 dword 0xe02f3e23
168 %define n5 dword 0xb78d0d1d
169 %define n6 dword 0x33627ba7
170
171 ;-----------------------------------------------------------------------------
172 ; intptr_t x265_checkasm_call( intptr_t (*func)(), int *ok, ... )
; 32-bit variant. Seeds r3..r6 with n3..n6, re-pushes max_args dwords of the
; incoming arguments, calls func (r0), then XORs r3..r6 with the same patterns.
; If any bit differs, error_message is printed with puts and *ok is set to 0.
; func's eax is saved around the puts call and restored before returning.
173 ;-----------------------------------------------------------------------------
; Two consecutive cglobal lines: checkasm_call_float appears to be a second
; entry symbol for the same code -- TODO confirm cglobal emits nothing between.
174 cglobal checkasm_call_float
175 cglobal checkasm_call, 1,7
176 mov r3, n3 ; seed the registers that will be checked after the call
177 mov r4, n4
178 mov r5, n5
179 mov r6, n6
; The displacement below is fixed while every push lowers esp by 4, so each
; push reads the dword just below the previous one: max_args dwords are
; re-pushed, highest address first, which recreates them in the same order.
; NOTE(review): the 24 presumably skips the registers saved by cglobal, the
; return address, func and ok -- confirm against what "1,7" pushes.
180 %rep max_args
181 push dword [esp+24+max_args*4]
182 %endrep
183 call r0 ; invoke the function under test
184 add esp, max_args*4 ; drop the re-pushed arguments
185 xor r3, n3 ; each register becomes 0 if its pattern survived
186 xor r4, n4
187 xor r5, n5
188 xor r6, n6
189 or r3, r4 ; combine all four results ...
190 or r5, r6
191 or r3, r5 ; ... so the zero flag is set only if every register was preserved
192 jz .ok
193 mov r3, eax ; keep func's return value in r3 across the puts call (r3 is free now)
194 lea r1, [error_message]
195 push r1 ; stack argument for puts
196 call puts
197 add esp, 4 ; pop the puts argument
198 mov r1, r1m ; load the ok pointer (r1m presumably is argument 1's stack slot -- see x86inc)
199 mov dword [r1], 0 ; *ok = 0
200 mov eax, r3 ; restore func's return value
201 .ok:
202 REP_RET
203
204 %endif ; ARCH_X86_64
205
206 ;-----------------------------------------------------------------------------
207 ; int x265_stack_pagealign( int (*func)(), int align )
; Calls func (r0) with the stack pointer moved to `align` (r1) bytes below a
; 4096-byte boundary, then restores the original stack pointer. No instruction
; after the call writes eax/rax, so func's result is not modified here.
208 ;-----------------------------------------------------------------------------
209 cglobal stack_pagealign, 2,2
; NOTE(review): movsxdifnidn is an x86inc macro; presumably it sign-extends the
; 32-bit align into the full-width r1 where the two differ -- confirm there.
210 movsxdifnidn r1, r1d
211 push rbp ; save the caller's rbp
212 mov rbp, rsp ; remember the current stack pointer so leave can restore it
213 %if WIN64
214 sub rsp, 32 ; shadow space (taken before aligning, so at least 32 bytes lie between the final rsp and the saved rbp)
215 %endif
216 and rsp, ~0xfff ; round rsp down to a 4096-byte boundary
217 sub rsp, r1 ; then move it `align` bytes below that boundary
218 call r0
219 leave ; rsp = rbp, then pop rbp: undoes the realignment
220 RET
221