| 1 | ;***************************************************************************** |
| 2 | ;* checkasm-a.asm: assembly check tool |
| 3 | ;***************************************************************************** |
| 4 | ;* Copyright (C) 2008-2014 x264 project |
| 5 | ;* |
| 6 | ;* Authors: Loren Merritt <lorenm@u.washington.edu> |
| 7 | ;* Henrik Gramner <henrik@gramner.com> |
| 8 | ;* |
| 9 | ;* This program is free software; you can redistribute it and/or modify |
| 10 | ;* it under the terms of the GNU General Public License as published by |
| 11 | ;* the Free Software Foundation; either version 2 of the License, or |
| 12 | ;* (at your option) any later version. |
| 13 | ;* |
| 14 | ;* This program is distributed in the hope that it will be useful, |
| 15 | ;* but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 16 | ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 17 | ;* GNU General Public License for more details. |
| 18 | ;* |
| 19 | ;* You should have received a copy of the GNU General Public License |
| 20 | ;* along with this program; if not, write to the Free Software |
| 21 | ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. |
| 22 | ;* |
| 23 | ;* This program is also available under a commercial proprietary license. |
| 24 | ;* For more information, contact us at license @ x265.com. |
| 25 | ;***************************************************************************** |
| 26 | |
| 27 | %include "../common/x86/x86inc.asm" |
| 28 | |
SECTION_RODATA

; Text handed to puts() by checkasm_call when a checked register does not
; come back with the value that was loaded into it before the call.
error_message: db "failed to preserve register", 0

%if ARCH_X86_64
; just random numbers to reduce the chance of incidental match
;
; x6..x15: 128-bit patterns; on WIN64 checkasm_call loads them into m6..m15
;          before the call and xors them back afterwards.
; n7..n14: 64-bit patterns used the same way for r7..r14 (only the ones from
;          r<free_regs> upwards are actually referenced).
;
; Each 128-bit pattern is written as two qwords, low half first, which emits
; the same 16 bytes as a single little-endian 128-bit literal.  A 128-bit
; integer operand to ddq/do is not portable: NASM's DO family does not accept
; integer constants.
ALIGN 16
x6:  dq 0x1a1b2550a612b48c, 0x79445c159ce79064
x7:  dq 0x2eed899d5a28ddcd, 0x86b2536fcd8cf636
x8:  dq 0xb0856806085e7943, 0x3f2bf84fc0fcca4e
x9:  dq 0xacbd382dcf5b8de2, 0xd229e1f5b281303f
x10: dq 0x71aeaff20b095fd9, 0xab63e2e11fa38ed9
x11: dq 0x89b0c0765892729a, 0x77d410d5c42c882d
x12: dq 0xc45ea11a955d8dd5, 0x24b3c1d2a024048b
x13: dq 0x2e8ec680de14b47c, 0xdd7b8919edd42786
x14: dq 0x135ce6888fa02cbf, 0x11e53e2b2ac655ef
x15: dq 0x011ff554472a7a10, 0x6de8f4c914c334d5
n7:  dq 0x21f86d66c8ca00ce
n8:  dq 0x75b6ba21077c48ad
n9:  dq 0xed56bb2dcb3c7736
n10: dq 0x8bda43d3fd1a7e06
n11: dq 0xb64a9c9e5d318408
n12: dq 0xdf9a54b303f1d3a3
n13: dq 0x4a75479abd64e097
n14: dq 0x249214109d5d1c88
%endif
| 55 | |
SECTION .text

; Upper bound on the number of arguments any x265 asm function takes; the
; checkasm_call wrappers forward exactly this many argument slots.
; (max_args % 4) must equal 3 for stack alignment
%define max_args 15

; puts() is called by checkasm_call to report a register that was not preserved.
cextern_naked puts
| 63 | |
| 64 | %if ARCH_X86_64 |
| 65 | |
;-----------------------------------------------------------------------------
; void x265_checkasm_stack_clobber( uint64_t clobber, ... )
;
; Fills the (max_args+6)*8 bytes just below the stack pointer with the value
; of the first argument (r0), one qword at a time, then restores rsp.
; Only r1 is used as scratch.
;-----------------------------------------------------------------------------
cglobal checkasm_stack_clobber, 1,2
    %define clobber_bytes (max_args+6)*8
    SUB  rsp, clobber_bytes             ; expose the region as [rsp, rsp+clobber_bytes)
    mov   r1, clobber_bytes-8           ; r1 = byte offset of the highest qword
.fill:
    mov [rsp+r1], r0                    ; junk goes in, highest address first
    sub   r1, 8
    jge .fill                           ; offset 0 is still written; exit once r1 < 0
    ADD  rsp, clobber_bytes
    RET
| 80 | |
; free_regs: index of the first register that checkasm_call verifies.
; Registers r<free_regs>..r14 are loaded with known constants before the call
; and compared afterwards; r0..r<free_regs-1> are not checked.
%assign free_regs 9
%if WIN64
    %assign free_regs 7
%endif
| 86 | |
;-----------------------------------------------------------------------------
; intptr_t x265_checkasm_call( intptr_t (*func)(), int *ok, ... )
;
; Calls func with the forwarded arguments and verifies that it preserved the
; registers it is expected to preserve:
;   * r<free_regs>..r14 are loaded from n<i> before the call and xored with
;     the same constants afterwards; any surviving bit is ORed into r14.
;   * WIN64 only: m6..m15 get the same treatment using x6..x15.
; If anything differs, error_message is printed with puts() and *ok is set
; to 0.  Whatever func left in rax is returned in either case.
;
; checkasm_call_float is declared right in front of checkasm_call and has no
; code of its own, so calling it executes the checkasm_call body.
;-----------------------------------------------------------------------------
cglobal checkasm_call_float
INIT_XMM
cglobal checkasm_call, 2,15,16,max_args*8+8
    mov  r6, r0                         ; r6 = func (r0 is about to be reloaded)
    mov  [rsp+max_args*8], r1           ; keep `ok` in the topmost local slot

    ; The caller pushed every forwarded argument on the stack rather than
    ; passing it in a register, so that functions wrongly assuming 32-bit ints
    ; are zero-extended to 64-bit get caught.  Reload the register arguments
    ; from those stack slots here.
    mov  r0, r6mp
    mov  r1, r7mp
    mov  r2, r8mp
    mov  r3, r9mp
%if UNIX64
    mov  r4, r10mp
    mov  r5, r11mp
    ; arguments 6.. go to the bottom of the local area
    %assign copy_first    6
    %assign copy_src_bias 1
    %assign copy_dst_drop 6
%else
    ; arguments 4.. keep their index in the local area, leaving slots 0-3 free
    %assign copy_first    4
    %assign copy_src_bias 7
    %assign copy_dst_drop 0
%endif
    ; Copy the remaining arguments from the caller's frame into ours.
    ; r9 is only a temporary here; it receives its test pattern below.
    %assign i copy_first
    %rep max_args-copy_first
        mov  r9, [rsp+stack_offset+(i+copy_src_bias)*8]
        mov  [rsp+(i-copy_dst_drop)*8], r9
        %assign i i+1
    %endrep

%if WIN64
    ; seed m6..m15 with their patterns
    %assign i 6
    %rep 16-6
        mova m %+ i, [x %+ i]
        %assign i i+1
    %endrep
%endif

    ; seed r14 down to r<free_regs> with their patterns
    %assign i 14
    %rep 15-free_regs
        mov r %+ i, [n %+ i]
        %assign i i-1
    %endrep

    call r6

    ; xor each register with its pattern (0 if untouched) and collect in r14
    %assign i 14
    %rep 15-free_regs
        xor r %+ i, [n %+ i]
        or  r14, r %+ i
        %assign i i-1
    %endrep

%if WIN64
    ; same for the vector registers, collected in m6
    %assign i 6
    %rep 16-6
        pxor m %+ i, [x %+ i]
        por  m6, m %+ i
        %assign i i+1
    %endrep
    ; saturating pack keeps every non-zero word non-zero, so the low qword
    ; now reflects all 128 bits and can be merged into r14
    packsswb m6, m6
    movq r5, m6
    or  r14, r5
%endif

    ; ZF comes from the last `or r14, ...` above
    jz .preserved
    mov  r9, rax                        ; hold func's return value across puts
    lea  r0, [error_message]
    call puts
    mov  r1, [rsp+max_args*8]           ; r1 = ok
    mov  dword [r1], 0                  ; *ok = 0
    mov  rax, r9
.preserved:
    RET
| 162 | |
| 163 | %else |
| 164 | |
; just random numbers to reduce the chance of incidental match
; (used as immediates: loaded into r3..r6 before the call, xored back after)
%define n3 dword 0x6549315c
%define n4 dword 0xe02f3e23
%define n5 dword 0xb78d0d1d
%define n6 dword 0x33627ba7

;-----------------------------------------------------------------------------
; intptr_t x265_checkasm_call( intptr_t (*func)(), int *ok, ... )
;
; 32-bit variant.  Loads r3..r6 with known patterns, re-pushes max_args
; argument slots, calls func, and checks that r3..r6 still hold the patterns.
; On a mismatch error_message is printed with puts() and *ok is set to 0.
; The value func left in eax is returned in either case.
;
; checkasm_call_float is declared right in front of checkasm_call and has no
; code of its own, so calling it executes the checkasm_call body.
;-----------------------------------------------------------------------------
cglobal checkasm_call_float
cglobal checkasm_call, 1,7
    mov  r3, n3
    mov  r4, n4
    mov  r5, n5
    mov  r6, n6

    ; Re-push the forwarded arguments, last one first.  Every push lowers esp
    ; by 4, so the same displacement steps backwards through the caller's
    ; argument list.
    ; NOTE(review): the 24 presumably accounts for the return address, the
    ; registers saved by the prologue and the func/ok slots - confirm against
    ; x86inc.
%rep max_args
    push dword [esp+24+max_args*4]
%endrep
    call r0
    add  esp, max_args*4                ; drop the re-pushed arguments

    ; xor leaves 0 in each register that kept its pattern; fold all four
    ; results together so that ZF tells whether everything was preserved
    xor  r3, n3
    xor  r4, n4
    or   r3, r4
    xor  r5, n5
    xor  r6, n6
    or   r5, r6
    or   r3, r5
    jz .preserved

    mov  r3, eax                        ; hold func's return value across puts
    lea  r1, [error_message]
    push r1
    call puts
    add  esp, 4
    mov  r1, r1m                        ; r1 = ok
    mov  dword [r1], 0                  ; *ok = 0
    mov  eax, r3
.preserved:
    REP_RET
| 203 | |
| 204 | %endif ; ARCH_X86_64 |
| 205 | |
;-----------------------------------------------------------------------------
; int x265_stack_pagealign( int (*func)(), int align )
;
; Calls func with the stack pointer moved to `align` bytes below a 4096-byte
; boundary, then restores the original stack.  Nothing touches the return
; register after the call, so func's result is passed through.
;   r0 = func, r1 = align (sign-extended from 32 bits where needed)
;-----------------------------------------------------------------------------
cglobal stack_pagealign, 2,2
    movsxdifnidn r1, r1d
    push rbp                            ; rbp anchors the original stack position
    mov  rbp, rsp
%if WIN64
    sub  rsp, 32                        ; shadow space, reserved before rounding down
%endif
    and  rsp, -0x1000                   ; round rsp down to a 4096-byte boundary
    sub  rsp, r1                        ; then step `align` bytes below it
    call r0
    mov  rsp, rbp                       ; undo the realignment
    pop  rbp
    RET
| 221 | |