Imported Upstream version 1.4
[deb_x265.git] / source / common / x86 / const-a.asm
1 ;*****************************************************************************
2 ;* const-a.asm: x86 global constants
3 ;*****************************************************************************
4 ;* Copyright (C) 2010-2013 x264 project
5 ;*
6 ;* Authors: Loren Merritt <lorenm@u.washington.edu>
7 ;* Fiona Glaser <fiona@x264.com>
8 ;* Min Chen <chenm003@163.com> <min.chen@multicorewareinc.com>
9 ;*
10 ;* This program is free software; you can redistribute it and/or modify
11 ;* it under the terms of the GNU General Public License as published by
12 ;* the Free Software Foundation; either version 2 of the License, or
13 ;* (at your option) any later version.
14 ;*
15 ;* This program is distributed in the hope that it will be useful,
16 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 ;* GNU General Public License for more details.
19 ;*
20 ;* You should have received a copy of the GNU General Public License
21 ;* along with this program; if not, write to the Free Software
22 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
23 ;*
24 ;* This program is also available under a commercial proprietary license.
25 ;* For more information, contact us at license @ x265.com.
26 ;*****************************************************************************
27
28 %include "x86inc.asm"
29
30 SECTION_RODATA 32 ; macro not defined in this file (presumably from x86inc.asm); presumably opens a read-only data section with 32-byte alignment -- confirm
31 ; pb_1: 32 bytes, each with the value 1.
32 const pb_1, times 32 db 1
33 ; Word constants (mostly 16 words = 32 bytes each) followed by index tables whose names suggest shuffle/permute masks.
34 const hsub_mul, times 16 db 1, -1 ; byte pair (1, -1) repeated 16 times = 32 bytes
35 const pw_1, times 16 dw 1
36 const pw_16, times 16 dw 16
37 const pw_32, times 16 dw 32
38 const pw_128, times 16 dw 128
39 const pw_256, times 16 dw 256
40 const pw_512, times 16 dw 512
41 const pw_1023, times 8 dw 1023 ; only 8 words (16 bytes), unlike its 16-word neighbours
42 const pw_1024, times 16 dw 1024
43 const pw_4096, times 16 dw 4096
44 const pw_00ff, times 16 dw 0x00ff ; each word has the low byte set and the high byte clear
45 const pw_pixel_max,times 16 dw ((1 << BIT_DEPTH)-1) ; largest value that fits in BIT_DEPTH bits; BIT_DEPTH is not defined in this file
46 const deinterleave_shufd, dd 0,4,1,5,2,6,3,7 ; 8 dword indices (32 bytes); presumably a dword-permute control -- confirm at use site
47 const pb_unpackbd1, times 2 db 0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3 ; indices 0..3, each repeated 4 times; 16-byte pattern emitted twice
48 const pb_unpackbd2, times 2 db 4,4,4,4,5,5,5,5,6,6,6,6,7,7,7,7 ; indices 4..7, each repeated 4 times; 16-byte pattern emitted twice
49 const pb_unpackwq1, db 0,1,0,1,0,1,0,1,2,3,2,3,2,3,2,3 ; byte pairs (0,1) and (2,3), each repeated 4 times; 16 bytes
50 const pb_unpackwq2, db 4,5,4,5,4,5,4,5,6,7,6,7,6,7,6,7 ; byte pairs (4,5) and (6,7), each repeated 4 times; 16 bytes
51 const pw_swap, times 2 db 6,7,4,5,2,3,0,1 ; byte pairs listed in reverse pair order (6,7 ... 0,1); 8-byte pattern emitted twice
52 ; Byte constants, 16 bytes each.
53 const pb_4, times 16 db 4
54 const pb_16, times 16 db 16
55 const pb_64, times 16 db 64
56 const pb_01, times 8 db 0,1 ; byte pair (0, 1) repeated 8 times
57 const pb_0, times 16 db 0
58 const pb_a1, times 16 db 0xa1
59 const pb_3, times 16 db 3
60 const pb_8, times 16 db 8
61 const pb_32, times 16 db 32
62 const pb_128, times 16 db 128 ; 128 = 0x80, i.e. only the top bit of each byte is set
63 const pb_shuf8x8c, db 0,0,0,0,2,2,2,2,4,4,4,4,6,6,6,6 ; even indices 0,2,4,6, each repeated 4 times; presumably a byte-shuffle mask -- confirm at use site
64 ; Word constants, 8 words (16 bytes) each.
65 const pw_2, times 8 dw 2
66 const pw_m2, times 8 dw -2 ; every word is -2
67 const pw_4, times 8 dw 4
68 const pw_8, times 8 dw 8
69 const pw_64, times 8 dw 64
70 ; pw_256 is deliberately not defined here: it already exists earlier in this file as "times 16 dw 256" (a superset of the 8-word form), and defining the same symbol twice is a redefinition error.
71 const pw_32_0, times 4 dw 32,
72 times 4 dw 0
73 const pw_2000, times 8 dw 0x2000
74 const pw_8000, times 8 dw 0x8000 ; only the top bit of each word is set
75 const pw_3fff, times 8 dw 0x3fff
76 const pw_ppppmmmm, dw 1,1,1,1,-1,-1,-1,-1 ; in these four names each letter gives one word: p = +1, m = -1, z = 0
77 const pw_ppmmppmm, dw 1,1,-1,-1,1,1,-1,-1
78 const pw_pmpmpmpm, dw 1,-1,1,-1,1,-1,1,-1
79 const pw_pmmpzzzz, dw 1,-1,-1,1,0,0,0,0
80 const pd_1, times 8 dd 1 ; 8 dwords (32 bytes); the remaining pd_* entries below hold 4 dwords (16 bytes) each
81 const pd_2, times 4 dd 2
82 const pd_4, times 4 dd 4
83 const pd_8, times 4 dd 8
84 const pd_16, times 4 dd 16
85 const pd_32, times 4 dd 32
86 const pd_64, times 4 dd 64
87 const pd_128, times 4 dd 128
88 const pd_256, times 4 dd 256
89 const pd_512, times 4 dd 512
90 const pd_1024, times 4 dd 1024
91 const pd_2048, times 4 dd 2048
92 const pd_ffff, times 4 dd 0xffff ; low 16 bits set, high 16 bits clear in each dword
93 const pd_32767, times 4 dd 32767 ; 0x7fff
94 const pd_n32768, times 4 dd 0xffff8000 ; 0xffff8000 is -32768 when read as a signed 32-bit value
95 const pw_ff00, times 8 dw 0xff00 ; word (not dword) constant: high byte set, low byte clear in each of 8 words
96 ; Word ramp tables, 8 words (16 bytes) each; multiL/multiH/multiH2/multiH3 together cover 1..32 in order.
97 const multi_2Row, dw 1, 2, 3, 4, 1, 2, 3, 4 ; the ramp 1..4 written twice
98 const multiL, dw 1, 2, 3, 4, 5, 6, 7, 8 ; 1..8
99 const multiH, dw 9, 10, 11, 12, 13, 14, 15, 16 ; 9..16
100 const multiH2, dw 17, 18, 19, 20, 21, 22, 23, 24 ; 17..24
101 const multiH3, dw 25, 26, 27, 28, 29, 30, 31, 32 ; 25..32
102 ; popcnt_table: 256 one-byte entries generated at assembly time; entry x holds the number of set bits in the 8-bit value x.
103 const popcnt_table
104 %assign x 0 ; x is the table index, starting at 0
105 %rep 256 ; emit one byte per possible 8-bit value
106 ; population count: sum the eight individual bits of x
107 db ((x>>0)&1)+((x>>1)&1)+((x>>2)&1)+((x>>3)&1)+((x>>4)&1)+((x>>5)&1)+((x>>6)&1)+((x>>7)&1)
108 %assign x x+1 ; advance to the next index; x is left at 256 after the loop
109 %endrep
110 ; sw_64: a single dword (4 bytes) with the value 64.
111 const sw_64, dd 64