| 1 | /***************************************************************************** |
| 2 | * pixel.h: x86 pixel metrics |
| 3 | ***************************************************************************** |
| 4 | * Copyright (C) 2003-2013 x264 project |
| 5 | * |
| 6 | * Authors: Laurent Aimar <fenrir@via.ecp.fr> |
| 7 | * Loren Merritt <lorenm@u.washington.edu> |
| 8 | * Fiona Glaser <fiona@x264.com> |
| 9 | * |
| 10 | * This program is free software; you can redistribute it and/or modify |
| 11 | * it under the terms of the GNU General Public License as published by |
| 12 | * the Free Software Foundation; either version 2 of the License, or |
| 13 | * (at your option) any later version. |
| 14 | * |
| 15 | * This program is distributed in the hope that it will be useful, |
| 16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 18 | * GNU General Public License for more details. |
| 19 | * |
| 20 | * You should have received a copy of the GNU General Public License |
| 21 | * along with this program; if not, write to the Free Software |
| 22 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. |
| 23 | * |
| 24 | * This program is also available under a commercial proprietary license. |
| 25 | * For more information, contact us at license @ x265.com. |
| 26 | *****************************************************************************/ |
| 27 | |
| 28 | #ifndef X265_I386_PIXEL_H |
| 29 | #define X265_I386_PIXEL_H |
| 30 | |
/* Emits one prototype per block-size tag for metric `name` and
 * instruction-set tag `suffix`:
 *
 *     ret x265_pixel_<name>_<size>_<suffix> args;
 *
 * ret    - return type of every generated prototype
 * name   - metric name spliced into the symbol
 * suffix - trailing tag spliced into the symbol
 * args   - parenthesised parameter list shared by every prototype
 *
 * Every size tag appears exactly once (25 prototypes in total).  The
 * expansion already ends in ';', so an invocation needs no semicolon of
 * its own. */
#define DECL_PIXELS(ret, name, suffix, args) \
    ret x265_pixel_ ## name ## _4x4_ ## suffix args; \
    ret x265_pixel_ ## name ## _4x8_ ## suffix args; \
    ret x265_pixel_ ## name ## _4x16_ ## suffix args; \
    ret x265_pixel_ ## name ## _8x4_ ## suffix args; \
    ret x265_pixel_ ## name ## _8x8_ ## suffix args; \
    ret x265_pixel_ ## name ## _8x16_ ## suffix args; \
    ret x265_pixel_ ## name ## _8x32_ ## suffix args; \
    ret x265_pixel_ ## name ## _12x16_ ## suffix args; \
    ret x265_pixel_ ## name ## _16x4_ ## suffix args; \
    ret x265_pixel_ ## name ## _16x8_ ## suffix args; \
    ret x265_pixel_ ## name ## _16x12_ ## suffix args; \
    ret x265_pixel_ ## name ## _16x16_ ## suffix args; \
    ret x265_pixel_ ## name ## _16x32_ ## suffix args; \
    ret x265_pixel_ ## name ## _16x64_ ## suffix args; \
    ret x265_pixel_ ## name ## _24x32_ ## suffix args; \
    ret x265_pixel_ ## name ## _32x8_ ## suffix args; \
    ret x265_pixel_ ## name ## _32x16_ ## suffix args; \
    ret x265_pixel_ ## name ## _32x24_ ## suffix args; \
    ret x265_pixel_ ## name ## _32x32_ ## suffix args; \
    ret x265_pixel_ ## name ## _32x64_ ## suffix args; \
    ret x265_pixel_ ## name ## _48x64_ ## suffix args; \
    ret x265_pixel_ ## name ## _64x16_ ## suffix args; \
    ret x265_pixel_ ## name ## _64x32_ ## suffix args; \
    ret x265_pixel_ ## name ## _64x48_ ## suffix args; \
    ret x265_pixel_ ## name ## _64x64_ ## suffix args;
| 58 | |
/* Prototypes for every DECL_PIXELS size of `name`/`suffix`: each returns
 * int and takes (const pixel*, intptr_t, const pixel*, intptr_t). */
#define DECL_X1(name, suffix) \
    DECL_PIXELS(int, name, suffix, \
                (const pixel*, intptr_t, \
                 const pixel*, intptr_t))
| 61 | |
/* Same shape as DECL_X1, but both buffer arguments are const int16_t*
 * instead of const pixel*. */
#define DECL_X1_SS(name, suffix) \
    DECL_PIXELS(int, name, suffix, \
                (const int16_t*, intptr_t, \
                 const int16_t*, intptr_t))
| 64 | |
/* Same shape as DECL_X1, but the first buffer argument is const int16_t*
 * and the second is const pixel*. */
#define DECL_X1_SP(name, suffix) \
    DECL_PIXELS(int, name, suffix, \
                (const int16_t*, intptr_t, \
                 const pixel*, intptr_t))
| 67 | |
/* Despite the name, this declares two families for every DECL_PIXELS size:
 *   <name>_x3: void (four  const pixel*, intptr_t, int32_t*)
 *   <name>_x4: void (five  const pixel*, intptr_t, int32_t*) */
#define DECL_X4(name, suffix) \
    DECL_PIXELS(void, name ## _x3, suffix, \
                (const pixel*, const pixel*, const pixel*, const pixel*, \
                 intptr_t, int32_t*)) \
    DECL_PIXELS(void, name ## _x4, suffix, \
                (const pixel*, const pixel*, const pixel*, const pixel*, const pixel*, \
                 intptr_t, int32_t*))
| 71 | |
| 72 | /* sad-a.asm */ |
| 73 | DECL_X1(sad, mmx2) |
| 74 | DECL_X1(sad, sse2) |
| 75 | DECL_X4(sad, sse2_misalign) |
| 76 | DECL_X1(sad, sse3) |
| 77 | DECL_X1(sad, sse2_aligned) |
| 78 | DECL_X1(sad, ssse3) |
| 79 | DECL_X1(sad, ssse3_aligned) |
| 80 | DECL_X1(sad, avx2) |
| 81 | DECL_X1(sad, avx2_aligned) |
| 82 | DECL_X4(sad, mmx2) |
| 83 | DECL_X4(sad, sse2) |
| 84 | DECL_X4(sad, sse3) |
| 85 | DECL_X4(sad, ssse3) |
| 86 | DECL_X4(sad, avx) |
| 87 | DECL_X4(sad, avx2) |
| 88 | DECL_X1(sad, cache32_mmx2); |
| 89 | DECL_X1(sad, cache64_mmx2); |
| 90 | DECL_X1(sad, cache64_sse2); |
| 91 | DECL_X1(sad, cache64_ssse3); |
| 92 | DECL_X4(sad, cache32_mmx2); |
| 93 | DECL_X4(sad, cache64_mmx2); |
| 94 | DECL_X4(sad, cache64_sse2); |
| 95 | DECL_X4(sad, cache64_ssse3); |
| 96 | |
| 97 | /* pixel-a.asm */ |
| 98 | DECL_X1(satd, mmx2) |
| 99 | DECL_X1(satd, sse2) |
| 100 | DECL_X1(satd, ssse3) |
| 101 | DECL_X1(satd, ssse3_atom) |
| 102 | DECL_X1(satd, sse4) |
| 103 | DECL_X1(satd, avx) |
| 104 | DECL_X1(satd, xop) |
| 105 | DECL_X1(satd, avx2) |
| 106 | int x265_pixel_satd_8x32_sse2(const pixel*, intptr_t, const pixel*, intptr_t); |
| 107 | int x265_pixel_satd_16x4_sse2(const pixel*, intptr_t, const pixel*, intptr_t); |
| 108 | int x265_pixel_satd_16x12_sse2(const pixel*, intptr_t, const pixel*, intptr_t); |
| 109 | int x265_pixel_satd_16x32_sse2(const pixel*, intptr_t, const pixel*, intptr_t); |
| 110 | int x265_pixel_satd_16x64_sse2(const pixel*, intptr_t, const pixel*, intptr_t); |
| 111 | |
| 112 | DECL_X1(sa8d, mmx2) |
| 113 | DECL_X1(sa8d, sse2) |
| 114 | DECL_X1(sa8d, ssse3) |
| 115 | DECL_X1(sa8d, ssse3_atom) |
| 116 | DECL_X1(sa8d, sse4) |
| 117 | DECL_X1(sa8d, avx) |
| 118 | DECL_X1(sa8d, xop) |
| 119 | DECL_X1(sa8d, avx2) |
| 120 | |
| 121 | /* ssd-a.asm */ |
| 122 | DECL_X1(ssd, mmx) |
| 123 | DECL_X1(ssd, mmx2) |
| 124 | DECL_X1(ssd, sse2slow) |
| 125 | DECL_X1(ssd, sse2) |
| 126 | DECL_X1(ssd, ssse3) |
| 127 | DECL_X1(ssd, avx) |
| 128 | DECL_X1(ssd, xop) |
| 129 | DECL_X1(ssd, avx2) |
| 130 | DECL_X1_SS(ssd_ss, mmx) |
| 131 | DECL_X1_SS(ssd_ss, mmx2) |
| 132 | DECL_X1_SS(ssd_ss, sse2slow) |
| 133 | DECL_X1_SS(ssd_ss, sse2) |
| 134 | DECL_X1_SS(ssd_ss, ssse3) |
| 135 | DECL_X1_SS(ssd_ss, sse4) |
| 136 | DECL_X1_SS(ssd_ss, avx) |
| 137 | DECL_X1_SS(ssd_ss, xop) |
| 138 | DECL_X1_SS(ssd_ss, avx2) |
| 139 | DECL_X1_SP(ssd_sp, sse4) |
| 140 | #define DECL_HEVC_SSD(suffix) \ |
| 141 | int x265_pixel_ssd_32x64_ ## suffix(const pixel*, intptr_t, const pixel*, intptr_t); \ |
| 142 | int x265_pixel_ssd_16x64_ ## suffix(const pixel*, intptr_t, const pixel*, intptr_t); \ |
| 143 | int x265_pixel_ssd_32x32_ ## suffix(const pixel*, intptr_t, const pixel*, intptr_t); \ |
| 144 | int x265_pixel_ssd_32x16_ ## suffix(const pixel*, intptr_t, const pixel*, intptr_t); \ |
| 145 | int x265_pixel_ssd_16x32_ ## suffix(const pixel*, intptr_t, const pixel*, intptr_t); \ |
| 146 | int x265_pixel_ssd_32x24_ ## suffix(const pixel*, intptr_t, const pixel*, intptr_t); \ |
| 147 | int x265_pixel_ssd_24x32_ ## suffix(const pixel*, intptr_t, const pixel*, intptr_t); \ |
| 148 | int x265_pixel_ssd_32x8_ ## suffix(const pixel*, intptr_t, const pixel*, intptr_t); \ |
| 149 | int x265_pixel_ssd_8x32_ ## suffix(const pixel*, intptr_t, const pixel*, intptr_t); \ |
| 150 | int x265_pixel_ssd_16x16_ ## suffix(const pixel*, intptr_t, const pixel*, intptr_t); \ |
| 151 | int x265_pixel_ssd_16x8_ ## suffix(const pixel*, intptr_t, const pixel*, intptr_t); \ |
| 152 | int x265_pixel_ssd_8x16_ ## suffix(const pixel*, intptr_t, const pixel*, intptr_t); \ |
| 153 | int x265_pixel_ssd_16x12_ ## suffix(const pixel*, intptr_t, const pixel*, intptr_t); \ |
| 154 | int x265_pixel_ssd_16x4_ ## suffix(const pixel*, intptr_t, const pixel*, intptr_t); \ |
| 155 | int x265_pixel_ssd_8x8_ ## suffix(const pixel*, intptr_t, const pixel*, intptr_t); \ |
| 156 | int x265_pixel_ssd_8x4_ ## suffix(const pixel*, intptr_t, const pixel*, intptr_t); |
| 157 | DECL_HEVC_SSD(sse2) |
| 158 | DECL_HEVC_SSD(ssse3) |
| 159 | DECL_HEVC_SSD(avx) |
| 160 | |
| 161 | int x265_pixel_ssd_12x16_sse4(const pixel*, intptr_t, const pixel*, intptr_t); |
| 162 | int x265_pixel_ssd_24x32_sse4(const pixel*, intptr_t, const pixel*, intptr_t); |
| 163 | int x265_pixel_ssd_48x64_sse4(const pixel*, intptr_t, const pixel*, intptr_t); |
| 164 | int x265_pixel_ssd_64x16_sse4(const pixel*, intptr_t, const pixel*, intptr_t); |
| 165 | int x265_pixel_ssd_64x32_sse4(const pixel*, intptr_t, const pixel*, intptr_t); |
| 166 | int x265_pixel_ssd_64x48_sse4(const pixel*, intptr_t, const pixel*, intptr_t); |
| 167 | int x265_pixel_ssd_64x64_sse4(const pixel*, intptr_t, const pixel*, intptr_t); |
| 168 | |
/* ssd_s prototypes: int x265_pixel_ssd_s_<size>_<suffix>(const int16_t*,
 * intptr_t).  Generated through a block-local helper that is removed again
 * right after use. */
#define DECL_SSD_S(size, suffix) \
    int x265_pixel_ssd_s_ ## size ## _ ## suffix(const int16_t*, intptr_t);
DECL_SSD_S(4, sse2)
DECL_SSD_S(8, sse2)
DECL_SSD_S(16, sse2)
DECL_SSD_S(32, sse2)
DECL_SSD_S(32, avx2)
#undef DECL_SSD_S
| 174 | |
| 175 | #define ADDAVG(func) \ |
| 176 | void x265_ ## func ## _sse4(const int16_t*, const int16_t*, pixel*, intptr_t, intptr_t, intptr_t); |
| 177 | ADDAVG(addAvg_2x4) |
| 178 | ADDAVG(addAvg_2x8) |
| 179 | ADDAVG(addAvg_4x2); |
| 180 | ADDAVG(addAvg_4x4) |
| 181 | ADDAVG(addAvg_4x8) |
| 182 | ADDAVG(addAvg_4x16) |
| 183 | ADDAVG(addAvg_6x8) |
| 184 | ADDAVG(addAvg_8x2) |
| 185 | ADDAVG(addAvg_8x4) |
| 186 | ADDAVG(addAvg_8x6) |
| 187 | ADDAVG(addAvg_8x8) |
| 188 | ADDAVG(addAvg_8x16) |
| 189 | ADDAVG(addAvg_8x32) |
| 190 | ADDAVG(addAvg_12x16) |
| 191 | ADDAVG(addAvg_16x4) |
| 192 | ADDAVG(addAvg_16x8) |
| 193 | ADDAVG(addAvg_16x12) |
| 194 | ADDAVG(addAvg_16x16) |
| 195 | ADDAVG(addAvg_16x32) |
| 196 | ADDAVG(addAvg_16x64) |
| 197 | ADDAVG(addAvg_24x32) |
| 198 | ADDAVG(addAvg_32x8) |
| 199 | ADDAVG(addAvg_32x16) |
| 200 | ADDAVG(addAvg_32x24) |
| 201 | ADDAVG(addAvg_32x32) |
| 202 | ADDAVG(addAvg_32x64) |
| 203 | ADDAVG(addAvg_48x64) |
| 204 | ADDAVG(addAvg_64x16) |
| 205 | ADDAVG(addAvg_64x32) |
| 206 | ADDAVG(addAvg_64x48) |
| 207 | ADDAVG(addAvg_64x64) |
| 208 | |
| 209 | ADDAVG(addAvg_2x16) |
| 210 | ADDAVG(addAvg_4x32) |
| 211 | ADDAVG(addAvg_6x16) |
| 212 | ADDAVG(addAvg_8x12) |
| 213 | ADDAVG(addAvg_8x64) |
| 214 | ADDAVG(addAvg_12x32) |
| 215 | ADDAVG(addAvg_16x24) |
| 216 | ADDAVG(addAvg_24x64) |
| 217 | ADDAVG(addAvg_32x48) |
| 218 | |
| 219 | void x265_downShift_16_sse2(const uint16_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int width, int height, int shift, uint16_t mask); |
| 220 | void x265_upShift_8_sse4(const uint8_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int width, int height, int shift); |
| 221 | |
/* Remove the declaration-helper macros so they do not leak into files that
 * include this header.  DECL_X1_SS and DECL_X1_SP expand to DECL_PIXELS, so
 * they are unusable once it is gone and are dropped together with it.
 * NOTE(review): ADDAVG is still left defined here; confirm no includer
 * relies on it before undefining it as well. */
#undef DECL_PIXELS
#undef DECL_HEVC_SSD
#undef DECL_X1
#undef DECL_X1_SS
#undef DECL_X1_SP
#undef DECL_X4
| 226 | |
| 227 | #endif // ifndef X265_I386_PIXEL_H |