| 1 | /* |
| 2 | * This file is part of FFmpeg. |
| 3 | * |
| 4 | * FFmpeg is free software; you can redistribute it and/or |
| 5 | * modify it under the terms of the GNU Lesser General Public |
| 6 | * License as published by the Free Software Foundation; either |
| 7 | * version 2.1 of the License, or (at your option) any later version. |
| 8 | * |
| 9 | * FFmpeg is distributed in the hope that it will be useful, |
| 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 12 | * Lesser General Public License for more details. |
| 13 | * |
| 14 | * You should have received a copy of the GNU Lesser General Public |
| 15 | * License along with FFmpeg; if not, write to the Free Software |
| 16 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
| 17 | */ |
| 18 | |
| 19 | #include "config.h" |
| 20 | |
| 21 | #include "fdct.h" |
| 22 | #include "xvididct.h" |
| 23 | #include "simple_idct.h" |
| 24 | |
#if ARCH_X86_64 && HAVE_MMX && HAVE_YASM
void ff_prores_idct_put_10_sse2(uint16_t *dst, int linesize,
                                int16_t *block, int16_t *qmat);

/**
 * Adapt ff_prores_idct_put_10_sse2() to an in-place, single-argument form.
 *
 * The 64 values in dst are copied into a scratch block, which is handed to
 * ff_prores_idct_put_10_sse2() together with a quantiser table whose entries
 * are all 4 and a line size of 16. The result is written back into dst, and
 * 512 is then subtracted from each of the 64 values.
 *
 * The scratch buffers have static storage duration, so this function is not
 * reentrant.
 *
 * @param dst 64 coefficients on entry; overwritten with the result
 */
static void ff_prores_idct_put_10_sse2_wrap(int16_t *dst)
{
    DECLARE_ALIGNED(16, static int16_t, qmat)[64];
    DECLARE_ALIGNED(16, static int16_t, tmp)[64];
    int i;

    for (i = 0; i < 64; i++) {
        qmat[i] = 4;
        tmp[i]  = dst[i];
    }

    /* The prototype takes uint16_t *, so convert explicitly instead of
     * passing an incompatible pointer type. int16_t and uint16_t are
     * corresponding signed/unsigned types, so the access is well defined.
     * NOTE(review): the line size of 16 is presumably in bytes (8 samples of
     * 16 bits per row) -- confirm against the assembly. */
    ff_prores_idct_put_10_sse2((uint16_t *)dst, 16, tmp, qmat);

    /* NOTE(review): presumably removes the mid-level offset added by the
     * 10-bit "put" routine so the output is centred on zero -- confirm. */
    for (i = 0; i < 64; i++)
        dst[i] -= 512;
}
#endif
| 45 | |
| 46 | static const struct algo fdct_tab_arch[] = { |
| 47 | #if HAVE_MMX_INLINE |
| 48 | { "MMX", ff_fdct_mmx, FF_IDCT_PERM_NONE, AV_CPU_FLAG_MMX }, |
| 49 | #endif |
| 50 | #if HAVE_MMXEXT_INLINE |
| 51 | { "MMXEXT", ff_fdct_mmxext, FF_IDCT_PERM_NONE, AV_CPU_FLAG_MMXEXT }, |
| 52 | #endif |
| 53 | #if HAVE_SSE2_INLINE |
| 54 | { "SSE2", ff_fdct_sse2, FF_IDCT_PERM_NONE, AV_CPU_FLAG_SSE2 }, |
| 55 | #endif |
| 56 | { 0 } |
| 57 | }; |
| 58 | |
| 59 | static const struct algo idct_tab_arch[] = { |
| 60 | #if HAVE_MMX_INLINE |
| 61 | { "SIMPLE-MMX", ff_simple_idct_mmx, FF_IDCT_PERM_SIMPLE, AV_CPU_FLAG_MMX }, |
| 62 | #endif |
| 63 | #if CONFIG_MPEG4_DECODER |
| 64 | #if HAVE_MMX_INLINE |
| 65 | { "XVID-MMX", ff_xvid_idct_mmx, FF_IDCT_PERM_NONE, AV_CPU_FLAG_MMX, 1 }, |
| 66 | #endif |
| 67 | #if HAVE_MMXEXT_INLINE |
| 68 | { "XVID-MMXEXT", ff_xvid_idct_mmxext, FF_IDCT_PERM_NONE, AV_CPU_FLAG_MMXEXT, 1 }, |
| 69 | #endif |
| 70 | #if HAVE_SSE2_INLINE |
| 71 | { "XVID-SSE2", ff_xvid_idct_sse2, FF_IDCT_PERM_SSE2, AV_CPU_FLAG_SSE2, 1 }, |
| 72 | #if ARCH_X86_64 && HAVE_YASM |
| 73 | { "PR-SSE2", ff_prores_idct_put_10_sse2_wrap, FF_IDCT_PERM_TRANSPOSE, AV_CPU_FLAG_SSE2, 1 }, |
| 74 | #endif |
| 75 | #endif |
| 76 | #endif /* CONFIG_MPEG4_DECODER */ |
| 77 | { 0 } |
| 78 | }; |
| 79 | |
/*
 * Coefficient permutation applied for FF_IDCT_PERM_SIMPLE: source
 * coefficient i is stored at destination index idct_simple_mmx_perm[i].
 *
 * The table is a read-only permutation of 0..63, so it is const and uses
 * one byte per entry.
 */
static const uint8_t idct_simple_mmx_perm[64] = {
    0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
    0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
    0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
    0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
    0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
    0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
    0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
    0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
};
| 90 | |
/*
 * Column reordering used for FF_IDCT_PERM_SSE2: within each row of eight
 * coefficients, source column c is stored at column idct_sse2_row_perm[c].
 */
static const uint8_t idct_sse2_row_perm[8] = {
    0, 4, 1, 5, 2, 6, 3, 7,
};
| 92 | |
| 93 | static int permute_x86(int16_t dst[64], const int16_t src[64], |
| 94 | enum idct_permutation_type perm_type) |
| 95 | { |
| 96 | int i; |
| 97 | |
| 98 | switch (perm_type) { |
| 99 | case FF_IDCT_PERM_SIMPLE: |
| 100 | for (i = 0; i < 64; i++) |
| 101 | dst[idct_simple_mmx_perm[i]] = src[i]; |
| 102 | return 1; |
| 103 | case FF_IDCT_PERM_SSE2: |
| 104 | for (i = 0; i < 64; i++) |
| 105 | dst[(i & 0x38) | idct_sse2_row_perm[i & 7]] = src[i]; |
| 106 | return 1; |
| 107 | } |
| 108 | |
| 109 | return 0; |
| 110 | } |