| 1 | /* |
| 2 | * Copyright (c) 2009 Loren Merritt <lorenm@u.washington.edu> |
| 3 | * |
| 4 | * This file is part of FFmpeg. |
| 5 | * |
| 6 | * FFmpeg is free software; you can redistribute it and/or |
| 7 | * modify it under the terms of the GNU Lesser General Public |
| 8 | * License as published by the Free Software Foundation; either |
| 9 | * version 2.1 of the License, or (at your option) any later version. |
| 10 | * |
| 11 | * FFmpeg is distributed in the hope that it will be useful, |
| 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 14 | * Lesser General Public License for more details. |
| 15 | * |
| 16 | * You should have received a copy of the GNU Lesser General Public |
| 17 | * License along with FFmpeg; if not, write to the Free Software |
| 18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
| 19 | */ |
| 20 | |
| 21 | #include "config.h" |
| 22 | #include "libavutil/attributes.h" |
| 23 | #include "libavutil/cpu.h" |
| 24 | #include "libavutil/x86/asm.h" |
| 25 | #include "libavutil/x86/cpu.h" |
| 26 | #include "libavcodec/huffyuvdsp.h" |
| 27 | |
/* Prototypes for the x86 assembly implementations (defined in external
 * .asm files; presumably huffyuvdsp.asm — confirm in the build files).
 * Each group provides the same operation at increasing SIMD levels. */

/* dst[i] += src[i] for w bytes. */
void ff_add_bytes_mmx(uint8_t *dst, uint8_t *src, intptr_t w);
void ff_add_bytes_sse2(uint8_t *dst, uint8_t *src, intptr_t w);

/* Median prediction: reconstruct dst from diff using top row and the
 * running left/left_top state (updated in place). */
void ff_add_hfyu_median_pred_mmxext(uint8_t *dst, const uint8_t *top,
                                    const uint8_t *diff, intptr_t w,
                                    int *left, int *left_top);
void ff_add_hfyu_median_pred_sse2(uint8_t *dst, const uint8_t *top,
                                  const uint8_t *diff, intptr_t w,
                                  int *left, int *left_top);

/* Left prediction; returns the new left value. */
int ff_add_hfyu_left_pred_ssse3(uint8_t *dst, const uint8_t *src,
                                intptr_t w, int left);
int ff_add_hfyu_left_pred_sse4(uint8_t *dst, const uint8_t *src,
                               intptr_t w, int left);

/* Left prediction on packed 32-bit BGR pixels; left is a 4-byte state. */
void ff_add_hfyu_left_pred_bgr32_mmx(uint8_t *dst, const uint8_t *src,
                                     intptr_t w, uint8_t *left);
void ff_add_hfyu_left_pred_bgr32_sse2(uint8_t *dst, const uint8_t *src,
                                      intptr_t w, uint8_t *left);
| 47 | |
| 48 | #if HAVE_INLINE_ASM && HAVE_7REGS && ARCH_X86_32 |
/**
 * Median prediction using CMOV, for x86_32 CPUs without fast SIMD.
 *
 * For each i in [0, w): pred = median(l, t, l + t - tl), then
 * dst[i] = pred + diff[i] (byte arithmetic), where l is the previously
 * reconstructed pixel, t = top[i] and tl = top[i-1] (initially *left_top).
 * On return *left / *left_top hold the updated running state.
 *
 * Operand map (see constraint list below):
 *   %0 = l, %1 = tl, %2 = t, %3 = x (scratch/pointer), %4 = w2 (negative
 *   index counting up to 0), %5 = dst + w, %6 = diff + w, %7 = top + w.
 * Arrays are addressed as (base + w) + w2, so w2 runs from -w to -1.
 */
static void add_hfyu_median_pred_cmov(uint8_t *dst, const uint8_t *top,
                                      const uint8_t *diff, intptr_t w,
                                      int *left, int *left_top)
{
    x86_reg w2 = -w;   /* negative loop counter; loop ends when it reaches 0 */
    x86_reg x;
    int l = *left & 0xff;       /* only the low byte of the state is used */
    int tl = *left_top & 0xff;
    int t;
    __asm__ volatile (
        "mov %7, %3 \n"             /* x = top + w (array base) */
        "1: \n"
        "movzbl (%3, %4), %2 \n"    /* t = top[i], zero-extended */
        "mov %2, %k3 \n"            /* x (32-bit) = t */
        "sub %b1, %b3 \n"           /* low byte: x = t - tl ... */
        "add %b0, %b3 \n"           /* ... + l  => l + t - tl (mod 256) */
        "mov %2, %1 \n"             /* tl = t for the next iteration */
        "cmp %0, %2 \n"
        "cmovg %0, %2 \n"           /* t = min(l, t) */
        "cmovg %1, %0 \n"           /* l = max(l, t) (old t saved in tl) */
        "cmp %k3, %0 \n"
        "cmovg %k3, %0 \n"          /* l = min(max(l,t), l+t-tl) */
        "mov %7, %3 \n"             /* reload x = top + w (x was clobbered) */
        "cmp %2, %0 \n"
        "cmovl %2, %0 \n"           /* l = max(of the above, min(l,t)) => median */
        "add (%6, %4), %b0 \n"      /* l = median + diff[i] (byte add) */
        "mov %b0, (%5, %4) \n"      /* dst[i] = l */
        "inc %4 \n"                 /* ++i */
        "jl 1b \n"                  /* loop while i < 0 */
        : "+&q"(l), "+&q"(tl), "=&r"(t), "=&q"(x), "+&r"(w2)
        : "r"(dst + w), "r"(diff + w), "rm"(top + w)
    );
    /* Write the running state back for the caller's next slice. */
    *left = l;
    *left_top = tl;
}
| 84 | #endif |
| 85 | |
| 86 | av_cold void ff_huffyuvdsp_init_x86(HuffYUVDSPContext *c) |
| 87 | { |
| 88 | int cpu_flags = av_get_cpu_flags(); |
| 89 | |
| 90 | #if HAVE_INLINE_ASM && HAVE_7REGS && ARCH_X86_32 |
| 91 | if (cpu_flags & AV_CPU_FLAG_CMOV) |
| 92 | c->add_hfyu_median_pred = add_hfyu_median_pred_cmov; |
| 93 | #endif |
| 94 | |
| 95 | if (ARCH_X86_32 && EXTERNAL_MMX(cpu_flags)) { |
| 96 | c->add_bytes = ff_add_bytes_mmx; |
| 97 | c->add_hfyu_left_pred_bgr32 = ff_add_hfyu_left_pred_bgr32_mmx; |
| 98 | } |
| 99 | |
| 100 | if (ARCH_X86_32 && EXTERNAL_MMXEXT(cpu_flags)) { |
| 101 | /* slower than cmov version on AMD */ |
| 102 | if (!(cpu_flags & AV_CPU_FLAG_3DNOW)) |
| 103 | c->add_hfyu_median_pred = ff_add_hfyu_median_pred_mmxext; |
| 104 | } |
| 105 | |
| 106 | if (EXTERNAL_SSE2(cpu_flags)) { |
| 107 | c->add_bytes = ff_add_bytes_sse2; |
| 108 | c->add_hfyu_median_pred = ff_add_hfyu_median_pred_sse2; |
| 109 | c->add_hfyu_left_pred_bgr32 = ff_add_hfyu_left_pred_bgr32_sse2; |
| 110 | } |
| 111 | |
| 112 | if (EXTERNAL_SSSE3(cpu_flags)) { |
| 113 | c->add_hfyu_left_pred = ff_add_hfyu_left_pred_ssse3; |
| 114 | if (cpu_flags & AV_CPU_FLAG_SSE4) // not really SSE4, just slow on Conroe |
| 115 | c->add_hfyu_left_pred = ff_add_hfyu_left_pred_sse4; |
| 116 | } |
| 117 | } |