Commit | Line | Data |
---|---|---|
2ba45a60 DM |
1 | /* |
2 | * Copyright (c) 2009 Loren Merritt <lorenm@u.washington.edu> | |
3 | * | |
4 | * This file is part of FFmpeg. | |
5 | * | |
6 | * FFmpeg is free software; you can redistribute it and/or | |
7 | * modify it under the terms of the GNU Lesser General Public | |
8 | * License as published by the Free Software Foundation; either | |
9 | * version 2.1 of the License, or (at your option) any later version. | |
10 | * | |
11 | * FFmpeg is distributed in the hope that it will be useful, | |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 | * Lesser General Public License for more details. | |
15 | * | |
16 | * You should have received a copy of the GNU Lesser General Public | |
17 | * License along with FFmpeg; if not, write to the Free Software | |
18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
19 | */ | |
20 | ||
21 | #include "config.h" | |
22 | #include "libavutil/attributes.h" | |
23 | #include "libavutil/cpu.h" | |
24 | #include "libavutil/x86/asm.h" | |
25 | #include "libavutil/x86/cpu.h" | |
26 | #include "libavcodec/huffyuvdsp.h" | |
27 | ||
/*
 * Prototypes for routines that are not defined in this file. They are only
 * referenced from ff_huffyuvdsp_init_x86(), which installs them into the DSP
 * context depending on the detected CPU flags.
 * NOTE(review): presumably implemented in external assembly, since the init
 * function gates them with the EXTERNAL_*() checks -- confirm against the
 * build files.
 */

/* Byte-wise add helpers (MMX / SSE2 variants). */
void ff_add_bytes_mmx(uint8_t *dst, uint8_t *src, intptr_t w);
void ff_add_bytes_sse2(uint8_t *dst, uint8_t *src, intptr_t w);

/* Median prediction (MMXEXT / SSE2 variants). */
void ff_add_hfyu_median_pred_mmxext(uint8_t *dst,
                                    const uint8_t *top,
                                    const uint8_t *diff,
                                    intptr_t w,
                                    int *left,
                                    int *left_top);
void ff_add_hfyu_median_pred_sse2(uint8_t *dst,
                                  const uint8_t *top,
                                  const uint8_t *diff,
                                  intptr_t w,
                                  int *left,
                                  int *left_top);

/* Left prediction (SSSE3 / SSE4 variants); these return an int. */
int ff_add_hfyu_left_pred_ssse3(uint8_t *dst,
                                const uint8_t *src,
                                intptr_t w,
                                int left);
int ff_add_hfyu_left_pred_sse4(uint8_t *dst,
                               const uint8_t *src,
                               intptr_t w,
                               int left);

/* Left prediction for BGR32 data (MMX / SSE2 variants). */
void ff_add_hfyu_left_pred_bgr32_mmx(uint8_t *dst,
                                     const uint8_t *src,
                                     intptr_t w,
                                     uint8_t *left);
void ff_add_hfyu_left_pred_bgr32_sse2(uint8_t *dst,
                                      const uint8_t *src,
                                      intptr_t w,
                                      uint8_t *left);
47 | ||
#if HAVE_INLINE_ASM && HAVE_7REGS && ARCH_X86_32
/**
 * Median-prediction reconstruction of one row, written as 32-bit x86 inline
 * asm built around CMOV.
 *
 * Starting from l = *left & 0xff and tl = *left_top & 0xff, each step i does:
 *
 *     t      = top[i]
 *     grad   = (l + t - tl) & 0xff
 *     l      = (median(l, t, grad) + diff[i]) & 0xff
 *     dst[i] = l
 *     tl     = t
 *
 * The three pointers are biased by +w and indexed with a counter that runs
 * from -w up towards 0, so a single "inc" both advances the index and sets
 * the flags for the loop branch.
 *
 * @param dst      destination row; one byte is stored per step
 * @param top      row above the current one; one byte is loaded per step
 * @param diff     residual row; one byte is loaded per step
 * @param w        number of bytes to process. The counter is tested only
 *                 after the loop body, so at least one step always runs;
 *                 callers are assumed to pass w > 0.
 * @param left     in: left neighbour of the first byte (low 8 bits used);
 *                 out: last reconstructed byte
 * @param left_top in: top-left neighbour of the first byte (low 8 bits used);
 *                 out: last byte loaded from top
 */
static void add_hfyu_median_pred_cmov(uint8_t *dst, const uint8_t *top,
                                      const uint8_t *diff, intptr_t w,
                                      int *left, int *left_top)
{
    x86_reg w2 = -w;            /* negative index, counts up towards 0 */
    x86_reg x;                  /* holds top + w, and doubles as gradient scratch */
    int l  = *left     & 0xff;
    int tl = *left_top & 0xff;
    int t;

    /*
     * Operands: %0 = l, %1 = tl, %2 = t, %3 = x, %4 = w2,
     *           %5 = dst + w, %6 = diff + w, %7 = top + w.
     * l, tl and x use the "q" constraint because their byte sub-registers
     * (%b0, %b1, %b3) are accessed.
     */
    __asm__ volatile (
        "mov          %7, %3            \n" /* x = top + w                        */
        "1:                             \n"
        "movzbl (%3, %4), %2            \n" /* t = top[i], zero-extended          */
        "mov          %2, %k3           \n" /* x = t                              */
        "sub         %b1, %b3           \n" /* x = (t - tl) & 0xff                */
        "add         %b0, %b3           \n" /* x = (t - tl + l) & 0xff            */
        "mov          %2, %1            \n" /* tl = t, kept for the next step     */
        "cmp          %0, %2            \n" /* flags from t - l                   */
        "cmovg        %0, %2            \n" /* t > l: t = l          -> t = min   */
        "cmovg        %1, %0            \n" /* t > l: l = old t (tl) -> l = max   */
        "cmp         %k3, %0            \n"
        "cmovg       %k3, %0            \n" /* l = min(l, x)                      */
        "mov          %7, %3            \n" /* x = top + w again                  */
        "cmp          %2, %0            \n"
        "cmovl        %2, %0            \n" /* l = max(l, t) -> the median        */
        "add    (%6, %4), %b0           \n" /* l = (l + diff[i]) & 0xff           */
        "mov         %b0, (%5, %4)      \n" /* dst[i] = l                         */
        "inc          %4                \n"
        "jl           1b                \n" /* repeat while the counter is < 0    */
        : "+&q"(l), "+&q"(tl), "=&r"(t), "=&q"(x), "+&r"(w2)
        : "r"(dst + w), "r"(diff + w), "rm"(top + w)
        /* The asm loads from top/diff and stores to dst through pointer
         * operands only, so the compiler must be told that memory is
         * accessed; "volatile" alone does not imply this. */
        : "memory"
    );
    *left     = l;
    *left_top = tl;
}
#endif
85 | ||
86 | av_cold void ff_huffyuvdsp_init_x86(HuffYUVDSPContext *c) | |
87 | { | |
88 | int cpu_flags = av_get_cpu_flags(); | |
89 | ||
90 | #if HAVE_INLINE_ASM && HAVE_7REGS && ARCH_X86_32 | |
91 | if (cpu_flags & AV_CPU_FLAG_CMOV) | |
92 | c->add_hfyu_median_pred = add_hfyu_median_pred_cmov; | |
93 | #endif | |
94 | ||
95 | if (ARCH_X86_32 && EXTERNAL_MMX(cpu_flags)) { | |
96 | c->add_bytes = ff_add_bytes_mmx; | |
97 | c->add_hfyu_left_pred_bgr32 = ff_add_hfyu_left_pred_bgr32_mmx; | |
98 | } | |
99 | ||
100 | if (ARCH_X86_32 && EXTERNAL_MMXEXT(cpu_flags)) { | |
101 | /* slower than cmov version on AMD */ | |
102 | if (!(cpu_flags & AV_CPU_FLAG_3DNOW)) | |
103 | c->add_hfyu_median_pred = ff_add_hfyu_median_pred_mmxext; | |
104 | } | |
105 | ||
106 | if (EXTERNAL_SSE2(cpu_flags)) { | |
107 | c->add_bytes = ff_add_bytes_sse2; | |
108 | c->add_hfyu_median_pred = ff_add_hfyu_median_pred_sse2; | |
109 | c->add_hfyu_left_pred_bgr32 = ff_add_hfyu_left_pred_bgr32_sse2; | |
110 | } | |
111 | ||
112 | if (EXTERNAL_SSSE3(cpu_flags)) { | |
113 | c->add_hfyu_left_pred = ff_add_hfyu_left_pred_ssse3; | |
114 | if (cpu_flags & AV_CPU_FLAG_SSE4) // not really SSE4, just slow on Conroe | |
115 | c->add_hfyu_left_pred = ff_add_hfyu_left_pred_sse4; | |
116 | } | |
117 | } |