1 ;******************************************************************************
2 ;* MMX/SSE2-optimized functions for the VP6 decoder
3 ;* Copyright (C) 2009 Sebastien Lucas <sebastien.lucas@gmail.com>
4 ;* Copyright (C) 2009 Zuxy Meng <zuxy.meng@gmail.com>
6 ;* This file is part of FFmpeg.
8 ;* FFmpeg is free software; you can redistribute it and/or
9 ;* modify it under the terms of the GNU Lesser General Public
10 ;* License as published by the Free Software Foundation; either
11 ;* version 2.1 of the License, or (at your option) any later version.
13 ;* FFmpeg is distributed in the hope that it will be useful,
14 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
15 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 ;* Lesser General Public License for more details.
18 ;* You should have received a copy of the GNU Lesser General Public
19 ;* License along with FFmpeg; if not, write to the Free Software
20 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 ;******************************************************************************
23 %include "libavutil/x86/x86util.asm"
39 pmullw m0, [rsp+8*11] ; src[x-8 ] * biweight [0]
40 pmullw m1, [rsp+8*12] ; src[x ] * biweight [1]
41 pmullw m3, [rsp+8*11] ; src[x-8 ] * biweight [0]
42 pmullw m4, [rsp+8*12] ; src[x ] * biweight [1]
53 pmullw m1, [rsp+8*13] ; src[x+8 ] * biweight [2]
54 pmullw m2, [rsp+8*14] ; src[x+16] * biweight [3]
55 pmullw m4, [rsp+8*13] ; src[x+8 ] * biweight [2]
56 pmullw m5, [rsp+8*14] ; src[x+16] * biweight [3]
61 paddsw m0, m6 ; Add 64
62 paddsw m3, m6 ; Add 64
72 pmullw m0, m4 ; src[x-8 ] * biweight [0]
73 pmullw m1, m5 ; src[x ] * biweight [1]
79 pmullw m1, m6 ; src[x+8 ] * biweight [2]
80 pmullw m2, m3 ; src[x+16] * biweight [3]
83 paddsw m0, [pw_64] ; Add 64
87 %endif ; mmsize == 8/16
114 %endif ; mmsize == 8/16
117 %macro vp6_filter_diag4 0
; Parameterless macro that emits the vp6_filter_diag4 entry point; the
; mmsize in effect when the macro is expanded decides the stack alignment.
118 ; void ff_vp6_filter_diag4_<opt>(uint8_t *dst, uint8_t *src, int stride,
119 ; const int16_t h_weights[4], const int16_t v_weights[4])
; cglobal operands after the name (x86inc convention): 5 function arguments
; (r0..r4, in prototype order), 7 general-purpose registers, 8 SIMD registers.
; r5 and r6 are therefore the only scratch GPRs beyond the arguments.
120 cglobal vp6_filter_diag4, 5, 7, 8
; r5 keeps the caller's rsp for the whole function body; it is only read
; again by the final restore, so nothing in between may overwrite it.
121 mov r5, rsp ; backup stack pointer
122 and rsp, ~(mmsize-1) ; align stack: round rsp down to a multiple of mmsize
; First DIAG4 use: taps at byte offsets -1, 0, +1, +2 relative to r1 (src),
; result addressed through r3.
; NOTE(review): presumably the horizontal pass into a temporary stack buffer
; using h_weights - the setup of r3 and the weights is not visible here; confirm.
142 DIAG4 r1, -1, 0, 1, 2, r3
; Second DIAG4 use: taps at byte offsets -8, 0, +8, +16 relative to r3,
; result addressed through r0 (dst).
; NOTE(review): presumably the vertical pass over 8-byte-wide intermediate
; rows using v_weights - confirm against the surrounding loop code.
154 DIAG4 r3, -8, 0, 8, 16, r0
160 mov rsp, r5 ; restore stack pointer (undoes the alignment adjustment above)