[deb_ffmpeg.git] / x86 / vf_hqdn3d.asm

;******************************************************************************
;* Copyright (c) 2012 Loren Merritt
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************

%include "libavutil/x86/x86util.asm"

SECTION .text

; LOWPASS prevsample, cursample, lut
; Arguments 1 and 2 are register base names (the q and d size suffixes are
; appended here); argument 3 is the base name of the register that holds the
; lookup-table pointer.
; Computes
;     prev = (signed 16-bit) lut[(prev - cur) >> (8 - lut_bits)] + cur
; and leaves the result in the prevsample register. cursample is only read.
; The symbol lut_bits must already be assigned when the macro is expanded;
; when it equals 8 the shift is omitted altogether.
; Clobbers the flags.
; NOTE(review): the table index is a signed difference and can be negative, so
; lut presumably points into the middle of its table - confirm against the
; code that builds the tables.
%macro LOWPASS 3 ; prevsample, cursample, lut
    sub    %1q, %2q                     ; signed difference prev - cur
%if lut_bits != 8
    sar    %1q, 8-lut_bits              ; arithmetic shift keeps the sign of the difference
%endif
    movsx  %1d, word [%3q+%1q*2]        ; sign-extended 16-bit table entry (2 bytes per entry)
    add    %1d, %2d                     ; table entry + current sample
%endmacro

; LOAD dstreg, x, bitdepth
; Loads sample x of the row addressed by srcq, zero-extended into dstreg:
; a byte at srcq+x for bitdepth 8, otherwise a word at srcq+x*2.
; For every bitdepth other than 16 the value is then shifted left by
; 16-bitdepth and (1<<(15-bitdepth))-1 is added, which is one less than half
; of the scaled step between adjacent sample values.
; NOTE(review): the added constant looks like a rounding bias for the final
; right shift done by the caller - confirm against the C reference.
; Clobbers the flags when bitdepth is not 16 (shl/add).
%macro LOAD 3 ; dstreg, x, bitdepth
%if %3 == 8
    movzx  %1, byte [srcq+%2]           ; one byte per sample
%else
    movzx  %1, word [srcq+(%2)*2]       ; two bytes per sample
%endif
%if %3 != 16
    shl    %1, 16-%3                    ; scale the sample up to 16 bits
    add    %1, (1<<(15-%3))-1           ; half a scaled step, minus one
%endif
%endmacro

;------------------------------------------------------------------------------
; HQDN3D_ROW bitdepth
; Emits one function, declared through cglobal as hqdn3d_row_<bitdepth>_x86,
; that filters a single row of samples.
;
; Arguments, as named in the cglobal declaration:
;   src, dst           input and output rows; 1 byte per sample for bitdepth 8,
;                      2 bytes per sample otherwise (see bytedepth below)
;   lineant, frameant  arrays of 16-bit words, read and rewritten per sample
;                      (NOTE(review): presumably the previous-line and
;                      previous-frame filter state - confirm with the caller)
;   width              number of samples in the row, must be >= 1
;   spatial, temporal  lookup tables of signed 16-bit words used by LOWPASS
;
; Work done for each sample index x:
;   t1         = LOWPASS(lineant[x], pixelant, spatial);   lineant[x]  = t1
;   pixelant   = LOWPASS(pixelant, LOAD(x+1), spatial)
;   t0         = LOWPASS(frameant[x], t1, temporal);       frameant[x] = t0
;   dst[x]     = t0 >> (16 - bitdepth)
;
; On x86-64 three extra registers are requested and named pixelant, t0, t1.
; On x86-32 only the 7 argument registers exist, so the adjusted pointers are
; written back to their argument slots (the ...mp names) and r0 is reused for
; src, dst, lineant and frameant in turn; pixelant lives in r1 and t0/t1 are
; mapped to r2/r3.
;------------------------------------------------------------------------------
%macro HQDN3D_ROW 1 ; bitdepth
%if ARCH_X86_64
cglobal hqdn3d_row_%1_x86, 7,10,0, src, dst, lineant, frameant, width, spatial, temporal, pixelant, t0, t1
%else
cglobal hqdn3d_row_%1_x86, 7,7,0, src, dst, lineant, frameant, width, spatial, temporal
%endif
    %assign bytedepth (%1+7)>>3
    %assign lut_bits 4+4*(%1/16)
    ; Point every array at its last element and let xq count up from
    ; -(width-1) to 0, so that the flags left by inc xq drive the loop.
    dec    widthq
    lea    srcq, [srcq+widthq*bytedepth]
    lea    dstq, [dstq+widthq*bytedepth]
    lea    frameantq, [frameantq+widthq*2]
    lea    lineantq,  [lineantq+widthq*2]
    neg    widthq
    %define xq widthq
%if ARCH_X86_32
    ; Spill the adjusted pointers, then alias them all onto r0; each one is
    ; reloaded with movifnidn right before it is used.
    mov    dstmp, dstq
    mov    srcmp, srcq
    mov    frameantmp, frameantq
    mov    lineantmp,  lineantq
    %define dstq r0
    %define frameantq r0
    %define lineantq  r0
    %define pixelantq r1
    %define pixelantd r1d
    DECLARE_REG_TMP 2,3
%endif
    LOAD   pixelantd, xq, %1
    ; A one-sample row starts with xq == 0, so the lookahead load at .loop
    ; would read past the end of src. Enter at .loop2 instead. t0 is seeded
    ; with pixelant so that the LOWPASS which normally consumes the lookahead
    ; sample sees a difference of 0 and its table index stays at 0; the value
    ; it produces is never used in that case.
    mov    t0d, pixelantd
    test   xq, xq
    jz .loop2
ALIGN 16
.loop:
    movifnidn srcq, srcmp
    LOAD      t0d, xq+1, %1 ; skip on the last iteration to avoid overread
.loop2:
    movifnidn lineantq, lineantmp
    movzx     t1d, word [lineantq+xq*2]
    LOWPASS   t1, pixelant, spatial
    mov       [lineantq+xq*2], t1w
    LOWPASS   pixelant, t0, spatial     ; pixelant for the next iteration
    movifnidn frameantq, frameantmp
    movzx     t0d, word [frameantq+xq*2]
    LOWPASS   t0, t1, temporal
    mov       [frameantq+xq*2], t0w
    movifnidn dstq, dstmp
%if %1 != 16
    shr    t0d, 16-%1 ; could eliminate this by storing from t0h, but only with some constraints on register allocation
%endif
%if %1 == 8
    mov    [dstq+xq], t0b
%else
    mov    [dstq+xq*2], t0w
%endif
    inc    xq
    jl .loop                            ; xq < 0: more samples, lookahead is in range
    je .loop2                           ; xq == 0: last sample, no lookahead load
    REP_RET
%endmacro ; HQDN3D_ROW

; Instantiate the row function for bit depths 8, 9, 10 and 16.
HQDN3D_ROW 8
HQDN3D_ROW 9
HQDN3D_ROW 10
HQDN3D_ROW 16
Commit	Line	Data
	1	;******************************************************************************
	2	;* Copyright (c) 2012 Loren Merritt
	3	;*
	4	;* This file is part of FFmpeg.
	5	;*
	6	;* FFmpeg is free software; you can redistribute it and/or
	7	;* modify it under the terms of the GNU Lesser General Public
	8	;* License as published by the Free Software Foundation; either
	9	;* version 2.1 of the License, or (at your option) any later version.
	10	;*
	11	;* FFmpeg is distributed in the hope that it will be useful,
	12	;* but WITHOUT ANY WARRANTY; without even the implied warranty of
	13	;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	14	;* Lesser General Public License for more details.
	15	;*
	16	;* You should have received a copy of the GNU Lesser General Public
	17	;* License along with FFmpeg; if not, write to the Free Software
	18	;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
	19	;******************************************************************************
	20
	21	%include "libavutil/x86/x86util.asm"
	22
	23	SECTION .text
	24
	25	%macro LOWPASS 3 ; prevsample, cursample, lut
	26	sub %1q, %2q
	27	%if lut_bits != 8
	28	sar %1q, 8-lut_bits
	29	%endif
	30	movsx %1d, word [%3q+%1q*2]
	31	add %1d, %2d
	32	%endmacro
	33
	34	%macro LOAD 3 ; dstreg, x, bitdepth
	35	%if %3 == 8
	36	movzx %1, byte [srcq+%2]
	37	%else
	38	movzx %1, word [srcq+(%2)*2]
	39	%endif
	40	%if %3 != 16
	41	shl %1, 16-%3
	42	add %1, (1<<(15-%3))-1
	43	%endif
	44	%endmacro
	45
	46	%macro HQDN3D_ROW 1 ; bitdepth
	47	%if ARCH_X86_64
	48	cglobal hqdn3d_row_%1_x86, 7,10,0, src, dst, lineant, frameant, width, spatial, temporal, pixelant, t0, t1
	49	%else
	50	cglobal hqdn3d_row_%1_x86, 7,7,0, src, dst, lineant, frameant, width, spatial, temporal
	51	%endif
	52	%assign bytedepth (%1+7)>>3
	53	%assign lut_bits 4+4*(%1/16)
	54	dec widthq
	55	lea srcq, [srcq+widthq*bytedepth]
	56	lea dstq, [dstq+widthq*bytedepth]
	57	lea frameantq, [frameantq+widthq*2]
	58	lea lineantq, [lineantq+widthq*2]
	59	neg widthq
	60	%define xq widthq
	61	%if ARCH_X86_32
	62	mov dstmp, dstq
	63	mov srcmp, srcq
	64	mov frameantmp, frameantq
	65	mov lineantmp, lineantq
	66	%define dstq r0
	67	%define frameantq r0
	68	%define lineantq r0
	69	%define pixelantq r1
	70	%define pixelantd r1d
	71	DECLARE_REG_TMP 2,3
	72	%endif
	73	LOAD pixelantd, xq, %1
	74	ALIGN 16
	75	.loop:
	76	movifnidn srcq, srcmp
	77	LOAD t0d, xq+1, %1 ; skip on the last iteration to avoid overread
	78	.loop2:
	79	movifnidn lineantq, lineantmp
	80	movzx t1d, word [lineantq+xq*2]
	81	LOWPASS t1, pixelant, spatial
	82	mov [lineantq+xq*2], t1w
	83	LOWPASS pixelant, t0, spatial
	84	movifnidn frameantq, frameantmp
	85	movzx t0d, word [frameantq+xq*2]
	86	LOWPASS t0, t1, temporal
	87	mov [frameantq+xq*2], t0w
	88	movifnidn dstq, dstmp
	89	%if %1 != 16
	90	shr t0d, 16-%1 ; could eliminate this by storing from t0h, but only with some constraints on register allocation
	91	%endif
	92	%if %1 == 8
	93	mov [dstq+xq], t0b
	94	%else
	95	mov [dstq+xq*2], t0w
	96	%endif
	97	inc xq
	98	jl .loop
	99	je .loop2
	100	REP_RET
	101	%endmacro ; HQDN3D_ROW
	102
	103	HQDN3D_ROW 8
	104	HQDN3D_ROW 9
	105	HQDN3D_ROW 10
	106	HQDN3D_ROW 16