Commit | Line | Data |
---|---|---|
2ba45a60 DM |
1 | ;****************************************************************************** |
2 | ;* FLAC DSP SIMD optimizations | |
3 | ;* | |
4 | ;* Copyright (C) 2014 Loren Merritt | |
5 | ;* | |
6 | ;* This file is part of FFmpeg. | |
7 | ;* | |
8 | ;* FFmpeg is free software; you can redistribute it and/or | |
9 | ;* modify it under the terms of the GNU Lesser General Public | |
10 | ;* License as published by the Free Software Foundation; either | |
11 | ;* version 2.1 of the License, or (at your option) any later version. | |
12 | ;* | |
13 | ;* FFmpeg is distributed in the hope that it will be useful, | |
14 | ;* but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
16 | ;* Lesser General Public License for more details. | |
17 | ;* | |
18 | ;* You should have received a copy of the GNU Lesser General Public | |
19 | ;* License along with FFmpeg; if not, write to the Free Software | |
20 | ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
21 | ;****************************************************************************** | |
22 | ||
23 | %include "libavutil/x86/x86util.asm" | |
24 | ||
25 | SECTION .text | |
26 | ||
;------------------------------------------------------------------------------
; LPC_32 <cpuflags>
;
; Emits flac_lpc_32 for the given instruction set (argument %1 is passed to
; INIT_XMM). Presumed C prototype -- TODO confirm against the C-side
; declaration:
;   void flac_lpc_32(int32_t *decoded, const int32_t *coeffs,
;                    int pred_order, int qlevel, int len);
;
; For every i in [pred_order, len) the function updates decoded[] in place:
;   decoded[i] += (sum_{k=0}^{pred_order-1} coeffs[k] * decoded[i-pred_order+k])
;                 >> qlevel
; Products are accumulated in 64 bits; only the low 32 bits of the shifted
; sum are added to the sample. Two consecutive samples are produced per
; outer iteration so each loaded sample/coefficient feeds two accumulators.
;
; Register roles after setup:
;   decodedq    -> first of the two samples being reconstructed
;   coeffsq     -> one past the last coefficient
;   pred_orderq = -pred_order (negative index base)
;   jq          = negative tap index counting up to 0
;   m0 / m1     = current sample / current coefficient
;   m2 / m3     = 64-bit accumulators for sample 0 / sample 1 of the pair
;   m4          = qlevel (shift count)
;
; PMACSDQL comes from x86util.asm (not visible here); it is used as
; "dst = accum + signed 64-bit product of the low dwords of src1 and src2",
; with the last operand available as scratch -- confirm against x86util.asm.
;------------------------------------------------------------------------------
%macro LPC_32 1
INIT_XMM %1
cglobal flac_lpc_32, 5,6,5, decoded, coeffs, pred_order, qlevel, len, j
    sub    lend, pred_orderd            ; len = number of samples to reconstruct
    jle .ret                            ; nothing to do
    ; pred_order is a 32-bit int: on x86-64 the upper half of its register is
    ; undefined, so widen it before using it in 64-bit address arithmetic.
    ; Expands to nothing when pred_orderq and pred_orderd are the same register.
    movsxdifnidn pred_orderq, pred_orderd
    lea    decodedq, [decodedq+pred_orderq*4-8] ; 2 dwords before first output
    lea    coeffsq, [coeffsq+pred_orderq*4]     ; one past the last coefficient
    neg    pred_orderq                  ; index from the end with negative offsets
    movd   m4, qlevelm                  ; shift count for psrlq
ALIGN 16
.loop_sample:
    movd   m0, [decodedq+pred_orderq*4+8] ; oldest sample of the window
    add    decodedq, 8                  ; advance to the next pair of outputs
    movd   m1, [coeffsq+pred_orderq*4]  ; coeffs[0]
    pxor   m2, m2                       ; sum for decodedq[0]
    pxor   m3, m3                       ; sum for decodedq[1]
    lea    jq, [pred_orderq+1]          ; j = 1 - pred_order
    test   jq, jq
    jz .end_order                       ; pred_order == 1: single tap only
.loop_order:
    PMACSDQL m2, m0, m1, m2, m0         ; sum0 += sample[k]   * coeff[k]
    movd   m0, [decodedq+jq*4]          ; next sample
    PMACSDQL m3, m1, m0, m3, m1         ; sum1 += sample[k+1] * coeff[k]
    movd   m1, [coeffsq+jq*4]           ; next coefficient
    inc    jq
    jl .loop_order                      ; until j reaches 0
.end_order:
    PMACSDQL m2, m0, m1, m2, m0         ; last tap of sum0
    psrlq  m2, m4                       ; sum0 >>= qlevel (64-bit logical shift)
    movd   m0, [decodedq]
    paddd  m0, m2                       ; add low dword of the prediction
    movd   [decodedq], m0               ; m0 keeps the reconstructed sample
    sub    lend, 2
    jl .ret                             ; odd count: second sample does not exist
    PMACSDQL m3, m1, m0, m3, m1         ; last tap of sum1 uses the sample just stored
    psrlq  m3, m4
    movd   m1, [decodedq+4]
    paddd  m1, m3
    movd   [decodedq+4], m1
    ; Flags are still those of "sub lend, 2": the SIMD ops and movd in between
    ; do not write EFLAGS.
    jg .loop_sample
.ret:
    REP_RET
%endmacro
70 | ||
; Instantiate flac_lpc_32 once per supported instruction set. The XOP build
; is emitted only when the HAVE_XOP_EXTERNAL configuration flag is non-zero;
; the SSE4 build is always emitted.
%if HAVE_XOP_EXTERNAL
    LPC_32 xop
%endif
    LPC_32 sse4