/*
 * Copyright (C) 2013 Xiaolei Yu
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/arm/asm.S"

/*
 * Shared macro library for 32-bit ARM NEON packed-RGB to semi-planar YUV
 * conversion (GNU as syntax, run through the C preprocessor).
 *
 * This file only defines macros and register aliases.  Several names used
 * below (compute_y_16x1, compute_chroma_8x1, y8x16, r16x8, g16x8, b16x8,
 * c8x8x2_l, c8x8x2_h) are not defined here and must be provided by the
 * file that uses these macros.
 */

/*
 * alias name, tgt, set=1
 *
 * Bind the symbolic register name to tgt with .req when set is non-zero,
 * or release the name with .unreq when set is zero.
 */
.macro alias name, tgt, set=1
.if \set != 0
    \name   .req    \tgt
.else
    .unreq  \name
.endif
.endm

/* .altmacro is needed for the %(expr) argument evaluation used below. */
.altmacro

/*
 * alias_dw_all qw, dw_l, dw_h
 *
 * Recursively define q<qw>_l / q<qw>_h as names for d<dw_l> / d<dw_h>,
 * stepping qw by 1 and the d indices by 2 until qw reaches 15.
 */
.macro alias_dw_all qw, dw_l, dw_h
    alias   q\qw\()_l, d\dw_l
    alias   q\qw\()_h, d\dw_h
    .if \qw < 15
        alias_dw_all %(\qw + 1), %(\dw_l + 2), %(\dw_h + 2)
    .endif
.endm

/* Defines q0_l = d0, q0_h = d1, ..., q15_l = d30, q15_h = d31. */
alias_dw_all 0, 0, 1

.noaltmacro

/*
 * alias_qw name, qw, set=1
 *
 * Bind (or release, when set is zero) name to the quad register qw, and
 * name_l / name_h to the qw_l / qw_h halves defined by alias_dw_all.
 */
.macro alias_qw name, qw, set=1
    alias   \name\(), \qw, \set
    alias   \name\()_l, \qw\()_l, \set
    alias   \name\()_h, \qw\()_h, \set
.endm

/* Save r4-r12, lr (10 words) and q4-q7 (4 x 16 bytes) on the stack. */
.macro prologue
    push    {r4-r12, lr}
    vpush   {q4-q7}
.endm

/* Undo prologue; popping the saved lr into pc returns to the caller. */
.macro epilogue
    vpop    {q4-q7}
    pop     {r4-r12, pc}
.endm

/*
 * load_arg reg, ix
 *
 * Load argument number ix (0-based, ix >= 4) from the stack into reg.
 * The constant part of the offset skips what prologue pushed: 10 core
 * registers of 4 bytes plus 4 quad registers of 16 bytes.  Only valid
 * while sp is unchanged since prologue.
 */
.macro load_arg reg, ix
    ldr     \reg, [sp, #((10 * 4 + 4 * 16) + (\ix - 4) * 4)]
.endm


/* ()_to_()_neon(const uint8_t *src, uint8_t *y, uint8_t *chroma,
 *               int width, int height,
 *               int y_stride, int c_stride, int src_stride,
 *               int32_t coeff_table[9]);
 *
 * alias_loop_420sp set=1
 *
 * Bind (or release, when set is zero) the register names used by
 * loop_420sp.  Some registers carry two names on purpose:
 *   r0  src / src0      r1  y / y0
 *   r3  width / header  r6  c_stride / c_padding
 *   r12 y1 / coeff_table
 */
.macro alias_loop_420sp set=1
    alias   src,            r0, \set
    alias   src0,           src, \set
    alias   y,              r1, \set
    alias   y0,             y, \set
    alias   chroma,         r2, \set
    alias   width,          r3, \set
    alias   header,         width, \set

    alias   height,         r4, \set
    alias   y_stride,       r5, \set
    alias   c_stride,       r6, \set
    alias   c_padding,      c_stride, \set
    alias   src_stride,     r7, \set

    alias   y0_end,         r8, \set

    alias   src_padding,    r9, \set
    alias   y_padding,      r10, \set

    alias   src1,           r11, \set
    alias   y1,             r12, \set

    alias   coeff_table,    r12, \set
.endm


/*
 * loop_420sp s_fmt, d_fmt, init, kernel, precision
 *
 * Emit the exported function <s_fmt>_to_<d_fmt>_neon_<precision> with the
 * prototype shown above alias_loop_420sp.
 *
 *   s_fmt, d_fmt  format names; used in the function name and forwarded
 *                 to the kernel macro
 *   init          macro invoked once as "init coeff_table"
 *   kernel        macro invoked as
 *                 "kernel s_fmt, d_fmt, src0, src1, y0, y1, chroma[, count]"
 *   precision     suffix of the function name
 *
 * Rows are processed two at a time (src0/y0 and src1/y1), and height is
 * decremented by 2 per pass.  The paddings are the per-row distance from
 * the end of the processed data to the start of the next row; the source
 * uses 4 bytes per pixel (width LSL #2).
 *
 * NOTE(review): the column loop always runs the full kernel at least once
 * after the optional header step, so this presumably expects width >= 16
 * and an even height -- confirm against callers.
 */
.macro loop_420sp s_fmt, d_fmt, init, kernel, precision

function \s_fmt\()_to_\d_fmt\()_neon_\precision, export=1
    prologue

    alias_loop_420sp

    load_arg    height, 4
    load_arg    y_stride, 5
    load_arg    c_stride, 6
    load_arg    src_stride, 7
    load_arg    coeff_table, 8

    /* coeff_table shares r12 with y1, so it must be used before y1 is set */
    \init       coeff_table

    sub         y_padding, y_stride, width
    sub         c_padding, c_stride, width
    sub         src_padding, src_stride, width, LSL #2

    add         y0_end, y0, width
    /* header overwrites width (same register): header = width % 16 */
    and         header, width, #15

    add         y1, y0, y_stride
    add         src1, src0, src_stride

0:
    /* start of a row pair: optional partial step of header pixels */
    cmp         header, #0
    beq         1f

    \kernel     \s_fmt, \d_fmt, src0, src1, y0, y1, chroma, header

1:
    \kernel     \s_fmt, \d_fmt, src0, src1, y0, y1, chroma

    /* y0 and y0_end are addresses: compare unsigned (lo), not signed (lt) */
    cmp         y0, y0_end
    blo         1b
2:
    /* y1/src1 now point at the end of the second row of the pair */
    add         y0, y1, y_padding
    add         y0_end, y1, y_stride
    add         chroma, chroma, c_padding
    add         src0, src1, src_padding

    add         y1, y0, y_stride
    add         src1, src0, src_stride

    subs        height, height, #2

    bgt         0b

    epilogue

    alias_loop_420sp 0

endfunc
.endm

/*
 * Pairwise-add horizontally adjacent 8-bit samples of each colour channel
 * into 16-bit sums (first row of a row pair).
 */
.macro downsample
    vpaddl.u8   r16x8, r8x16
    vpaddl.u8   g16x8, g8x16
    vpaddl.u8   b16x8, b8x16
.endm


/*
 * accumulate and right shift by 2
 *
 * Adds the pairwise sums of the second row onto the sums left by
 * downsample, then divides by 4 with rounding (vrshr).
 */
.macro downsample_ars2
    vpadal.u8   r16x8, r8x16
    vpadal.u8   g16x8, g8x16
    vpadal.u8   b16x8, b8x16

    vrshr.u16   r16x8, r16x8, #2
    vrshr.u16   g16x8, g16x8, #2
    vrshr.u16   b16x8, b16x8, #2
.endm

/*
 * store_y8_16x1 dst, count
 *
 * Store the y8x16 register at dst.  Without count, dst is advanced by the
 * stored size (writeback); with count, dst is advanced by count instead.
 */
.macro store_y8_16x1 dst, count
.ifc "\count",""
    vstmia      \dst!, {y8x16}
.else
    vstmia      \dst, {y8x16}
    add         \dst, \dst, \count
.endif
.endm

/*
 * store_chroma_nv12_8x1 dst, count
 *
 * Store u8x8 and v8x8 interleaved (U first) at dst.  Without count, dst
 * is advanced by the stored size; with count, dst is post-incremented by
 * the count register.
 */
.macro store_chroma_nv12_8x1 dst, count
.ifc "\count",""
    vst2.i8     {u8x8, v8x8}, [\dst]!
.else
    vst2.i8     {u8x8, v8x8}, [\dst], \count
.endif
.endm

/*
 * store_chroma_nv21_8x1 dst, count
 *
 * Same as store_chroma_nv12_8x1, but interleaved with V first.
 */
.macro store_chroma_nv21_8x1 dst, count
.ifc "\count",""
    vst2.i8     {v8x8, u8x8}, [\dst]!
.else
    vst2.i8     {v8x8, u8x8}, [\dst], \count
.endif
.endm

/*
 * load_8888_16x1 a, b, c, d, src, count
 *
 * De-interleave 16 four-byte pixels from src into <a>8x16, <b>8x16,
 * <c>8x16 and <d>8x16 (low halves from the first 32 bytes, high halves
 * from the next 32).  Without count, src ends up advanced by 64 bytes;
 * with count, src ends up advanced by count * 4 bytes.
 */
.macro load_8888_16x1 a, b, c, d, src, count
.ifc "\count",""
    vld4.8      {\a\()8x16_l, \b\()8x16_l, \c\()8x16_l, \d\()8x16_l}, [\src]!
    vld4.8      {\a\()8x16_h, \b\()8x16_h, \c\()8x16_h, \d\()8x16_h}, [\src]!
.else
    vld4.8      {\a\()8x16_l, \b\()8x16_l, \c\()8x16_l, \d\()8x16_l}, [\src]!
    vld4.8      {\a\()8x16_h, \b\()8x16_h, \c\()8x16_h, \d\()8x16_h}, [\src]
    /* undo the writeback of the first load, then step by count pixels */
    sub         \src, \src, #32
    add         \src, \src, \count, LSL #2
.endif
.endm

/* Load 16 pixels whose bytes are ordered r, g, b, x in memory. */
.macro load_rgbx_16x1 src, count
    load_8888_16x1  r, g, b, x, \src, \count
.endm

/* Load 16 pixels whose bytes are ordered b, g, r, x in memory. */
.macro load_bgrx_16x1 src, count
    load_8888_16x1  b, g, r, x, \src, \count
.endm

/* Bind/release the r/g/b/x 8x16 register names for rgbx byte order. */
.macro alias_src_rgbx set=1
    alias_src_8888  r, g, b, x, \set
.endm

/* Bind/release the r/g/b/x 8x16 register names for bgrx byte order. */
.macro alias_src_bgrx set=1
    alias_src_8888  b, g, r, x, \set
.endm

/* nv12: u8x8 names the low half of c8x8x2, v8x8 the high half. */
.macro alias_dst_nv12 set=1
    alias   u8x8, c8x8x2_l, \set
    alias   v8x8, c8x8x2_h, \set
.endm

/* nv21: v8x8 names the low half of c8x8x2, u8x8 the high half. */
.macro alias_dst_nv21 set=1
    alias   v8x8, c8x8x2_l, \set
    alias   u8x8, c8x8x2_h, \set
.endm

// common aliases

/* d0-d2: one register per colour channel, with s16 lanes 0..2 named
 * as the Y, U and V entries for that channel */
alias   CO_R    d0
CO_RY   .dn     d0.s16[0]
CO_RU   .dn     d0.s16[1]
CO_RV   .dn     d0.s16[2]

alias   CO_G    d1
CO_GY   .dn     d1.s16[0]
CO_GU   .dn     d1.s16[1]
CO_GV   .dn     d1.s16[2]

alias   CO_B    d2
CO_BY   .dn     d2.s16[0]
CO_BU   .dn     d2.s16[1]
CO_BV   .dn     d2.s16[2]

/* BIAS_U and BIAS_V are two names for the same register (d3) */
alias   BIAS_U, d3
alias   BIAS_V, BIAS_U

alias   BIAS_Y, q2


/*
 * q3-q6 R8G8B8X8 x16
 *
 * alias_src_8888 a, b, c, d, set
 *
 * Bind/release <a>8x16, <b>8x16, <c>8x16, <d>8x16 (and their _l/_h
 * halves) to q3, q4, q5, q6 in that order.
 */
.macro alias_src_8888 a, b, c, d, set
    alias_qw    \a\()8x16, q3, \set
    alias_qw    \b\()8x16, q4, \set
    alias_qw    \c\()8x16, q5, \set
    alias_qw    \d\()8x16, q6, \set
.endm

/*
 * kernel_420_16x2 rgb_fmt, yuv_fmt, rgb0, rgb1, y0, y1, chroma, count
 *
 * Convert a 16-pixel wide, 2-row tall group: write 16 luma bytes to each
 * of y0 and y1 and one interleaved chroma group to chroma.  The two rows
 * are horizontally pair-summed and averaged (downsample, downsample_ars2)
 * before the chroma computation.
 *
 *   rgb_fmt     selects load_<rgb_fmt>_16x1 and alias_src_<rgb_fmt>
 *   yuv_fmt     selects store_chroma_<yuv_fmt>_8x1 and alias_dst_<yuv_fmt>
 *   rgb0, rgb1  source pointers for the two rows (advanced)
 *   y0, y1      luma destination pointers for the two rows (advanced)
 *   chroma      chroma destination pointer (advanced)
 *   count       optional register; when given, the pointers advance by
 *               count pixels instead of a full group of 16
 *
 * compute_y_16x1 and compute_chroma_8x1 are not defined in this file.
 */
.macro kernel_420_16x2 rgb_fmt, yuv_fmt, rgb0, rgb1, y0, y1, chroma, count
    alias_src_\rgb_fmt
    alias_dst_\yuv_fmt

    load_\rgb_fmt\()_16x1   \rgb0, \count

    downsample
    compute_y_16x1
    store_y8_16x1   \y0, \count


    load_\rgb_fmt\()_16x1   \rgb1, \count
    downsample_ars2
    compute_y_16x1
    store_y8_16x1   \y1, \count

    compute_chroma_8x1  u, U
    compute_chroma_8x1  v, V

    store_chroma_\yuv_fmt\()_8x1    \chroma, \count

    alias_dst_\yuv_fmt  0
    alias_src_\rgb_fmt  0
.endm