| 1 | /* |
| 2 | * Copyright (C) 2013 Xiaolei Yu <dreifachstein@gmail.com> |
| 3 | * |
| 4 | * This file is part of FFmpeg. |
| 5 | * |
| 6 | * FFmpeg is free software; you can redistribute it and/or |
| 7 | * modify it under the terms of the GNU Lesser General Public |
| 8 | * License as published by the Free Software Foundation; either |
| 9 | * version 2.1 of the License, or (at your option) any later version. |
| 10 | * |
| 11 | * FFmpeg is distributed in the hope that it will be useful, |
| 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 14 | * Lesser General Public License for more details. |
| 15 | * |
| 16 | * You should have received a copy of the GNU Lesser General Public |
| 17 | * License along with FFmpeg; if not, write to the Free Software |
| 18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
| 19 | */ |
| 20 | |
| 21 | #include "libavutil/arm/asm.S" |
| 22 | |
/*
 * alias name, tgt, set=1
 *
 * With set != 0 (the default) bind the register alias "name" to "tgt"
 * through .req.  With set == 0 release "name" again through .unreq; the
 * "tgt" argument is not used in that case.
 */
.macro alias name, tgt, set=1
.if \set == 0
        .unreq      \name
.else
        \name       .req        \tgt
.endif
.endm
| 30 | |
/*
 * alias_dw_all qw, dw_l, dw_h
 *
 * Create the aliases q<qw>_l = d<dw_l> and q<qw>_h = d<dw_h>, then recurse
 * with qw + 1, dw_l + 2 and dw_h + 2 until qw reaches 15.  Started with
 * (0, 0, 1) below, this yields q0_l = d0, q0_h = d1, ... q15_l = d30,
 * q15_h = d31.
 *
 * .altmacro is switched on only around this macro: the recursion uses the
 * %(expr) form to pass evaluated numbers as arguments.
 */
.altmacro

.macro alias_dw_all qw, dw_l, dw_h
        alias       q\qw\()_l, d\dw_l
        alias       q\qw\()_h, d\dw_h
.if \qw < 15
        alias_dw_all %(\qw + 1), %(\dw_l + 2), %(\dw_h + 2)
.endif
.endm

        alias_dw_all 0, 0, 1

.noaltmacro
| 44 | |
/*
 * alias_qw name, qw, set=1
 *
 * Alias (set != 0) or un-alias (set == 0) a whole Q register together with
 * its two halves: name -> qw, name_l -> qw_l, name_h -> qw_h.
 */
.macro alias_qw name, qw, set=1
        alias       \name,      \qw,        \set
        alias       \name\()_l, \qw\()_l,   \set
        alias       \name\()_h, \qw\()_h,   \set
.endm
| 50 | |
/*
 * Save the registers the conversion loop uses:
 * 10 core registers (40 bytes) followed by d8-d15, i.e. q4-q7 (64 bytes).
 * load_arg depends on this exact layout.
 */
.macro prologue
        push        {r4-r12, lr}
        vpush       {d8-d15}
.endm
| 55 | |
/*
 * Undo prologue in reverse order and return: popping the saved lr value
 * straight into pc ends the function.
 */
.macro epilogue
        vpop        {d8-d15}
        pop         {r4-r12, pc}
.endm
| 60 | |
/*
 * load_arg reg, ix
 *
 * Load the ix-th (0-based, ix >= 4) 32-bit function argument from the stack
 * into reg.  Valid only after prologue: the offset skips the 10 * 4 bytes of
 * core registers and the 4 * 16 bytes of NEON registers pushed there, then
 * indexes the stacked arguments starting at argument 4.
 */
.macro load_arg reg, ix
        ldr         \reg, [sp, #((\ix - 4) * 4 + (10 * 4 + 4 * 16))]
.endm
| 64 | |
| 65 | |
/* ()_to_()_neon(const uint8_t *src, uint8_t *y, uint8_t *chroma,
 *               int width, int height,
 *               int y_stride, int c_stride, int src_stride,
 *               int32_t coeff_table[9]);
 *
 * alias_loop_420sp set=1
 *
 * Set up (set != 0) or tear down (set == 0) the core-register names used by
 * loop_420sp.  Several names deliberately share one register:
 *   src0 / y0   are the same registers as src / y,
 *   header      reuses width     (r3),
 *   c_padding   reuses c_stride  (r6),
 *   coeff_table reuses y1        (r12); loop_420sp hands coeff_table to its
 *               init macro before it writes y1.
 */
.macro alias_loop_420sp set=1
        /* r0-r3: register arguments */
        alias       src,        r0,         \set
        alias       src0,       src,        \set
        alias       y,          r1,         \set
        alias       y0,         y,          \set
        alias       chroma,     r2,         \set
        alias       width,      r3,         \set
        alias       header,     width,      \set

        /* r4-r7: arguments fetched from the stack */
        alias       height,     r4,         \set
        alias       y_stride,   r5,         \set
        alias       c_stride,   r6,         \set
        alias       c_padding,  c_stride,   \set
        alias       src_stride, r7,         \set

        /* r8-r12: values derived inside the loop */
        alias       y0_end,     r8,         \set

        alias       src_padding, r9,        \set
        alias       y_padding,  r10,        \set

        alias       src1,       r11,        \set
        alias       y1,         r12,        \set

        alias       coeff_table, r12,       \set
.endm
| 96 | |
| 97 | |
/*
 * loop_420sp s_fmt, d_fmt, init, kernel, precision
 *
 * Emit the exported function <s_fmt>_to_<d_fmt>_neon_<precision>, which
 * walks the picture two rows at a time and calls "kernel" on 16-pixel wide
 * blocks of each row pair.
 *
 *   s_fmt, d_fmt  format names; used in the symbol name and forwarded to
 *                 the kernel
 *   init          macro invoked once as "init coeff_table"
 *   kernel        macro invoked as
 *                 "kernel s_fmt, d_fmt, src0, src1, y0, y1, chroma[, count]"
 *   precision     suffix of the symbol name
 *
 * Pointer bookkeeping, as established by the code below:
 *   - the source advances 4 bytes per pixel (width, LSL #2),
 *   - the luma and chroma rows each advance "width" bytes per row pair,
 *   - header = width & 15.  When it is non-zero, every row pair starts with
 *     one kernel call that is given "header" as its count argument; the
 *     load/store macros in this file then advance their pointers by that
 *     count instead of a full block, so that the remaining full blocks end
 *     exactly at the end of the row.
 *
 * NOTE(review): the inner loop always runs at least one full block and
 * height is decremented by 2, so width >= 16 and an even height appear to
 * be assumed - confirm against the callers.
 */
.macro loop_420sp s_fmt, d_fmt, init, kernel, precision

function \s_fmt\()_to_\d_fmt\()_neon_\precision, export=1
        prologue

        alias_loop_420sp

        /* arguments 4..8 live on the stack */
        load_arg    height,      4
        load_arg    y_stride,    5
        load_arg    c_stride,    6
        load_arg    src_stride,  7
        load_arg    coeff_table, 8

        /* must run before y1 is written: coeff_table and y1 share r12 */
        \init       coeff_table

        /* distance from the end of one row to the start of the next */
        sub         y_padding,   y_stride,   width
        sub         c_padding,   c_stride,   width
        sub         src_padding, src_stride, width, LSL #2

        add         y0_end,      y0,         width
        /* overwrites width (same register); width is not used below */
        and         header,      width,      #15

        /* second row of the pair */
        add         y1,          y0,         y_stride
        add         src1,        src0,       src_stride

0:
        /* partial leading block, only when width is not a multiple of 16 */
        cmp         header,      #0
        beq         1f

        \kernel \s_fmt, \d_fmt, src0, src1, y0, y1, chroma, header

1:
        \kernel \s_fmt, \d_fmt, src0, src1, y0, y1, chroma

        /*
         * y0 and y0_end are addresses, so the comparison has to be
         * unsigned: with a signed branch (blt) a row that straddles
         * 0x80000000 would terminate after its first block.
         */
        cmp         y0,          y0_end
        blo         1b
2:
        /* y1/src1 now point at the end of the second row: step to next pair */
        add         y0,          y1,         y_padding
        add         y0_end,      y1,         y_stride
        add         chroma,      chroma,     c_padding
        add         src0,        src1,       src_padding

        add         y1,          y0,         y_stride
        add         src1,        src0,       src_stride

        subs        height,      height,     #2

        bgt         0b

        epilogue

        alias_loop_420sp 0

endfunc
.endm
| 153 | |
/*
 * Horizontal 2:1 sum of the first row: add adjacent pairs of the sixteen
 * 8-bit R/G/B samples, giving eight 16-bit sums per channel.
 * The r/g/b 8x16 and 16x8 names must be aliased by the caller.
 */
.macro downsample
        vpaddl.u8   r16x8, r8x16
        vpaddl.u8   g16x8, g8x16
        vpaddl.u8   b16x8, b8x16
.endm
| 159 | |
| 160 | |
/*
 * accumulate and right shift by 2
 *
 * Add the pairwise sums of the second row on top of the sums left by
 * "downsample", then divide by 4 with rounding (vrshr): each 16-bit lane
 * ends up holding the rounded mean of a 2x2 block of samples.
 */
.macro downsample_ars2
        vpadal.u8   r16x8, r8x16
        vpadal.u8   g16x8, g8x16
        vpadal.u8   b16x8, b8x16

        vrshr.u16   r16x8, r16x8, #2
        vrshr.u16   g16x8, g16x8, #2
        vrshr.u16   b16x8, b16x8, #2
.endm
| 171 | |
/*
 * store_y8_16x1 dst[, count]
 *
 * Store the 16 luma bytes held in y8x16 at dst.  Without count, dst is
 * advanced past the stored data (writeback); with count, all 16 bytes are
 * still stored but dst only advances by count.
 */
.macro store_y8_16x1 dst, count
.ifnc "\count",""
        vstmia      \dst, {y8x16}
        add         \dst, \dst, \count
.else
        vstmia      \dst!, {y8x16}
.endif
.endm
| 180 | |
/*
 * store_chroma_nv12_8x1 dst[, count]
 *
 * Store 8 interleaved chroma pairs in U,V order (vst2 interleaves the two
 * registers).  dst is post-incremented by the amount written, or by the
 * register "count" when that argument is given.
 */
.macro store_chroma_nv12_8x1 dst, count
.ifnc "\count",""
        vst2.i8     {u8x8, v8x8}, [\dst], \count
.else
        vst2.i8     {u8x8, v8x8}, [\dst]!
.endif
.endm
| 188 | |
/*
 * store_chroma_nv21_8x1 dst[, count]
 *
 * Same as store_chroma_nv12_8x1 but with the pairs written in V,U order.
 */
.macro store_chroma_nv21_8x1 dst, count
.ifnc "\count",""
        vst2.i8     {v8x8, u8x8}, [\dst], \count
.else
        vst2.i8     {v8x8, u8x8}, [\dst]!
.endif
.endm
| 196 | |
/*
 * load_8888_16x1 a, b, c, d, src[, count]
 *
 * Load 16 four-byte pixels from src and de-interleave them: byte 0 of every
 * pixel goes to <a>8x16, byte 1 to <b>8x16, byte 2 to <c>8x16 and byte 3 to
 * <d>8x16.  The first vld4 fills the low halves (pixels 0-7), the second one
 * the high halves (pixels 8-15).
 *
 * Without count, src ends up 64 bytes further.  With count, the same 16
 * pixels are loaded but src ends up only count * 4 bytes further.
 */
.macro load_8888_16x1 a, b, c, d, src, count
        /* pixels 0-7: identical in both variants */
        vld4.8      {\a\()8x16_l, \b\()8x16_l, \c\()8x16_l, \d\()8x16_l}, [\src]!
.ifc "\count",""
        vld4.8      {\a\()8x16_h, \b\()8x16_h, \c\()8x16_h, \d\()8x16_h}, [\src]!
.else
        vld4.8      {\a\()8x16_h, \b\()8x16_h, \c\()8x16_h, \d\()8x16_h}, [\src]
        /* undo the writeback of the first load, then step by count pixels */
        sub         \src, \src, #32
        add         \src, \src, \count, LSL #2
.endif
.endm
| 208 | |
/* Load 16 pixels whose bytes are ordered R, G, B, X in memory. */
.macro load_rgbx_16x1 src, count
        load_8888_16x1 r, g, b, x, \src, \count
.endm
| 212 | |
/* Load 16 pixels whose bytes are ordered B, G, R, X in memory. */
.macro load_bgrx_16x1 src, count
        load_8888_16x1 b, g, r, x, \src, \count
.endm
| 216 | |
/* (Un)alias the source registers for R, G, B, X byte order. */
.macro alias_src_rgbx set=1
        alias_src_8888 r, g, b, x, \set
.endm
| 220 | |
/* (Un)alias the source registers for B, G, R, X byte order. */
.macro alias_src_bgrx set=1
        alias_src_8888 b, g, r, x, \set
.endm
| 224 | |
/*
 * (Un)alias the chroma outputs for NV12: u8x8 is the low half and v8x8 the
 * high half of c8x8x2 (a name that is defined outside this section).
 */
.macro alias_dst_nv12 set=1
        alias       u8x8, c8x8x2_l, \set
        alias       v8x8, c8x8x2_h, \set
.endm
| 229 | |
/*
 * (Un)alias the chroma outputs for NV21: the halves of c8x8x2 are assigned
 * the other way round compared to alias_dst_nv12 (v8x8 low, u8x8 high).
 */
.macro alias_dst_nv21 set=1
        alias       v8x8, c8x8x2_l, \set
        alias       u8x8, c8x8x2_h, \set
.endm
| 234 | |
| 235 | |
// common aliases

/*
 * d0-d2 hold one coefficient vector per source channel; lanes 0, 1 and 2
 * (viewed as signed 16-bit) are named after the output component they
 * contribute to (Y, U, V).
 */
        alias       CO_R,   d0
CO_RY   .dn         d0.s16[0]
CO_RU   .dn         d0.s16[1]
CO_RV   .dn         d0.s16[2]

        alias       CO_G,   d1
CO_GY   .dn         d1.s16[0]
CO_GU   .dn         d1.s16[1]
CO_GV   .dn         d1.s16[2]

        alias       CO_B,   d2
CO_BY   .dn         d2.s16[0]
CO_BU   .dn         d2.s16[1]
CO_BV   .dn         d2.s16[2]

/* both chroma biases share d3; the luma bias occupies all of q2 */
        alias       BIAS_U, d3
        alias       BIAS_V, BIAS_U

        alias       BIAS_Y, q2
| 257 | |
| 258 | |
/*
 * q3-q6 R8G8B8X8 x16
 *
 * alias_src_8888 a, b, c, d, set
 *
 * Bind (or release, when set == 0) the names <a>8x16 ... <d>8x16 and their
 * _l/_h halves to q3, q4, q5 and q6 in that order, so the channel that comes
 * first in memory always lands in q3.
 */
.macro alias_src_8888 a, b, c, d, set
        alias_qw    \a\()8x16, q3, \set
        alias_qw    \b\()8x16, q4, \set
        alias_qw    \c\()8x16, q5, \set
        alias_qw    \d\()8x16, q6, \set
.endm
| 267 | |
/*
 * kernel_420_16x2 rgb_fmt, yuv_fmt, rgb0, rgb1, y0, y1, chroma[, count]
 *
 * Convert one block of 16 pixels x 2 rows: 16 luma bytes are written to each
 * of y0 and y1, and one row of 8 chroma pairs is written to chroma.
 *
 *   rgb_fmt     selects alias_src_<rgb_fmt> and load_<rgb_fmt>_16x1
 *   yuv_fmt     selects alias_dst_<yuv_fmt> and store_chroma_<yuv_fmt>_8x1
 *   rgb0, rgb1  source pointers of the upper and lower row
 *   y0, y1      luma destination pointers of the upper and lower row
 *   chroma      chroma destination pointer
 *   count       optional; forwarded unchanged to the load/store macros
 *
 * compute_y_16x1 and compute_chroma_8x1 are not defined in this section and
 * have to be provided by the file that uses this macro.
 */
.macro kernel_420_16x2 rgb_fmt, yuv_fmt, rgb0, rgb1, y0, y1, chroma, count
        alias_src_\rgb_fmt
        alias_dst_\yuv_fmt

        /* upper row: start the chroma sums, emit luma */
        load_\rgb_fmt\()_16x1 \rgb0, \count

        downsample
        compute_y_16x1
        store_y8_16x1 \y0, \count

        /* lower row: finish the 2x2 averages, emit luma */
        load_\rgb_fmt\()_16x1 \rgb1, \count
        downsample_ars2
        compute_y_16x1
        store_y8_16x1 \y1, \count

        /* chroma from the averaged samples */
        compute_chroma_8x1 u, U
        compute_chroma_8x1 v, V

        store_chroma_\yuv_fmt\()_8x1 \chroma, \count

        /* drop the per-format aliases so another format can rebind them */
        alias_dst_\yuv_fmt 0
        alias_src_\rgb_fmt 0
.endm