/*
 * Copyright (c) 2013 RISC OS Open Ltd
 * Author: Ben Avison <bavison@riscosopen.org>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
| 21 | |
#include "libavutil/arm/asm.S"

/*
 * Register roles used throughout this file.
 *
 * RESULT and BUF are both a1: the buffer base is only ever used to turn
 * PTR back into a byte offset, and that offset is the return value.
 */
RESULT  .req    a1      @ out: byte offset returned to the caller
BUF     .req    a1      @ in:  start of the buffer
SIZE    .req    a2      @ in:  byte count; reused as a biased down-counter
PATTERN .req    a3      @ halfword test mask, loaded with 0x80008000
PTR     .req    a4      @ read pointer, post-incremented by every load
DAT0    .req    v1      @ data words fetched from the buffer
DAT1    .req    v2
DAT2    .req    v3
DAT3    .req    v4
TMP0    .req    v5      @ per-word test results (TMPn belongs to DATn)
TMP1    .req    v6
TMP2    .req    ip
TMP3    .req    lr      @ lr is used as scratch, so it must be saved first

/* How far ahead of PTR the pld is issued, counted in 32-byte cachelines */
#define PRELOAD_DISTANCE 4
| 39 | |
/*
 * innerloop4: test one word of the buffer.
 *
 * Expects PATTERN to hold 0x80008000, which makes (PATTERN lsr #14) equal
 * to 0x00020002.  For each 16-bit half h of the loaded word the sequence
 * evaluates ((h - 2) & ~h) & 0x8000, which is non-zero when h is 0 or 1.
 *
 * On exit:
 *   PTR   advanced by 4, SIZE reduced by 4
 *   DAT0  the word that was loaded
 *   TMP0  test result for that word
 *   Z     clear (NE) when the word contains a candidate halfword
 *   C     still the result of the SIZE subtraction
 */
.macro innerloop4
        ldr     DAT0, [PTR], #4
        subs    SIZE, SIZE, #4                  @ C flag survives rest of macro
        sub     TMP0, DAT0, PATTERN, lsr #14    @ subtract 2 from each halfword
        bic     TMP0, TMP0, DAT0                @ keep bits that were clear before
        ands    TMP0, TMP0, PATTERN             @ isolate bit 15 of each halfword
.endm
| 47 | |
/*
 * innerloop16: test four consecutive words of the buffer in one pass.
 *
 * Arguments (both optional):
 *   decrement   when non-empty, SIZE is reduced by this many bytes and the
 *               C flag reports the outcome of that subtraction
 *   do_preload  when non-empty, a pld is issued PRELOAD_DISTANCE cachelines
 *               beyond the already advanced PTR
 *
 * Expects PATTERN to hold 0x80008000; each word gets the same halfword
 * test as in innerloop4, with the four tests interleaved.
 *
 * On exit:
 *   PTR         advanced by 16
 *   DAT0-DAT3   the four words that were loaded
 *   TMP0        fully masked test result for DAT0
 *   TMP1-TMP3   masked only while every earlier word tested zero; the
 *               conditional ANDs are skipped after the first hit, so the
 *               results must be inspected in the order TMP0, TMP1, TMP2
 *   Z           clear (NE) when any of the four words holds a candidate
 */
.macro innerloop16  decrement, do_preload
        ldmia   PTR!, {DAT0,DAT1,DAT2,DAT3}
 .ifnc "\do_preload",""
        pld     [PTR, #PRELOAD_DISTANCE*32]
 .endif
 .ifnc "\decrement",""
        subs    SIZE, SIZE, #\decrement         @ C flag survives rest of macro
 .endif
        sub     TMP0, DAT0, PATTERN, lsr #14
        sub     TMP1, DAT1, PATTERN, lsr #14
        bic     TMP0, TMP0, DAT0
        bic     TMP1, TMP1, DAT1
        sub     TMP2, DAT2, PATTERN, lsr #14
        sub     TMP3, DAT3, PATTERN, lsr #14
        ands    TMP0, TMP0, PATTERN             @ word 0: always tested
        bic     TMP2, TMP2, DAT2
        it      eq
        andseq  TMP1, TMP1, PATTERN             @ word 1: only if word 0 was clean
        bic     TMP3, TMP3, DAT3
        itt     eq
        andseq  TMP2, TMP2, PATTERN             @ word 2: only if words 0-1 were clean
        andseq  TMP3, TMP3, PATTERN             @ word 3: only if words 0-2 were clean
.endm
| 71 | |
/*
 * int ff_startcode_find_candidate_armv6(const uint8_t *buf, int size)
 *
 * Returns the byte offset from buf of the first position that might begin
 * a start code, or the offset just past the last byte examined when no
 * candidate exists.  A size of 0 returns 0.
 *
 * In:    a1 = buf, a2 = size in bytes
 * Out:   a1 = offset as described above
 * Saved: v1-v6 and lr are pushed on entry and popped (lr into pc) on exit
 *
 * Strategy:
 *   - Buffers too short for the preloading loop are scanned one byte at a
 *     time for the first zero byte.
 *   - Otherwise PTR is brought to word alignment bytewise, then to 4-word
 *     and to 32-byte alignment, and the bulk of the data is tested 32 bytes
 *     per iteration with preloading.  The remainder is tested in 16-byte
 *     chunks, then words, then single bytes.
 *   - Word loads are done with big-endian data access (setend be) so that
 *     the halves of each register follow memory order.  A word is flagged
 *     when one of its halfwords is 0x0000 or 0x0001; the handlers at 91
 *     and 93 locate that halfword and step back one byte when the byte
 *     just before it is zero as well.
 *   - Every exit taken after "setend be" passes through "setend le".
 */
function ff_startcode_find_candidate_armv6, export=1
        push    {v1-v6,lr}
        mov     PTR, BUF
        @ Ensure there are at least (PRELOAD_DISTANCE+2) complete cachelines to go
        @ before using code that does preloads
        cmp     SIZE, #(PRELOAD_DISTANCE+3)*32 - 1
        blo     60f

        @ Get to word-alignment, 1 byte at a time
        tst     PTR, #3
        beq     2f
1:      ldrb    DAT0, [PTR], #1
        sub     SIZE, SIZE, #1
        teq     DAT0, #0
        beq     90f
        tst     PTR, #3
        bne     1b
2:      @ Get to 4-word alignment, 1 word at a time
        ldr     PATTERN, =0x80008000
        setend  be
        tst     PTR, #12
        beq     4f
3:      innerloop4
        bne     91f
        tst     PTR, #12
        bne     3b
4:      @ Get to cacheline (8-word) alignment
        tst     PTR, #16
        beq     5f
        innerloop16  16
        bne     93f
5:      @ Check complete cachelines, with preloading
        @ We need to stop when there are still (PRELOAD_DISTANCE+1)
        @ complete cachelines to go
        sub     SIZE, SIZE, #(PRELOAD_DISTANCE+2)*32
6:      innerloop16  , do_preload
        bne     93f
        innerloop16  32
        bne     93f
        bcs     6b
        @ Preload trailing part-cacheline, if any
        tst     SIZE, #31
        beq     7f
        pld     [PTR, #(PRELOAD_DISTANCE+1)*32]
        @ Check remaining data without doing any more preloads. First
        @ do in chunks of 4 words:
7:      adds    SIZE, SIZE, #(PRELOAD_DISTANCE+2)*32 - 16
        bmi     9f
8:      innerloop16  16
        bne     93f
        bcs     8b
        @ Then in words:
9:      adds    SIZE, SIZE, #16 - 4
        bmi     11f
10:     innerloop4
        bne     91f
        bcs     10b
11:     setend  le
        @ Check second byte of final halfword
        ldrb    DAT0, [PTR, #-1]
        teq     DAT0, #0
        beq     90f
        @ Check any remaining bytes
        @ (the low two bits of SIZE still hold the leftover byte count)
        tst     SIZE, #3
        beq     13f
12:     ldrb    DAT0, [PTR], #1
        sub     SIZE, SIZE, #1
        teq     DAT0, #0
        beq     90f
        tst     SIZE, #3
        bne     12b
        @ No candidate found
13:     sub     RESULT, PTR, BUF
        b       99f

60:     @ Small buffer - simply check by looping over bytes
        subs    SIZE, SIZE, #1
        @ Empty buffer: a1 still holds the buf pointer here, so leave through
        @ the no-candidate exit, which yields PTR - BUF = 0, rather than
        @ returning the pointer value.
        bcc     62f
61:     ldrb    DAT0, [PTR], #1
        subs    SIZE, SIZE, #1
        teq     DAT0, #0
        beq     90f
        bcs     61b
        @ No candidate found
62:     sub     RESULT, PTR, BUF
        b       99f

90:     @ Found a candidate at the preceding byte
        sub     RESULT, PTR, BUF
        sub     RESULT, RESULT, #1
        b       99f

91:     @ Found a candidate somewhere in the preceding 4 bytes
        sub     RESULT, PTR, BUF
        sub     RESULT, RESULT, #4
        @ Repeat the test on the first halfword only: MI means the hit is in
        @ the first halfword, PL means it is in the second one
        sub     TMP0, DAT0, #0x20000
        bics    TMP0, TMP0, DAT0
        itt     pl
        ldrbpl  DAT0, [PTR, #-3]        @ byte just before the second halfword
        addpl   RESULT, RESULT, #2
        bpl     92f
        teq     RESULT, #0
        beq     98f             @ do not look back a byte if found at first byte in buffer
        ldrb    DAT0, [PTR, #-5]        @ byte just before this word
92:     teq     DAT0, #0
        it      eq
        subeq   RESULT, RESULT, #1      @ preceding byte is zero: start there
        b       98f

93:     @ Found a candidate somewhere in the preceding 16 bytes
        sub     RESULT, PTR, BUF
        sub     RESULT, RESULT, #16
        teq     TMP0, #0
        beq     95f             @ not in first 4 bytes
        sub     TMP0, DAT0, #0x20000
        bics    TMP0, TMP0, DAT0
        itt     pl
        ldrbpl  DAT0, [PTR, #-15]
        addpl   RESULT, RESULT, #2
        bpl     94f
        teq     RESULT, #0
        beq     98f             @ do not look back a byte if found at first byte in buffer
        ldrb    DAT0, [PTR, #-17]
94:     teq     DAT0, #0
        it      eq
        subeq   RESULT, RESULT, #1
        b       98f
95:     add     RESULT, RESULT, #4
        teq     TMP1, #0
        beq     96f             @ not in next 4 bytes
        sub     TMP1, DAT1, #0x20000
        bics    TMP1, TMP1, DAT1
        itee    mi
        ldrbmi  DAT0, [PTR, #-13]
        ldrbpl  DAT0, [PTR, #-11]
        addpl   RESULT, RESULT, #2
        teq     DAT0, #0
        it      eq
        subeq   RESULT, RESULT, #1
        b       98f
96:     add     RESULT, RESULT, #4
        teq     TMP2, #0
        beq     97f             @ not in next 4 bytes
        sub     TMP2, DAT2, #0x20000
        bics    TMP2, TMP2, DAT2
        itee    mi
        ldrbmi  DAT0, [PTR, #-9]
        ldrbpl  DAT0, [PTR, #-7]
        addpl   RESULT, RESULT, #2
        teq     DAT0, #0
        it      eq
        subeq   RESULT, RESULT, #1
        b       98f
97:     @ Only the last word is left, so no test of TMP3 is needed
        add     RESULT, RESULT, #4
        sub     TMP3, DAT3, #0x20000
        bics    TMP3, TMP3, DAT3
        itee    mi
        ldrbmi  DAT0, [PTR, #-5]
        ldrbpl  DAT0, [PTR, #-3]
        addpl   RESULT, RESULT, #2
        teq     DAT0, #0
        it      eq
        subeq   RESULT, RESULT, #1
        @ drop through to 98f
98:     setend  le
99:     pop     {v1-v6,pc}
endfunc
| 240 | |
| 241 | .unreq RESULT |
| 242 | .unreq BUF |
| 243 | .unreq SIZE |
| 244 | .unreq PATTERN |
| 245 | .unreq PTR |
| 246 | .unreq DAT0 |
| 247 | .unreq DAT1 |
| 248 | .unreq DAT2 |
| 249 | .unreq DAT3 |
| 250 | .unreq TMP0 |
| 251 | .unreq TMP1 |
| 252 | .unreq TMP2 |
| 253 | .unreq TMP3 |