| 1 | /* |
| 2 | * Alpha optimized DSP utils |
| 3 | * Copyright (c) 2002 Falk Hueffner <falk@debian.org> |
| 4 | * |
| 5 | * This file is part of FFmpeg. |
| 6 | * |
| 7 | * FFmpeg is free software; you can redistribute it and/or |
| 8 | * modify it under the terms of the GNU Lesser General Public |
| 9 | * License as published by the Free Software Foundation; either |
| 10 | * version 2.1 of the License, or (at your option) any later version. |
| 11 | * |
| 12 | * FFmpeg is distributed in the hope that it will be useful, |
| 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 15 | * Lesser General Public License for more details. |
| 16 | * |
| 17 | * You should have received a copy of the GNU Lesser General Public |
| 18 | * License along with FFmpeg; if not, write to the Free Software |
| 19 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
| 20 | */ |
| 21 | |
| 22 | /* |
| 23 | * These functions are scheduled for pca56. They should work |
| 24 | * reasonably on ev6, though. |
| 25 | */ |
| 26 | |
| 27 | #include "regdef.h" |
| 28 | |
| 29 | |
| 30 | .set noat /* assembler macros may not use $at as a hidden temporary */ |
| 31 | .set noreorder /* instructions are to stay in the order written */ |
| 32 | .arch pca56 /* assemble for the pca56 CPU */ |
| 33 | .text /* what follows goes into the text (code) section */ |
| 34 | |
| 35 | /************************************************************************ |
| 36 | * void put_pixels_axp_asm(uint8_t *block, const uint8_t *pixels, |
| 37 | * int line_size, int h) |
|    | * |
|    | * Copies one quadword (8 bytes) per row from pixels to block, four rows |
|    | * per loop iteration, advancing both pointers by line_size after each row. |
|    | * |
|    | * Register use, going by the prototype above and the code below: |
|    | *   a0 = block     (store pointer)    a1 = pixels (load pointer) |
|    | *   a2 = line_size (row stride)       a3 = h      (rows left) |
|    | * Scratch registers: t0-t9 and ta. No stack memory is touched. |
|    | * |
|    | * Conditions the code relies on but does not check: |
|    | *   - h is a positive multiple of 4: each loop body runs at least once |
|    | *     and exits only when a3 is exactly zero after subtracting 4. |
|    | *   - The alignment of pixels is tested once on entry, and the unaligned |
|    | *     path takes its byte shift from a1 after a1 has already moved on by |
|    | *     three or four rows. Both are right for every row only if line_size |
|    | *     is a multiple of 8. |
|    | *   - Rows are written with stq and no alignment test, so block is |
|    | *     presumably 8-byte aligned. TODO: confirm against the callers. |
| 38 | */ |
| 39 | .align 6 /* entry point on a 2^6 = 64-byte boundary */ |
| 40 | .globl put_pixels_axp_asm /* make the symbol visible to the linker */ |
| 41 | .ent put_pixels_axp_asm /* start of the procedure; closed by .end below */ |
| 42 | put_pixels_axp_asm: |
| 43 | .frame sp, 0, ra /* zero-byte stack frame, return address in ra */ |
| 44 | .prologue 0 /* prologue is empty; 0 = procedure value in $27 is not used */ |
| 45 | |
| 46 | and a1, 7, t0 /* t0 = low three bits of the source address */ |
| 47 | beq t0, $aligned /* zero: source is 8-byte aligned, use the plain ldq loop */ |
| 48 | |
| 49 | .align 4 /* loop head on a 2^4 = 16-byte (four-instruction) boundary */ |
| 50 | $unaligned: /* loop: four rows per pass, source not 8-byte aligned */ |
| 51 | ldq_u t0, 0(a1) /* row 0: aligned quadword holding the first source byte */ |
| 52 | ldq_u t1, 8(a1) /* row 0: the quadword after it, holding the remaining bytes */ |
| 53 | addq a1, a2, a1 /* a1 -> source row 1 */ |
| 54 | nop /* filler; presumably keeps the four-instruction grouping for scheduling */ |
| 55 | |
| 56 | ldq_u t2, 0(a1) /* row 1: first quadword */ |
| 57 | ldq_u t3, 8(a1) /* row 1: second quadword */ |
| 58 | addq a1, a2, a1 /* a1 -> source row 2 */ |
| 59 | nop /* filler, as above */ |
| 60 | |
| 61 | ldq_u t4, 0(a1) /* row 2: first quadword */ |
| 62 | ldq_u t5, 8(a1) /* row 2: second quadword */ |
| 63 | addq a1, a2, a1 /* a1 -> source row 3 */ |
| 64 | nop /* filler, as above */ |
| 65 | |
| 66 | ldq_u t6, 0(a1) /* row 3: first quadword */ |
| 67 | ldq_u t7, 8(a1) /* row 3: second quadword */ |
| 68 | extql t0, a1, t0 /* row 0 low part: shift right by 8 * (a1 & 7) bits; a1 is at row 3 here */ |
| 69 | addq a1, a2, a1 /* a1 -> source row 4, the first row of the next pass */ |
| 70 | |
| 71 | extqh t1, a1, t1 /* row 0 high part: shift left into the upper bytes; a1 is at row 4 from here on */ |
| 72 | addq a0, a2, t8 /* t8 = destination row 1 */ |
| 73 | extql t2, a1, t2 /* row 1 low part */ |
| 74 | addq t8, a2, t9 /* t9 = destination row 2 */ |
| 75 | |
| 76 | extqh t3, a1, t3 /* row 1 high part */ |
| 77 | addq t9, a2, ta /* ta = destination row 3 */ |
| 78 | extql t4, a1, t4 /* row 2 low part */ |
| 79 | or t0, t1, t0 /* t0 = the 8 source bytes of row 0 */ |
| 80 | |
| 81 | extqh t5, a1, t5 /* row 2 high part */ |
| 82 | or t2, t3, t2 /* t2 = the 8 source bytes of row 1 */ |
| 83 | extql t6, a1, t6 /* row 3 low part */ |
| 84 | or t4, t5, t4 /* t4 = the 8 source bytes of row 2 */ |
| 85 | |
| 86 | extqh t7, a1, t7 /* row 3 high part */ |
| 87 | or t6, t7, t6 /* t6 = the 8 source bytes of row 3 */ |
| 88 | stq t0, 0(a0) /* write row 0 */ |
| 89 | stq t2, 0(t8) /* write row 1 */ |
| 90 | |
| 91 | stq t4, 0(t9) /* write row 2 */ |
| 92 | subq a3, 4, a3 /* four rows done */ |
| 93 | stq t6, 0(ta) /* write row 3 */ |
| 94 | addq ta, a2, a0 /* a0 -> destination row 4, the first row of the next pass */ |
| 95 | |
| 96 | bne a3, $unaligned /* repeat until the row count reaches exactly zero */ |
| 97 | ret /* return to the caller */ |
| 98 | |
| 99 | .align 4 /* loop head on a 2^4 = 16-byte (four-instruction) boundary */ |
| 100 | $aligned: /* loop: four rows per pass, source 8-byte aligned */ |
| 101 | ldq t0, 0(a1) /* row 0 */ |
| 102 | addq a1, a2, a1 /* a1 -> source row 1 */ |
| 103 | ldq t1, 0(a1) /* row 1 */ |
| 104 | addq a1, a2, a1 /* a1 -> source row 2 */ |
| 105 | |
| 106 | ldq t2, 0(a1) /* row 2 */ |
| 107 | addq a1, a2, a1 /* a1 -> source row 3 */ |
| 108 | ldq t3, 0(a1) /* row 3 */ |
| 109 | |
| 110 | addq a0, a2, t4 /* t4 = destination row 1 */ |
| 111 | addq a1, a2, a1 /* a1 -> source row 4, the first row of the next pass */ |
| 112 | addq t4, a2, t5 /* t5 = destination row 2 */ |
| 113 | subq a3, 4, a3 /* four rows done */ |
| 114 | |
| 115 | stq t0, 0(a0) /* write row 0 */ |
| 116 | addq t5, a2, t6 /* t6 = destination row 3 */ |
| 117 | stq t1, 0(t4) /* write row 1 */ |
| 118 | addq t6, a2, a0 /* a0 -> destination row 4, the first row of the next pass */ |
| 119 | |
| 120 | stq t2, 0(t5) /* write row 2 */ |
| 121 | stq t3, 0(t6) /* write row 3 */ |
| 122 | |
| 123 | bne a3, $aligned /* repeat until the row count reaches exactly zero */ |
| 124 | ret /* return to the caller */ |
| 125 | .end put_pixels_axp_asm /* end of the procedure opened by .ent */ |