pixblockdsp.c

   1 /*
   2  * Copyright (c) 2002 Brian Foley
   3  * Copyright (c) 2002 Dieter Shirley
   4  * Copyright (c) 2003-2004 Romain Dolbeau <romain@dolbeau.org>
   5  *
   6  * This file is part of FFmpeg.
   7  *
   8  * FFmpeg is free software; you can redistribute it and/or
   9  * modify it under the terms of the GNU Lesser General Public
  10  * License as published by the Free Software Foundation; either
  11  * version 2.1 of the License, or (at your option) any later version.
  12  *
  13  * FFmpeg is distributed in the hope that it will be useful,
  14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  16  * Lesser General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU Lesser General Public
  19  * License along with FFmpeg; if not, write to the Free Software
  20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  21  */
  22
  23 #include "config.h"
  24 #if HAVE_ALTIVEC_H
  25 #include <altivec.h>
  26 #endif
  27
  28 #include "libavutil/attributes.h"
  29 #include "libavutil/cpu.h"
  30 #include "libavutil/ppc/cpu.h"
  31 #include "libavutil/ppc/types_altivec.h"
  32 #include "libavutil/ppc/util_altivec.h"
  33 #include "libavcodec/avcodec.h"
  34 #include "libavcodec/pixblockdsp.h"
  35
  36 #if HAVE_ALTIVEC
  37
  38 #if HAVE_VSX
  39 static void get_pixels_altivec(int16_t *restrict block, const uint8_t *pixels,
  40                                ptrdiff_t line_size)
  41 {
  42     int i;
  43     vector unsigned char perm =
  44         (vector unsigned char) {0x00,0x10, 0x01,0x11,0x02,0x12,0x03,0x13,\
  45             0x04,0x14,0x05,0x15,0x06,0x16,0x07,0x17};
  46     const vector unsigned char zero =
  47         (const vector unsigned char) vec_splat_u8(0);
  48
  49     for (i = 0; i < 8; i++) {
  50         /* Read potentially unaligned pixels.
  51          * We're reading 16 pixels, and actually only want 8,
  52          * but we simply ignore the extras. */
  53         vector unsigned char bytes = vec_vsx_ld(0, pixels);
  54
  55         // Convert the bytes into shorts.
  56         //vector signed short shorts = (vector signed short) vec_perm(zero, bytes, perm);
  57         vector signed short shorts = (vector signed short) vec_perm(bytes, zero, perm);
  58
  59         // Save the data to the block, we assume the block is 16-byte aligned.
  60         vec_vsx_st(shorts, i * 16, (vector signed short *) block);
  61
  62         pixels += line_size;
  63     }
  64 }
  65 #else
  66 static void get_pixels_altivec(int16_t *restrict block, const uint8_t *pixels,
  67                                ptrdiff_t line_size)
  68 {
  69     int i;
  70     vector unsigned char perm = vec_lvsl(0, pixels);
  71     const vector unsigned char zero =
  72         (const vector unsigned char) vec_splat_u8(0);
  73
  74     for (i = 0; i < 8; i++) {
  75         /* Read potentially unaligned pixels.
  76          * We're reading 16 pixels, and actually only want 8,
  77          * but we simply ignore the extras. */
  78         vector unsigned char pixl = vec_ld(0, pixels);
  79         vector unsigned char pixr = vec_ld(7, pixels);
  80         vector unsigned char bytes = vec_perm(pixl, pixr, perm);
  81
  82         // Convert the bytes into shorts.
  83         vector signed short shorts = (vector signed short) vec_mergeh(zero,
  84                                                                       bytes);
  85
  86         // Save the data to the block, we assume the block is 16-byte aligned.
  87         vec_st(shorts, i * 16, (vector signed short *) block);
  88
  89         pixels += line_size;
  90     }
  91 }
  92
  93 #endif /* HAVE_VSX */
  94
  95 #if HAVE_VSX
  96 static void diff_pixels_altivec(int16_t *restrict block, const uint8_t *s1,
  97                                 const uint8_t *s2, int stride)
  98 {
  99   int i;
 100   const vector unsigned char zero =
 101     (const vector unsigned char) vec_splat_u8(0);
 102   vector signed short shorts1, shorts2;
 103
 104   for (i = 0; i < 4; i++) {
 105     /* Read potentially unaligned pixels.
 106      * We're reading 16 pixels, and actually only want 8,
 107      * but we simply ignore the extras. */
 108     vector unsigned char bytes = vec_vsx_ld(0,  s1);
 109
 110     // Convert the bytes into shorts.
 111     shorts1 = (vector signed short) vec_mergeh(bytes, zero);
 112
 113     // Do the same for the second block of pixels.
 114     bytes =vec_vsx_ld(0,  s2);
 115
 116     // Convert the bytes into shorts.
 117     shorts2 = (vector signed short) vec_mergeh(bytes, zero);
 118
 119     // Do the subtraction.
 120     shorts1 = vec_sub(shorts1, shorts2);
 121
 122     // Save the data to the block, we assume the block is 16-byte aligned.
 123     vec_vsx_st(shorts1, 0, (vector signed short *) block);
 124
 125     s1    += stride;
 126     s2    += stride;
 127     block += 8;
 128
 129     /* The code below is a copy of the code above...
 130      * This is a manual unroll. */
 131
 132     /* Read potentially unaligned pixels.
 133      * We're reading 16 pixels, and actually only want 8,
 134      * but we simply ignore the extras. */
 135     bytes = vec_vsx_ld(0,  s1);
 136
 137     // Convert the bytes into shorts.
 138     shorts1 = (vector signed short) vec_mergeh(bytes, zero);
 139
 140     // Do the same for the second block of pixels.
 141     bytes = vec_vsx_ld(0,  s2);
 142
 143     // Convert the bytes into shorts.
 144     shorts2 = (vector signed short) vec_mergeh(bytes, zero);
 145
 146     // Do the subtraction.
 147     shorts1 = vec_sub(shorts1, shorts2);
 148
 149     // Save the data to the block, we assume the block is 16-byte aligned.
 150     vec_vsx_st(shorts1, 0, (vector signed short *) block);
 151
 152     s1    += stride;
 153     s2    += stride;
 154     block += 8;
 155   }
 156 }
 157 #else
 158 static void diff_pixels_altivec(int16_t *restrict block, const uint8_t *s1,
 159                                 const uint8_t *s2, int stride)
 160 {
 161     int i;
 162     vector unsigned char perm1 = vec_lvsl(0, s1);
 163     vector unsigned char perm2 = vec_lvsl(0, s2);
 164     const vector unsigned char zero =
 165         (const vector unsigned char) vec_splat_u8(0);
 166     vector signed short shorts1, shorts2;
 167
 168     for (i = 0; i < 4; i++) {
 169         /* Read potentially unaligned pixels.
 170          * We're reading 16 pixels, and actually only want 8,
 171          * but we simply ignore the extras. */
 172         vector unsigned char pixl  = vec_ld(0,  s1);
 173         vector unsigned char pixr  = vec_ld(15, s1);
 174         vector unsigned char bytes = vec_perm(pixl, pixr, perm1);
 175
 176         // Convert the bytes into shorts.
 177         shorts1 = (vector signed short) vec_mergeh(zero, bytes);
 178
 179         // Do the same for the second block of pixels.
 180         pixl  = vec_ld(0,  s2);
 181         pixr  = vec_ld(15, s2);
 182         bytes = vec_perm(pixl, pixr, perm2);
 183
 184         // Convert the bytes into shorts.
 185         shorts2 = (vector signed short) vec_mergeh(zero, bytes);
 186
 187         // Do the subtraction.
 188         shorts1 = vec_sub(shorts1, shorts2);
 189
 190         // Save the data to the block, we assume the block is 16-byte aligned.
 191         vec_st(shorts1, 0, (vector signed short *) block);
 192
 193         s1    += stride;
 194         s2    += stride;
 195         block += 8;
 196
 197         /* The code below is a copy of the code above...
 198          * This is a manual unroll. */
 199
 200         /* Read potentially unaligned pixels.
 201          * We're reading 16 pixels, and actually only want 8,
 202          * but we simply ignore the extras. */
 203         pixl  = vec_ld(0,  s1);
 204         pixr  = vec_ld(15, s1);
 205         bytes = vec_perm(pixl, pixr, perm1);
 206
 207         // Convert the bytes into shorts.
 208         shorts1 = (vector signed short) vec_mergeh(zero, bytes);
 209
 210         // Do the same for the second block of pixels.
 211         pixl  = vec_ld(0,  s2);
 212         pixr  = vec_ld(15, s2);
 213         bytes = vec_perm(pixl, pixr, perm2);
 214
 215         // Convert the bytes into shorts.
 216         shorts2 = (vector signed short) vec_mergeh(zero, bytes);
 217
 218         // Do the subtraction.
 219         shorts1 = vec_sub(shorts1, shorts2);
 220
 221         // Save the data to the block, we assume the block is 16-byte aligned.
 222         vec_st(shorts1, 0, (vector signed short *) block);
 223
 224         s1    += stride;
 225         s2    += stride;
 226         block += 8;
 227     }
 228 }
 229
 230 #endif /* HAVE_VSX */
 231
 232 #endif /* HAVE_ALTIVEC */
 233
 234 av_cold void ff_pixblockdsp_init_ppc(PixblockDSPContext *c,
 235                                      AVCodecContext *avctx,
 236                                      unsigned high_bit_depth)
 237 {
 238 #if HAVE_ALTIVEC
 239     if (!PPC_ALTIVEC(av_get_cpu_flags()))
 240         return;
 241
 242     c->diff_pixels = diff_pixels_altivec;
 243
 244     if (!high_bit_depth) {
 245         c->get_pixels = get_pixels_altivec;
 246     }
 247 #endif /* HAVE_ALTIVEC */
 248 }