/*
 * AltiVec-enhanced yuv-to-yuv conversion routines.
 *
 * Copyright (C) 2004 Romain Dolbeau <romain@dolbeau.org>
 * based on the equivalent C code in swscale.c
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
27 #include "libavutil/attributes.h"
28 #include "libavutil/cpu.h"
29 #include "libswscale/swscale.h"
30 #include "libswscale/swscale_internal.h"
34 static int yv12toyuy2_unscaled_altivec(SwsContext
*c
, const uint8_t *src
[],
35 int srcStride
[], int srcSliceY
,
36 int srcSliceH
, uint8_t *dstParam
[],
39 uint8_t *dst
= dstParam
[0] + dstStride_a
[0] * srcSliceY
;
40 // yv12toyuy2(src[0], src[1], src[2], dst, c->srcW, srcSliceH,
41 // srcStride[0], srcStride[1], dstStride[0]);
42 const uint8_t *ysrc
= src
[0];
43 const uint8_t *usrc
= src
[1];
44 const uint8_t *vsrc
= src
[2];
45 const int width
= c
->srcW
;
46 const int height
= srcSliceH
;
47 const int lumStride
= srcStride
[0];
48 const int chromStride
= srcStride
[1];
49 const int dstStride
= dstStride_a
[0];
50 const vector
unsigned char yperm
= vec_lvsl(0, ysrc
);
51 const int vertLumPerChroma
= 2;
52 register unsigned int y
;
56 * 1) dst is 16 bytes-aligned
57 * 2) dstStride is a multiple of 16
58 * 3) width is a multiple of 16
59 * 4) lum & chrom stride are multiples of 8
62 for (y
= 0; y
< height
; y
++) {
64 for (i
= 0; i
< width
- 31; i
+= 32) {
65 const unsigned int j
= i
>> 1;
66 vector
unsigned char v_yA
= vec_ld(i
, ysrc
);
67 vector
unsigned char v_yB
= vec_ld(i
+ 16, ysrc
);
68 vector
unsigned char v_yC
= vec_ld(i
+ 32, ysrc
);
69 vector
unsigned char v_y1
= vec_perm(v_yA
, v_yB
, yperm
);
70 vector
unsigned char v_y2
= vec_perm(v_yB
, v_yC
, yperm
);
71 vector
unsigned char v_uA
= vec_ld(j
, usrc
);
72 vector
unsigned char v_uB
= vec_ld(j
+ 16, usrc
);
73 vector
unsigned char v_u
= vec_perm(v_uA
, v_uB
, vec_lvsl(j
, usrc
));
74 vector
unsigned char v_vA
= vec_ld(j
, vsrc
);
75 vector
unsigned char v_vB
= vec_ld(j
+ 16, vsrc
);
76 vector
unsigned char v_v
= vec_perm(v_vA
, v_vB
, vec_lvsl(j
, vsrc
));
77 vector
unsigned char v_uv_a
= vec_mergeh(v_u
, v_v
);
78 vector
unsigned char v_uv_b
= vec_mergel(v_u
, v_v
);
79 vector
unsigned char v_yuy2_0
= vec_mergeh(v_y1
, v_uv_a
);
80 vector
unsigned char v_yuy2_1
= vec_mergel(v_y1
, v_uv_a
);
81 vector
unsigned char v_yuy2_2
= vec_mergeh(v_y2
, v_uv_b
);
82 vector
unsigned char v_yuy2_3
= vec_mergel(v_y2
, v_uv_b
);
83 vec_st(v_yuy2_0
, (i
<< 1), dst
);
84 vec_st(v_yuy2_1
, (i
<< 1) + 16, dst
);
85 vec_st(v_yuy2_2
, (i
<< 1) + 32, dst
);
86 vec_st(v_yuy2_3
, (i
<< 1) + 48, dst
);
89 const unsigned int j
= i
>> 1;
90 vector
unsigned char v_y1
= vec_ld(i
, ysrc
);
91 vector
unsigned char v_u
= vec_ld(j
, usrc
);
92 vector
unsigned char v_v
= vec_ld(j
, vsrc
);
93 vector
unsigned char v_uv_a
= vec_mergeh(v_u
, v_v
);
94 vector
unsigned char v_yuy2_0
= vec_mergeh(v_y1
, v_uv_a
);
95 vector
unsigned char v_yuy2_1
= vec_mergel(v_y1
, v_uv_a
);
96 vec_st(v_yuy2_0
, (i
<< 1), dst
);
97 vec_st(v_yuy2_1
, (i
<< 1) + 16, dst
);
99 if ((y
& (vertLumPerChroma
- 1)) == vertLumPerChroma
- 1) {
110 static int yv12touyvy_unscaled_altivec(SwsContext
*c
, const uint8_t *src
[],
111 int srcStride
[], int srcSliceY
,
112 int srcSliceH
, uint8_t *dstParam
[],
115 uint8_t *dst
= dstParam
[0] + dstStride_a
[0] * srcSliceY
;
116 // yv12toyuy2(src[0], src[1], src[2], dst, c->srcW, srcSliceH,
117 // srcStride[0], srcStride[1], dstStride[0]);
118 const uint8_t *ysrc
= src
[0];
119 const uint8_t *usrc
= src
[1];
120 const uint8_t *vsrc
= src
[2];
121 const int width
= c
->srcW
;
122 const int height
= srcSliceH
;
123 const int lumStride
= srcStride
[0];
124 const int chromStride
= srcStride
[1];
125 const int dstStride
= dstStride_a
[0];
126 const int vertLumPerChroma
= 2;
127 const vector
unsigned char yperm
= vec_lvsl(0, ysrc
);
128 register unsigned int y
;
130 /* This code assumes:
132 * 1) dst is 16 bytes-aligned
133 * 2) dstStride is a multiple of 16
134 * 3) width is a multiple of 16
135 * 4) lum & chrom stride are multiples of 8
138 for (y
= 0; y
< height
; y
++) {
140 for (i
= 0; i
< width
- 31; i
+= 32) {
141 const unsigned int j
= i
>> 1;
142 vector
unsigned char v_yA
= vec_ld(i
, ysrc
);
143 vector
unsigned char v_yB
= vec_ld(i
+ 16, ysrc
);
144 vector
unsigned char v_yC
= vec_ld(i
+ 32, ysrc
);
145 vector
unsigned char v_y1
= vec_perm(v_yA
, v_yB
, yperm
);
146 vector
unsigned char v_y2
= vec_perm(v_yB
, v_yC
, yperm
);
147 vector
unsigned char v_uA
= vec_ld(j
, usrc
);
148 vector
unsigned char v_uB
= vec_ld(j
+ 16, usrc
);
149 vector
unsigned char v_u
= vec_perm(v_uA
, v_uB
, vec_lvsl(j
, usrc
));
150 vector
unsigned char v_vA
= vec_ld(j
, vsrc
);
151 vector
unsigned char v_vB
= vec_ld(j
+ 16, vsrc
);
152 vector
unsigned char v_v
= vec_perm(v_vA
, v_vB
, vec_lvsl(j
, vsrc
));
153 vector
unsigned char v_uv_a
= vec_mergeh(v_u
, v_v
);
154 vector
unsigned char v_uv_b
= vec_mergel(v_u
, v_v
);
155 vector
unsigned char v_uyvy_0
= vec_mergeh(v_uv_a
, v_y1
);
156 vector
unsigned char v_uyvy_1
= vec_mergel(v_uv_a
, v_y1
);
157 vector
unsigned char v_uyvy_2
= vec_mergeh(v_uv_b
, v_y2
);
158 vector
unsigned char v_uyvy_3
= vec_mergel(v_uv_b
, v_y2
);
159 vec_st(v_uyvy_0
, (i
<< 1), dst
);
160 vec_st(v_uyvy_1
, (i
<< 1) + 16, dst
);
161 vec_st(v_uyvy_2
, (i
<< 1) + 32, dst
);
162 vec_st(v_uyvy_3
, (i
<< 1) + 48, dst
);
165 const unsigned int j
= i
>> 1;
166 vector
unsigned char v_y1
= vec_ld(i
, ysrc
);
167 vector
unsigned char v_u
= vec_ld(j
, usrc
);
168 vector
unsigned char v_v
= vec_ld(j
, vsrc
);
169 vector
unsigned char v_uv_a
= vec_mergeh(v_u
, v_v
);
170 vector
unsigned char v_uyvy_0
= vec_mergeh(v_uv_a
, v_y1
);
171 vector
unsigned char v_uyvy_1
= vec_mergel(v_uv_a
, v_y1
);
172 vec_st(v_uyvy_0
, (i
<< 1), dst
);
173 vec_st(v_uyvy_1
, (i
<< 1) + 16, dst
);
175 if ((y
& (vertLumPerChroma
- 1)) == vertLumPerChroma
- 1) {
185 #endif /* HAVE_ALTIVEC */
187 av_cold
void ff_get_unscaled_swscale_ppc(SwsContext
*c
)
190 if (!(av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC
))
193 if (!(c
->srcW
& 15) && !(c
->flags
& SWS_BITEXACT
) &&
194 c
->srcFormat
== AV_PIX_FMT_YUV420P
) {
195 enum AVPixelFormat dstFormat
= c
->dstFormat
;
197 // unscaled YV12 -> packed YUV, we want speed
198 if (dstFormat
== AV_PIX_FMT_YUYV422
)
199 c
->swscale
= yv12toyuy2_unscaled_altivec
;
200 else if (dstFormat
== AV_PIX_FMT_UYVY422
)
201 c
->swscale
= yv12touyvy_unscaled_altivec
;
203 #endif /* HAVE_ALTIVEC */