| 1 | /* |
| 2 | * software RGB to RGB converter |
| 3 | * software PAL8 to RGB converter |
| 4 | * software YUV to YUV converter |
| 5 | * software YUV to RGB converter |
| 6 | * Written by Nick Kurshev. |
| 7 | * palette & YUV & runtime CPU stuff by Michael (michaelni@gmx.at) |
| 8 | * lot of big-endian byte order fixes by Alex Beregszaszi |
| 9 | * |
| 10 | * This file is part of FFmpeg. |
| 11 | * |
| 12 | * FFmpeg is free software; you can redistribute it and/or |
| 13 | * modify it under the terms of the GNU Lesser General Public |
| 14 | * License as published by the Free Software Foundation; either |
| 15 | * version 2.1 of the License, or (at your option) any later version. |
| 16 | * |
| 17 | * FFmpeg is distributed in the hope that it will be useful, |
| 18 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 19 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 20 | * Lesser General Public License for more details. |
| 21 | * |
| 22 | * You should have received a copy of the GNU Lesser General Public |
| 23 | * License along with FFmpeg; if not, write to the Free Software |
| 24 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
| 25 | */ |
| 26 | |
| 27 | #include <stddef.h> |
| 28 | |
| 29 | #include "libavutil/attributes.h" |
| 30 | |
/**
 * Expand packed 3-byte pixels to 4-byte pixels, giving every output pixel
 * an opaque (255) alpha byte.
 *
 * @param src      input pixels, 3 bytes each
 * @param dst      output buffer; 4 bytes are written per input pixel
 * @param src_size size of the input in bytes
 */
static inline void rgb24tobgr32_c(const uint8_t *src, uint8_t *dst,
                                  int src_size)
{
    const uint8_t *in         = src;
    const uint8_t *const stop = src + src_size;
    uint8_t *out              = dst;

    for (; in < stop; in += 3, out += 4) {
#if HAVE_BIGENDIAN
        /* RGB24 (= R, G, B) -> RGB32 (= A, B, G, R) */
        out[0] = 255;
        out[1] = in[2];
        out[2] = in[1];
        out[3] = in[0];
#else
        /* the three colour bytes keep their order, alpha is appended */
        out[0] = in[0];
        out[1] = in[1];
        out[2] = in[2];
        out[3] = 255;
#endif
    }
}
| 54 | |
/**
 * Reduce 4-byte pixels to packed 3-byte pixels by dropping the alpha byte.
 *
 * @param src      input pixels, 4 bytes each
 * @param dst      output buffer; 3 bytes are written per input pixel
 * @param src_size size of the input in bytes
 */
static inline void rgb32tobgr24_c(const uint8_t *src, uint8_t *dst,
                                  int src_size)
{
    const uint8_t *in         = src;
    const uint8_t *const stop = src + src_size;
    uint8_t *out              = dst;

    for (; in < stop; in += 4, out += 3) {
#if HAVE_BIGENDIAN
        /* RGB32 (= A, B, G, R) -> RGB24 (= R, G, B) */
        out[2] = in[1];
        out[1] = in[2];
        out[0] = in[3];
#else
        /* first three bytes are copied, the fourth one is skipped */
        out[0] = in[0];
        out[1] = in[1];
        out[2] = in[2];
#endif
    }
}
| 78 | |
/*
 * original by Strepto/Astral
 * ported to gcc & bugfixed: A'rpi
 * MMXEXT, 3DNOW optimization by Nick Kurshev
 * 32-bit C version, and and&add trick by Michael Niedermayer
 *
 * Converts 15-bit pixels to 16-bit pixels. Bits 5..14 of every 16-bit
 * pixel are added to themselves, which moves them up by one position,
 * while bits 0..4 are kept in place. Two pixels are handled per 32-bit
 * word; a single remaining pixel is handled as one 16-bit word.
 *
 * src_size is the size of the input in bytes.
 */
static inline void rgb15to16_c(const uint8_t *src, uint8_t *dst, int src_size)
{
    const uint8_t *in = src;
    uint8_t *out      = dst;
    int left          = src_size;    /* input bytes not yet converted */

    for (; left > 3; left -= 4, in += 4, out += 4) {
        const unsigned pair = *(const uint32_t *)in;
        *(uint32_t *)out = (pair & 0x7FFF7FFF) + (pair & 0x7FE07FE0);
    }
    if (left > 0) {
        const unsigned short px = *(const uint16_t *)in;
        *(uint16_t *)out = (px & 0x7FFF) + (px & 0x7FE0);
    }
}
| 103 | |
/**
 * Convert 16-bit pixels to 15-bit pixels.
 *
 * Bits 6..15 of every 16-bit pixel are moved down by one position and
 * bits 0..4 are kept in place, so bit 5 of the input is dropped. Two pixels
 * are handled per 32-bit word; a single remaining pixel is handled as one
 * 16-bit word.
 *
 * @param src      input pixels
 * @param dst      output pixels
 * @param src_size size of the input in bytes
 */
static inline void rgb16to15_c(const uint8_t *src, uint8_t *dst, int src_size)
{
    const uint8_t *in = src;
    uint8_t *out      = dst;
    int left          = src_size;    /* input bytes not yet converted */

    for (; left > 3; left -= 4, in += 4, out += 4) {
        const uint32_t pair = *(const uint32_t *)in;
        *(uint32_t *)out = ((pair >> 1) & 0x7FE07FE0) | (pair & 0x001F001F);
    }
    if (left > 0) {
        const uint16_t px = *(const uint16_t *)in;
        *(uint16_t *)out = ((px >> 1) & 0x7FE0) | (px & 0x001F);
    }
}
| 122 | |
/**
 * Pack 32-bit pixels into 16-bit pixels (5-6-5 bits), keeping the channel
 * order: the lowest input byte ends up in the lowest output bits.
 *
 * @param src      input pixels, read as native 32-bit words
 * @param dst      output pixels, written as native 16-bit words
 * @param src_size size of the input in bytes
 */
static inline void rgb32to16_c(const uint8_t *src, uint8_t *dst, int src_size)
{
    const uint8_t *in         = src;
    const uint8_t *const stop = src + src_size;
    uint16_t *out             = (uint16_t *)dst;

    for (; in < stop; in += 4) {
        const int px   = *(const uint32_t *)in;
        const int low  = (px & 0xFF)     >> 3;  /* top 5 bits of byte 0 -> bits 0..4   */
        const int mid  = (px & 0xFC00)   >> 5;  /* top 6 bits of byte 1 -> bits 5..10  */
        const int high = (px & 0xF80000) >> 8;  /* top 5 bits of byte 2 -> bits 11..15 */

        *out++ = low + mid + high;
    }
}
| 137 | |
/**
 * Pack 32-bit pixels into 16-bit pixels (5-6-5 bits) with the outer two
 * channels swapped: the lowest input byte ends up in the highest output
 * bits.
 *
 * @param src      input pixels, read as native 32-bit words
 * @param dst      output pixels, written as native 16-bit words
 * @param src_size size of the input in bytes
 */
static inline void rgb32tobgr16_c(const uint8_t *src, uint8_t *dst,
                                  int src_size)
{
    const uint8_t *in         = src;
    const uint8_t *const stop = src + src_size;
    uint16_t *out             = (uint16_t *)dst;

    for (; in < stop; in += 4) {
        const int px   = *(const uint32_t *)in;
        const int high = (px & 0xF8)     << 8;   /* top 5 bits of byte 0 -> bits 11..15 */
        const int mid  = (px & 0xFC00)   >> 5;   /* top 6 bits of byte 1 -> bits 5..10  */
        const int low  = (px & 0xF80000) >> 19;  /* top 5 bits of byte 2 -> bits 0..4   */

        *out++ = high + mid + low;
    }
}
| 153 | |
/**
 * Pack 32-bit pixels into 15-bit pixels (5-5-5 bits), keeping the channel
 * order: the lowest input byte ends up in the lowest output bits.
 *
 * @param src      input pixels, read as native 32-bit words
 * @param dst      output pixels, written as native 16-bit words
 * @param src_size size of the input in bytes
 */
static inline void rgb32to15_c(const uint8_t *src, uint8_t *dst, int src_size)
{
    const uint8_t *in         = src;
    const uint8_t *const stop = src + src_size;
    uint16_t *out             = (uint16_t *)dst;

    for (; in < stop; in += 4) {
        const int px   = *(const uint32_t *)in;
        const int low  = (px & 0xFF)     >> 3;  /* top 5 bits of byte 0 -> bits 0..4   */
        const int mid  = (px & 0xF800)   >> 6;  /* top 5 bits of byte 1 -> bits 5..9   */
        const int high = (px & 0xF80000) >> 9;  /* top 5 bits of byte 2 -> bits 10..14 */

        *out++ = low + mid + high;
    }
}
| 168 | |
/**
 * Pack 32-bit pixels into 15-bit pixels (5-5-5 bits) with the outer two
 * channels swapped: the lowest input byte ends up in the highest output
 * bits.
 *
 * @param src      input pixels, read as native 32-bit words
 * @param dst      output pixels, written as native 16-bit words
 * @param src_size size of the input in bytes
 */
static inline void rgb32tobgr15_c(const uint8_t *src, uint8_t *dst,
                                  int src_size)
{
    const uint8_t *in         = src;
    const uint8_t *const stop = src + src_size;
    uint16_t *out             = (uint16_t *)dst;

    for (; in < stop; in += 4) {
        const int px   = *(const uint32_t *)in;
        const int high = (px & 0xF8)     << 7;   /* top 5 bits of byte 0 -> bits 10..14 */
        const int mid  = (px & 0xF800)   >> 6;   /* top 5 bits of byte 1 -> bits 5..9   */
        const int low  = (px & 0xF80000) >> 19;  /* top 5 bits of byte 2 -> bits 0..4   */

        *out++ = high + mid + low;
    }
}
| 184 | |
/**
 * Pack 3-byte pixels into 16-bit pixels (5-6-5 bits). The first byte of
 * each pixel goes to the lowest output bits, the third byte to the highest.
 *
 * @param src      input pixels, 3 bytes each
 * @param dst      output pixels, written as native 16-bit words
 * @param src_size size of the input in bytes
 */
static inline void rgb24tobgr16_c(const uint8_t *src, uint8_t *dst,
                                  int src_size)
{
    const uint8_t *in         = src;
    const uint8_t *const stop = src + src_size;
    uint16_t *out             = (uint16_t *)dst;

    for (; in < stop; in += 3) {
        const int b = in[0];
        const int g = in[1];
        const int r = in[2];

        *out++ = ((r & 0xF8) << 8) | ((g & 0xFC) << 3) | (b >> 3);
    }
}
| 199 | |
/**
 * Pack 3-byte pixels into 16-bit pixels (5-6-5 bits). The first byte of
 * each pixel goes to the highest output bits, the third byte to the lowest.
 *
 * @param src      input pixels, 3 bytes each
 * @param dst      output pixels, written as native 16-bit words
 * @param src_size size of the input in bytes
 */
static inline void rgb24to16_c(const uint8_t *src, uint8_t *dst, int src_size)
{
    const uint8_t *in         = src;
    const uint8_t *const stop = src + src_size;
    uint16_t *out             = (uint16_t *)dst;

    for (; in < stop; in += 3) {
        const int r = in[0];
        const int g = in[1];
        const int b = in[2];

        *out++ = ((r & 0xF8) << 8) | ((g & 0xFC) << 3) | (b >> 3);
    }
}
| 213 | |
/**
 * Pack 3-byte pixels into 15-bit pixels (5-5-5 bits). The first byte of
 * each pixel goes to the lowest output bits, the third byte to the highest.
 *
 * @param src      input pixels, 3 bytes each
 * @param dst      output pixels, written as native 16-bit words
 * @param src_size size of the input in bytes
 */
static inline void rgb24tobgr15_c(const uint8_t *src, uint8_t *dst,
                                  int src_size)
{
    const uint8_t *in         = src;
    const uint8_t *const stop = src + src_size;
    uint16_t *out             = (uint16_t *)dst;

    for (; in < stop; in += 3) {
        const int b = in[0];
        const int g = in[1];
        const int r = in[2];

        *out++ = ((r & 0xF8) << 7) | ((g & 0xF8) << 2) | (b >> 3);
    }
}
| 228 | |
/**
 * Pack 3-byte pixels into 15-bit pixels (5-5-5 bits). The first byte of
 * each pixel goes to the highest output bits, the third byte to the lowest.
 *
 * @param src      input pixels, 3 bytes each
 * @param dst      output pixels, written as native 16-bit words
 * @param src_size size of the input in bytes
 */
static inline void rgb24to15_c(const uint8_t *src, uint8_t *dst, int src_size)
{
    const uint8_t *in         = src;
    const uint8_t *const stop = src + src_size;
    uint16_t *out             = (uint16_t *)dst;

    for (; in < stop; in += 3) {
        const int r = in[0];
        const int g = in[1];
        const int b = in[2];

        *out++ = ((r & 0xF8) << 7) | ((g & 0xF8) << 2) | (b >> 3);
    }
}
| 242 | |
/**
 * Expand 15-bit pixels (5-5-5 bits) to 3-byte pixels.
 *
 * Each 5-bit field is widened to 8 bits by placing it in the top bits of
 * the byte and repeating its upper 3 bits in the low bits. The field in the
 * lowest bits is written first.
 *
 * @param src      input pixels, read as native 16-bit words
 * @param dst      output buffer; 3 bytes are written per input pixel
 * @param src_size size of the input in bytes
 */
static inline void rgb15tobgr24_c(const uint8_t *src, uint8_t *dst,
                                  int src_size)
{
    const uint16_t *in = (const uint16_t *)src;
    const int pixels   = src_size / 2;
    uint8_t *out       = dst;
    int n;

    for (n = 0; n < pixels; n++, out += 3) {
        const int px   = in[n];
        const int low  = px & 0x001F;
        const int mid  = px & 0x03E0;
        const int high = px & 0x7C00;

        out[0] = (low  << 3) | (low  >> 2);
        out[1] = (mid  >> 2) | (mid  >> 7);
        out[2] = (high >> 7) | (high >> 12);
    }
}
| 257 | |
/**
 * Expand 16-bit pixels (5-6-5 bits) to 3-byte pixels.
 *
 * Each field is widened to 8 bits by placing it in the top bits of the
 * byte and repeating its upper bits in the low bits. The field in the
 * lowest bits is written first.
 *
 * @param src      input pixels, read as native 16-bit words
 * @param dst      output buffer; 3 bytes are written per input pixel
 * @param src_size size of the input in bytes
 */
static inline void rgb16tobgr24_c(const uint8_t *src, uint8_t *dst,
                                  int src_size)
{
    const uint16_t *in = (const uint16_t *)src;
    const int pixels   = src_size / 2;
    uint8_t *out       = (uint8_t *)dst;
    int n;

    for (n = 0; n < pixels; n++, out += 3) {
        const int px   = in[n];
        const int low  = px & 0x001F;
        const int mid  = px & 0x07E0;
        const int high = px & 0xF800;

        out[0] = (low  << 3) | (low  >> 2);
        out[1] = (mid  >> 3) | (mid  >> 9);
        out[2] = (high >> 8) | (high >> 13);
    }
}
| 272 | |
/**
 * Expand 15-bit pixels (5-5-5 bits) to 4-byte pixels with an opaque (255)
 * alpha byte.
 *
 * Each 5-bit field is widened to 8 bits by placing it in the top bits of
 * the byte and repeating its upper 3 bits in the low bits. The byte order
 * of the output depends on HAVE_BIGENDIAN.
 *
 * @param src      input pixels, read as native 16-bit words
 * @param dst      output buffer; 4 bytes are written per input pixel
 * @param src_size size of the input in bytes
 */
static inline void rgb15to32_c(const uint8_t *src, uint8_t *dst, int src_size)
{
    const uint16_t *in = (const uint16_t *)src;
    const int pixels   = src_size / 2;
    uint8_t *out       = dst;
    int n;

    for (n = 0; n < pixels; n++, out += 4) {
        const int px   = in[n];
        const int low  = px & 0x001F;
        const int mid  = px & 0x03E0;
        const int high = px & 0x7C00;

#if HAVE_BIGENDIAN
        out[0] = 255;
        out[1] = (high >> 7) | (high >> 12);
        out[2] = (mid  >> 2) | (mid  >> 7);
        out[3] = (low  << 3) | (low  >> 2);
#else
        out[0] = (low  << 3) | (low  >> 2);
        out[1] = (mid  >> 2) | (mid  >> 7);
        out[2] = (high >> 7) | (high >> 12);
        out[3] = 255;
#endif
    }
}
| 294 | |
/**
 * Expand 16-bit pixels (5-6-5 bits) to 4-byte pixels with an opaque (255)
 * alpha byte.
 *
 * Each field is widened to 8 bits by placing it in the top bits of the
 * byte and repeating its upper bits in the low bits. The byte order of the
 * output depends on HAVE_BIGENDIAN.
 *
 * @param src      input pixels, read as native 16-bit words
 * @param dst      output buffer; 4 bytes are written per input pixel
 * @param src_size size of the input in bytes
 */
static inline void rgb16to32_c(const uint8_t *src, uint8_t *dst, int src_size)
{
    const uint16_t *in = (const uint16_t *)src;
    const int pixels   = src_size / 2;
    uint8_t *out       = dst;
    int n;

    for (n = 0; n < pixels; n++, out += 4) {
        const int px   = in[n];
        const int low  = px & 0x001F;
        const int mid  = px & 0x07E0;
        const int high = px & 0xF800;

#if HAVE_BIGENDIAN
        out[0] = 255;
        out[1] = (high >> 8) | (high >> 13);
        out[2] = (mid  >> 3) | (mid  >> 9);
        out[3] = (low  << 3) | (low  >> 2);
#else
        out[0] = (low  << 3) | (low  >> 2);
        out[1] = (mid  >> 3) | (mid  >> 9);
        out[2] = (high >> 8) | (high >> 13);
        out[3] = 255;
#endif
    }
}
| 316 | |
/**
 * Swap bits 0..7 with bits 16..23 in every 32-bit word, leaving bits 8..15
 * and 24..31 untouched.
 *
 * All arithmetic is done on unsigned 32-bit values, so moving the low byte
 * up by 16 bits cannot overflow a signed int. The buffers are indexed
 * forwards from their start, so no pointer outside of them is ever formed.
 *
 * @param src      input pixels, read as native 32-bit words
 * @param dst      output pixels, written as native 32-bit words
 * @param src_size size of the input in bytes; one whole word is processed
 *                 for every started group of 4 bytes
 */
static inline void shuffle_bytes_2103_c(const uint8_t *src, uint8_t *dst,
                                        int src_size)
{
    int i;

    for (i = 0; i < src_size; i += 4) {
        const uint32_t v    = *(const uint32_t *)&src[i];
        const uint32_t keep = v & 0xff00ff00;  /* bits that stay in place     */
        const uint32_t swap = v & 0x00ff00ff;  /* the two bytes to exchange   */

        *(uint32_t *)&dst[i] = (swap >> 16) + keep + (swap << 16);
    }
}
| 330 | |
/**
 * Swap the first and third byte of every 3-byte pixel.
 *
 * The third byte is read into a temporary before anything is written, so
 * the conversion also works when src and dst are the same buffer.
 *
 * Only complete pixels are converted: trailing bytes that do not form a
 * whole pixel are left alone, and a zero or negative src_size converts
 * nothing.
 *
 * @param src      input pixels, 3 bytes each
 * @param dst      output pixels, 3 bytes each
 * @param src_size size of the input in bytes
 */
static inline void rgb24tobgr24_c(const uint8_t *src, uint8_t *dst, int src_size)
{
    int i;

    /* signed index: a negative src_size must not turn into a huge bound */
    for (i = 0; i + 2 < src_size; i += 3) {
        const uint8_t third = src[i + 2];

        dst[i + 1] = src[i + 1];
        dst[i + 2] = src[i + 0];
        dst[i + 0] = third;
    }
}
| 342 | |
/**
 * Interleave planar Y, U and V data into packed pixels laid out as
 * Y0 U Y1 V (one U and one V sample per two luma samples).
 *
 * All samples that are shifted into the top byte of a word are converted
 * to an unsigned type first, so samples >= 128 do not overflow a signed
 * int.
 *
 * NOTE(review): the HAVE_FAST_64BIT path has no HAVE_BIGENDIAN variant;
 * it looks like it produces little-endian ordered words only - confirm
 * for big-endian 64-bit targets.
 *
 * @param ysrc             luma plane
 * @param usrc             first chroma plane
 * @param vsrc             second chroma plane
 * @param dst              packed output; written in 32-bit (or 64-bit)
 *                         words
 * @param width            width in luma samples; width / 2 chroma samples
 *                         are processed per line (two at a time in the
 *                         HAVE_FAST_64BIT path)
 * @param height           number of lines
 * @param lumStride        distance in bytes between luma lines
 * @param chromStride      distance in bytes between chroma lines
 * @param dstStride        distance in bytes between output lines
 * @param vertLumPerChroma number of luma lines sharing one chroma line;
 *                         used as a bit mask, so presumably a power of
 *                         two is expected
 */
static inline void yuvPlanartoyuy2_c(const uint8_t *ysrc, const uint8_t *usrc,
                                     const uint8_t *vsrc, uint8_t *dst,
                                     int width, int height,
                                     int lumStride, int chromStride,
                                     int dstStride, int vertLumPerChroma)
{
    int y, i;
    const int chromWidth = width >> 1;

    for (y = 0; y < height; y++) {
        const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
#if HAVE_FAST_64BIT
        uint64_t *ldst = (uint64_t *)dst;

        for (i = 0; i < chromWidth; i += 2) {
            uint64_t k = yc[0] + (uc[0] << 8) +
                         (yc[1] << 16) + ((unsigned) vc[0] << 24);
            uint64_t l = yc[2] + (uc[1] << 8) +
                         (yc[3] << 16) + ((unsigned) vc[1] << 24);
            *ldst++ = k + (l << 32);
            yc     += 4;
            uc     += 2;
            vc     += 2;
        }
#else
        uint32_t *idst = (uint32_t *)dst;

        for (i = 0; i < chromWidth; i++) {
#if HAVE_BIGENDIAN
            *idst++ = ((uint32_t) yc[0] << 24) + (uc[0] << 16) +
                      (yc[1] << 8) + (vc[0] << 0);
#else
            *idst++ = yc[0] + (uc[0] << 8) +
                      (yc[1] << 16) + ((uint32_t) vc[0] << 24);
#endif
            yc += 2;
            uc++;
            vc++;
        }
#endif
        /* move to the next chroma line after the last luma line using it */
        if ((y & (vertLumPerChroma - 1)) == vertLumPerChroma - 1) {
            usrc += chromStride;
            vsrc += chromStride;
        }
        ysrc += lumStride;
        dst  += dstStride;
    }
}
| 392 | |
/**
 * Convert planar data with one chroma line per two luma lines to packed
 * YUY2 by handing it to yuvPlanartoyuy2_c().
 *
 * Height should be a multiple of 2 and width should be a multiple of 16.
 * (If this is a problem for anyone then tell me, and I will fix it.)
 */
static inline void yv12toyuy2_c(const uint8_t *ysrc, const uint8_t *usrc,
                                const uint8_t *vsrc, uint8_t *dst,
                                int width, int height, int lumStride,
                                int chromStride, int dstStride)
{
    /* every chroma line is reused for two consecutive luma lines */
    const int lumaLinesPerChromaLine = 2;

    //FIXME interpolate chroma
    yuvPlanartoyuy2_c(ysrc, usrc, vsrc, dst,
                      width, height,
                      lumStride, chromStride, dstStride,
                      lumaLinesPerChromaLine);
}
| 406 | |
/**
 * Interleave planar Y, U and V data into packed pixels laid out as
 * U Y0 V Y1 (one U and one V sample per two luma samples).
 *
 * All samples that are shifted into the top byte of a word are converted
 * to an unsigned type first, so samples >= 128 do not overflow a signed
 * int.
 *
 * NOTE(review): the HAVE_FAST_64BIT path has no HAVE_BIGENDIAN variant;
 * it looks like it produces little-endian ordered words only - confirm
 * for big-endian 64-bit targets.
 *
 * @param ysrc             luma plane
 * @param usrc             first chroma plane
 * @param vsrc             second chroma plane
 * @param dst              packed output; written in 32-bit (or 64-bit)
 *                         words
 * @param width            width in luma samples; width / 2 chroma samples
 *                         are processed per line (two at a time in the
 *                         HAVE_FAST_64BIT path)
 * @param height           number of lines
 * @param lumStride        distance in bytes between luma lines
 * @param chromStride      distance in bytes between chroma lines
 * @param dstStride        distance in bytes between output lines
 * @param vertLumPerChroma number of luma lines sharing one chroma line;
 *                         used as a bit mask, so presumably a power of
 *                         two is expected
 */
static inline void yuvPlanartouyvy_c(const uint8_t *ysrc, const uint8_t *usrc,
                                     const uint8_t *vsrc, uint8_t *dst,
                                     int width, int height,
                                     int lumStride, int chromStride,
                                     int dstStride, int vertLumPerChroma)
{
    int y, i;
    const int chromWidth = width >> 1;

    for (y = 0; y < height; y++) {
        const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
#if HAVE_FAST_64BIT
        uint64_t *ldst = (uint64_t *)dst;

        for (i = 0; i < chromWidth; i += 2) {
            uint64_t k = uc[0] + (yc[0] << 8) +
                         (vc[0] << 16) + ((unsigned) yc[1] << 24);
            uint64_t l = uc[1] + (yc[2] << 8) +
                         (vc[1] << 16) + ((unsigned) yc[3] << 24);
            *ldst++ = k + (l << 32);
            yc     += 4;
            uc     += 2;
            vc     += 2;
        }
#else
        uint32_t *idst = (uint32_t *)dst;

        for (i = 0; i < chromWidth; i++) {
#if HAVE_BIGENDIAN
            *idst++ = ((uint32_t) uc[0] << 24) + (yc[0] << 16) +
                      (vc[0] << 8) + (yc[1] << 0);
#else
            *idst++ = uc[0] + (yc[0] << 8) +
                      (vc[0] << 16) + ((uint32_t) yc[1] << 24);
#endif
            yc += 2;
            uc++;
            vc++;
        }
#endif
        /* move to the next chroma line after the last luma line using it */
        if ((y & (vertLumPerChroma - 1)) == vertLumPerChroma - 1) {
            usrc += chromStride;
            vsrc += chromStride;
        }
        ysrc += lumStride;
        dst  += dstStride;
    }
}
| 456 | |
/**
 * Convert planar data with one chroma line per two luma lines to packed
 * UYVY by handing it to yuvPlanartouyvy_c().
 *
 * Height should be a multiple of 2 and width should be a multiple of 16.
 * (If this is a problem for anyone then tell me, and I will fix it.)
 */
static inline void yv12touyvy_c(const uint8_t *ysrc, const uint8_t *usrc,
                                const uint8_t *vsrc, uint8_t *dst,
                                int width, int height, int lumStride,
                                int chromStride, int dstStride)
{
    /* every chroma line is reused for two consecutive luma lines */
    const int lumaLinesPerChromaLine = 2;

    //FIXME interpolate chroma
    yuvPlanartouyvy_c(ysrc, usrc, vsrc, dst,
                      width, height,
                      lumStride, chromStride, dstStride,
                      lumaLinesPerChromaLine);
}
| 470 | |
/**
 * Convert planar data with one chroma line per luma line to packed UYVY
 * by handing it to yuvPlanartouyvy_c().
 *
 * Width should be a multiple of 16.
 */
static inline void yuv422ptouyvy_c(const uint8_t *ysrc, const uint8_t *usrc,
                                   const uint8_t *vsrc, uint8_t *dst,
                                   int width, int height, int lumStride,
                                   int chromStride, int dstStride)
{
    /* every luma line has its own chroma line */
    const int lumaLinesPerChromaLine = 1;

    yuvPlanartouyvy_c(ysrc, usrc, vsrc, dst,
                      width, height,
                      lumStride, chromStride, dstStride,
                      lumaLinesPerChromaLine);
}
| 482 | |
/**
 * Convert planar data with one chroma line per luma line to packed YUY2
 * by handing it to yuvPlanartoyuy2_c().
 *
 * Width should be a multiple of 16.
 */
static inline void yuv422ptoyuy2_c(const uint8_t *ysrc, const uint8_t *usrc,
                                   const uint8_t *vsrc, uint8_t *dst,
                                   int width, int height, int lumStride,
                                   int chromStride, int dstStride)
{
    /* every luma line has its own chroma line */
    const int lumaLinesPerChromaLine = 1;

    yuvPlanartoyuy2_c(ysrc, usrc, vsrc, dst,
                      width, height,
                      lumStride, chromStride, dstStride,
                      lumaLinesPerChromaLine);
}
| 494 | |
/**
 * Split packed Y0 U Y1 V data into separate Y, U and V planes with one
 * chroma line per two luma lines.
 *
 * Lines are processed in pairs: luma and chroma are taken from the first
 * line of a pair, only luma from the second one (its chroma is ignored).
 * If height is odd, the final line is treated as the first line of a pair
 * and no line beyond the image is touched.
 *
 * Width should be a multiple of 2; width / 2 chroma samples are produced
 * per chroma line.
 *
 * @param src         packed input
 * @param ydst        luma output plane
 * @param udst        output plane for byte 1 of each 4-byte group
 * @param vdst        output plane for byte 3 of each 4-byte group
 * @param width       width in luma samples
 * @param height      number of lines
 * @param lumStride   distance in bytes between luma output lines
 * @param chromStride distance in bytes between chroma output lines
 * @param srcStride   distance in bytes between input lines
 */
static inline void yuy2toyv12_c(const uint8_t *src, uint8_t *ydst,
                                uint8_t *udst, uint8_t *vdst,
                                int width, int height, int lumStride,
                                int chromStride, int srcStride)
{
    int y;
    const int chromWidth = width >> 1;

    for (y = 0; y < height; y += 2) {
        int i;

        /* first line of the pair: luma and chroma */
        for (i = 0; i < chromWidth; i++) {
            ydst[2 * i + 0] = src[4 * i + 0];
            udst[i]         = src[4 * i + 1];
            ydst[2 * i + 1] = src[4 * i + 2];
            vdst[i]         = src[4 * i + 3];
        }
        ydst += lumStride;
        src  += srcStride;

        /* odd height: there is no second line in the last pair */
        if (y + 1 == height)
            break;

        /* second line of the pair: luma only */
        for (i = 0; i < chromWidth; i++) {
            ydst[2 * i + 0] = src[4 * i + 0];
            ydst[2 * i + 1] = src[4 * i + 2];
        }
        udst += chromStride;
        vdst += chromStride;
        ydst += lumStride;
        src  += srcStride;
    }
}
| 528 | |
/**
 * Upscale one plane to twice its width and height.
 *
 * The first and the last output line are interpolated horizontally only,
 * from the first and the last source line respectively. Every pair of
 * output lines in between is built from two vertically adjacent source
 * lines. All interpolated samples mix two source samples with weights
 * 3:1 (sum shifted right by 2); the corner samples of the first and last
 * line are plain copies.
 *
 * @param src       source plane
 * @param dst       destination plane; 2 * srcWidth samples per line and
 *                  2 * srcHeight lines are written
 * @param srcWidth  source width in samples
 * @param srcHeight source height in lines
 * @param srcStride distance in bytes between source lines
 * @param dstStride distance in bytes between destination lines
 */
static inline void planar2x_c(const uint8_t *src, uint8_t *dst, int srcWidth,
                              int srcHeight, int srcStride, int dstStride)
{
    const int last = srcWidth - 1;  /* index of the last source column */
    int col, row;

    /* first line */
    dst[0] = src[0];
    for (col = 0; col < last; col++) {
        dst[2 * col + 1] = (3 * src[col] + src[col + 1]) >> 2;
        dst[2 * col + 2] = (src[col] + 3 * src[col + 1]) >> 2;
    }
    dst[2 * last + 1] = src[last];

    dst += dstStride;

    for (row = 1; row < srcHeight; row++) {
        const uint8_t *upper = src;              /* current source line   */
        const uint8_t *lower = src + srcStride;  /* source line below it  */
        uint8_t *out0        = dst;              /* first output line     */
        uint8_t *out1        = dst + dstStride;  /* second output line    */

        /* left border: vertical interpolation only */
        out0[0] = (3 * upper[0] + lower[0]) >> 2;
        out1[0] = (upper[0] + 3 * lower[0]) >> 2;

        /* interior: each sample mixes two diagonally adjacent samples */
        for (col = 0; col < last; col++) {
            out0[2 * col + 1] = (3 * upper[col] + lower[col + 1]) >> 2;
            out1[2 * col + 2] = (upper[col] + 3 * lower[col + 1]) >> 2;
            out1[2 * col + 1] = (upper[col + 1] + 3 * lower[col]) >> 2;
            out0[2 * col + 2] = (3 * upper[col + 1] + lower[col]) >> 2;
        }

        /* right border: vertical interpolation only */
        out0[2 * last + 1] = (3 * upper[last] + lower[last]) >> 2;
        out1[2 * last + 1] = (upper[last] + 3 * lower[last]) >> 2;

        dst += 2 * dstStride;
        src += srcStride;
    }

    /* last line */
    dst[0] = src[0];
    for (col = 0; col < last; col++) {
        dst[2 * col + 1] = (3 * src[col] + src[col + 1]) >> 2;
        dst[2 * col + 2] = (src[col] + 3 * src[col + 1]) >> 2;
    }
    dst[2 * last + 1] = src[last];
}
| 573 | |
/**
 * Split packed U Y0 V Y1 data into separate Y, U and V planes with one
 * chroma line per two luma lines.
 *
 * Lines are processed in pairs: luma and chroma are taken from the first
 * line of a pair, only luma from the second one. Chrominance data is only
 * taken from every second line, others are ignored.
 * If height is odd, the final line is treated as the first line of a pair
 * and no line beyond the image is touched.
 *
 * Width should be a multiple of 2; width / 2 chroma samples are produced
 * per chroma line.
 * FIXME: Write HQ version.
 *
 * @param src         packed input
 * @param ydst        luma output plane
 * @param udst        output plane for byte 0 of each 4-byte group
 * @param vdst        output plane for byte 2 of each 4-byte group
 * @param width       width in luma samples
 * @param height      number of lines
 * @param lumStride   distance in bytes between luma output lines
 * @param chromStride distance in bytes between chroma output lines
 * @param srcStride   distance in bytes between input lines
 */
static inline void uyvytoyv12_c(const uint8_t *src, uint8_t *ydst,
                                uint8_t *udst, uint8_t *vdst,
                                int width, int height, int lumStride,
                                int chromStride, int srcStride)
{
    int y;
    const int chromWidth = width >> 1;

    for (y = 0; y < height; y += 2) {
        int i;

        /* first line of the pair: luma and chroma */
        for (i = 0; i < chromWidth; i++) {
            udst[i]         = src[4 * i + 0];
            ydst[2 * i + 0] = src[4 * i + 1];
            vdst[i]         = src[4 * i + 2];
            ydst[2 * i + 1] = src[4 * i + 3];
        }
        ydst += lumStride;
        src  += srcStride;

        /* odd height: there is no second line in the last pair */
        if (y + 1 == height)
            break;

        /* second line of the pair: luma only */
        for (i = 0; i < chromWidth; i++) {
            ydst[2 * i + 0] = src[4 * i + 1];
            ydst[2 * i + 1] = src[4 * i + 3];
        }
        udst += chromStride;
        vdst += chromStride;
        ydst += lumStride;
        src  += srcStride;
    }
}
| 609 | |
| 610 | /** |
| 611 | * Height should be a multiple of 2 and width should be a multiple of 2. |
| 612 | * (If this is a problem for anyone then tell me, and I will fix it.) |
| 613 | * Chrominance data is only taken from every second line, |
| 614 | * others are ignored in the C version. |
| 615 | * FIXME: Write HQ version. |
| 616 | */ |
| 617 | void ff_rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst, |
| 618 | uint8_t *vdst, int width, int height, int lumStride, |
| 619 | int chromStride, int srcStride, int32_t *rgb2yuv) |
| 620 | { |
| 621 | int32_t ry = rgb2yuv[RY_IDX], gy = rgb2yuv[GY_IDX], by = rgb2yuv[BY_IDX]; |
| 622 | int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX]; |
| 623 | int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX]; |
| 624 | int y; |
| 625 | const int chromWidth = width >> 1; |
| 626 | |
| 627 | for (y = 0; y < height; y += 2) { |
| 628 | int i; |
| 629 | for (i = 0; i < chromWidth; i++) { |
| 630 | unsigned int b = src[6 * i + 0]; |
| 631 | unsigned int g = src[6 * i + 1]; |
| 632 | unsigned int r = src[6 * i + 2]; |
| 633 | |
| 634 | unsigned int Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16; |
| 635 | unsigned int V = ((rv * r + gv * g + bv * b) >> RGB2YUV_SHIFT) + 128; |
| 636 | unsigned int U = ((ru * r + gu * g + bu * b) >> RGB2YUV_SHIFT) + 128; |
| 637 | |
| 638 | udst[i] = U; |
| 639 | vdst[i] = V; |
| 640 | ydst[2 * i] = Y; |
| 641 | |
| 642 | b = src[6 * i + 3]; |
| 643 | g = src[6 * i + 4]; |
| 644 | r = src[6 * i + 5]; |
| 645 | |
| 646 | Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16; |
| 647 | ydst[2 * i + 1] = Y; |
| 648 | } |
| 649 | ydst += lumStride; |
| 650 | src += srcStride; |
| 651 | |
| 652 | if (y+1 == height) |
| 653 | break; |
| 654 | |
| 655 | for (i = 0; i < chromWidth; i++) { |
| 656 | unsigned int b = src[6 * i + 0]; |
| 657 | unsigned int g = src[6 * i + 1]; |
| 658 | unsigned int r = src[6 * i + 2]; |
| 659 | |
| 660 | unsigned int Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16; |
| 661 | |
| 662 | ydst[2 * i] = Y; |
| 663 | |
| 664 | b = src[6 * i + 3]; |
| 665 | g = src[6 * i + 4]; |
| 666 | r = src[6 * i + 5]; |
| 667 | |
| 668 | Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16; |
| 669 | ydst[2 * i + 1] = Y; |
| 670 | } |
| 671 | udst += chromStride; |
| 672 | vdst += chromStride; |
| 673 | ydst += lumStride; |
| 674 | src += srcStride; |
| 675 | } |
| 676 | } |
| 677 | |
/**
 * Interleave two byte planes line by line: every output line consists of
 * alternating bytes from src1 and src2, starting with src1.
 *
 * @param src1       first input plane
 * @param src2       second input plane
 * @param dest       output plane; 2 * width bytes are written per line
 * @param width      number of bytes taken from each input per line
 * @param height     number of lines
 * @param src1Stride distance in bytes between lines of src1
 * @param src2Stride distance in bytes between lines of src2
 * @param dstStride  distance in bytes between output lines
 */
static void interleaveBytes_c(const uint8_t *src1, const uint8_t *src2,
                              uint8_t *dest, int width, int height,
                              int src1Stride, int src2Stride, int dstStride)
{
    int row, col;

    for (row = 0; row < height; row++) {
        uint8_t *out = dest;

        for (col = 0; col < width; col++) {
            *out++ = src1[col];
            *out++ = src2[col];
        }
        src1 += src1Stride;
        src2 += src2Stride;
        dest += dstStride;
    }
}
| 695 | |
/**
 * Split an interleaved byte plane into two planes line by line: bytes at
 * even positions go to dst1, bytes at odd positions go to dst2.
 *
 * @param src        interleaved input; 2 * width bytes are read per line
 * @param dst1       output plane for the even-positioned bytes
 * @param dst2       output plane for the odd-positioned bytes
 * @param width      number of bytes written to each output per line
 * @param height     number of lines
 * @param srcStride  distance in bytes between input lines
 * @param dst1Stride distance in bytes between lines of dst1
 * @param dst2Stride distance in bytes between lines of dst2
 */
static void deinterleaveBytes_c(const uint8_t *src, uint8_t *dst1, uint8_t *dst2,
                                int width, int height, int srcStride,
                                int dst1Stride, int dst2Stride)
{
    int row, col;

    for (row = 0; row < height; row++) {
        const uint8_t *in = src;

        for (col = 0; col < width; col++, in += 2) {
            dst1[col] = in[0];
            dst2[col] = in[1];
        }
        src  += srcStride;
        dst1 += dst1Stride;
        dst2 += dst2Stride;
    }
}
| 713 | |
/**
 * Upscale two planes by a factor of two in both directions using sample
 * repetition: every source sample is written twice horizontally and every
 * source line is used for two output lines.
 *
 * width / 2 source samples per line and height / 2 output lines are
 * processed for each plane.
 *
 * @param src1       first source plane
 * @param src2       second source plane
 * @param dst1       destination for src1
 * @param dst2       destination for src2
 * @param width      twice the number of source samples read per line
 * @param height     twice the number of output lines written
 * @param srcStride1 distance in bytes between lines of src1
 * @param srcStride2 distance in bytes between lines of src2
 * @param dstStride1 distance in bytes between lines of dst1
 * @param dstStride2 distance in bytes between lines of dst2
 */
static inline void vu9_to_vu12_c(const uint8_t *src1, const uint8_t *src2,
                                 uint8_t *dst1, uint8_t *dst2,
                                 int width, int height,
                                 int srcStride1, int srcStride2,
                                 int dstStride1, int dstStride2)
{
    const int cols  = width / 2;
    const int lines = height / 2;
    int col, line;

    /* first plane */
    for (line = 0; line < lines; line++) {
        const uint8_t *in = src1 + srcStride1 * (line >> 1);
        uint8_t *out      = dst1 + dstStride1 * line;

        for (col = 0; col < cols; col++) {
            const uint8_t sample = in[col];
            out[2 * col + 1] = sample;
            out[2 * col]     = sample;
        }
    }

    /* second plane */
    for (line = 0; line < lines; line++) {
        const uint8_t *in = src2 + srcStride2 * (line >> 1);
        uint8_t *out      = dst2 + dstStride2 * line;

        for (col = 0; col < cols; col++) {
            const uint8_t sample = in[col];
            out[2 * col + 1] = sample;
            out[2 * col]     = sample;
        }
    }
}
| 737 | |
/**
 * Build packed Y U Y V output from three planes where one sample of src2
 * and src3 covers four horizontally adjacent samples of src1 and four
 * consecutive lines.
 *
 * For each of the width / 2 loop steps, four samples from src1 and one
 * sample each from src2 and src3 produce eight output bytes:
 * Y0 U Y1 V Y2 U Y3 V.
 *
 * @param src1       plane providing the Y bytes
 * @param src2       plane providing the U bytes
 * @param src3       plane providing the V bytes
 * @param dst        packed output
 * @param width      twice the number of 8-byte groups written per line
 * @param height     number of output lines
 * @param srcStride1 distance in bytes between lines of src1
 * @param srcStride2 distance in bytes between lines of src2
 * @param srcStride3 distance in bytes between lines of src3
 * @param dstStride  distance in bytes between output lines
 */
static inline void yvu9_to_yuy2_c(const uint8_t *src1, const uint8_t *src2,
                                  const uint8_t *src3, uint8_t *dst,
                                  int width, int height,
                                  int srcStride1, int srcStride2,
                                  int srcStride3, int dstStride)
{
    const int groups = width / 2;
    int line, n;

    for (line = 0; line < height; line++) {
        const uint8_t *yp = src1 + srcStride1 * line;
        const uint8_t *up = src2 + srcStride2 * (line >> 2);
        const uint8_t *vp = src3 + srcStride3 * (line >> 2);
        uint8_t *out      = dst + dstStride * line;

        for (n = 0; n < groups; n++, out += 8) {
            const uint8_t *luma = yp + (n << 2);

            out[0] = luma[0];
            out[1] = up[n];
            out[2] = luma[1];
            out[3] = vp[n];
            out[4] = luma[2];
            out[5] = up[n];
            out[6] = luma[3];
            out[7] = vp[n];
        }
    }
}
| 766 | |
/**
 * Copy every second byte of src, starting with byte 0, to dst.
 *
 * @param src   input, 2 * count bytes are covered
 * @param dst   output, count bytes are written
 * @param count number of bytes to produce; nothing is done if it is <= 0
 */
static void extract_even_c(const uint8_t *src, uint8_t *dst, int count)
{
    int i;

    for (i = 0; i < count; i++)
        dst[i] = src[2 * i];
}
| 777 | |
/**
 * From every group of 4 input bytes, copy byte 0 to dst0 and byte 2 to
 * dst1.
 *
 * @param src   input, 4 * count bytes are covered
 * @param dst0  receives byte 0 of each group
 * @param dst1  receives byte 2 of each group
 * @param count number of groups; nothing is done if it is <= 0
 */
static void extract_even2_c(const uint8_t *src, uint8_t *dst0, uint8_t *dst1,
                            int count)
{
    int i;

    for (i = 0; i < count; i++) {
        const uint8_t *group = src + 4 * i;

        dst0[i] = group[0];
        dst1[i] = group[2];
    }
}
| 791 | |
/**
 * From every group of 4 bytes, average byte 0 of src0 and src1 into dst0
 * and byte 2 of src0 and src1 into dst1. The average is rounded down.
 *
 * @param src0  first input line, 4 * count bytes are covered
 * @param src1  second input line, 4 * count bytes are covered
 * @param dst0  receives the averaged byte 0 of each group
 * @param dst1  receives the averaged byte 2 of each group
 * @param count number of groups; nothing is done if it is <= 0
 */
static void extract_even2avg_c(const uint8_t *src0, const uint8_t *src1,
                               uint8_t *dst0, uint8_t *dst1, int count)
{
    int i;

    for (i = 0; i < count; i++) {
        const uint8_t *a = src0 + 4 * i;
        const uint8_t *b = src1 + 4 * i;

        dst0[i] = (a[0] + b[0]) >> 1;
        dst1[i] = (a[2] + b[2]) >> 1;
    }
}
| 806 | |
/**
 * From every group of 4 input bytes, copy byte 1 to dst0 and byte 3 to
 * dst1.
 *
 * @param src   input, 4 * count bytes are covered
 * @param dst0  receives byte 1 of each group
 * @param dst1  receives byte 3 of each group
 * @param count number of groups; nothing is done if it is <= 0
 */
static void extract_odd2_c(const uint8_t *src, uint8_t *dst0, uint8_t *dst1,
                           int count)
{
    int i;

    for (i = 0; i < count; i++) {
        const uint8_t *group = src + 4 * i;

        dst0[i] = group[1];
        dst1[i] = group[3];
    }
}
| 821 | |
/**
 * From every group of 4 bytes, average byte 1 of src0 and src1 into dst0
 * and byte 3 of src0 and src1 into dst1. The average is rounded down.
 *
 * @param src0  first input line, 4 * count bytes are covered
 * @param src1  second input line, 4 * count bytes are covered
 * @param dst0  receives the averaged byte 1 of each group
 * @param dst1  receives the averaged byte 3 of each group
 * @param count number of groups; nothing is done if it is <= 0
 */
static void extract_odd2avg_c(const uint8_t *src0, const uint8_t *src1,
                              uint8_t *dst0, uint8_t *dst1, int count)
{
    int i;

    for (i = 0; i < count; i++) {
        const uint8_t *a = src0 + 4 * i;
        const uint8_t *b = src1 + 4 * i;

        dst0[i] = (a[1] + b[1]) >> 1;
        dst1[i] = (a[3] + b[3]) >> 1;
    }
}
| 838 | |
/**
 * Split packed Y U Y V data into planes with one chroma line per two
 * input lines.
 *
 * Luma is taken from the even byte positions of every line. On every odd
 * line the chroma bytes (odd positions) of that line and the line above
 * are averaged into one output chroma line.
 */
static void yuyvtoyuv420_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
                           const uint8_t *src, int width, int height,
                           int lumStride, int chromStride, int srcStride)
{
    const int chromWidth = FF_CEIL_RSHIFT(width, 1);
    int line;

    for (line = 0; line < height; line++, src += srcStride, ydst += lumStride) {
        extract_even_c(src, ydst, width);

        if (!(line & 1))
            continue;

        /* odd line: average chroma with the previous line */
        extract_odd2avg_c(src - srcStride, src, udst, vdst, chromWidth);
        udst += chromStride;
        vdst += chromStride;
    }
}
| 858 | |
/**
 * Split packed Y U Y V data into planes with one chroma line per input
 * line.
 *
 * Luma is taken from the even byte positions of every line, chroma from
 * the odd ones.
 */
static void yuyvtoyuv422_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
                           const uint8_t *src, int width, int height,
                           int lumStride, int chromStride, int srcStride)
{
    const int chromWidth = FF_CEIL_RSHIFT(width, 1);
    int line;

    for (line = 0; line < height; line++,
                                  src  += srcStride,
                                  ydst += lumStride,
                                  udst += chromStride,
                                  vdst += chromStride) {
        extract_even_c(src, ydst, width);
        extract_odd2_c(src, udst, vdst, chromWidth);
    }
}
| 876 | |
/**
 * Split packed U Y V Y data into planes with one chroma line per two
 * input lines.
 *
 * Luma is taken from the odd byte positions of every line. On every odd
 * line the chroma bytes (even positions) of that line and the line above
 * are averaged into one output chroma line.
 */
static void uyvytoyuv420_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
                           const uint8_t *src, int width, int height,
                           int lumStride, int chromStride, int srcStride)
{
    const int chromWidth = FF_CEIL_RSHIFT(width, 1);
    int line;

    for (line = 0; line < height; line++, src += srcStride, ydst += lumStride) {
        extract_even_c(src + 1, ydst, width);

        if (!(line & 1))
            continue;

        /* odd line: average chroma with the previous line */
        extract_even2avg_c(src - srcStride, src, udst, vdst, chromWidth);
        udst += chromStride;
        vdst += chromStride;
    }
}
| 896 | |
/**
 * Split packed U Y V Y data into planes with one chroma line per input
 * line.
 *
 * Luma is taken from the odd byte positions of every line, chroma from
 * the even ones.
 */
static void uyvytoyuv422_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
                           const uint8_t *src, int width, int height,
                           int lumStride, int chromStride, int srcStride)
{
    const int chromWidth = FF_CEIL_RSHIFT(width, 1);
    int line;

    for (line = 0; line < height; line++,
                                  src  += srcStride,
                                  ydst += lumStride,
                                  udst += chromStride,
                                  vdst += chromStride) {
        extract_even_c(src + 1, ydst, width);
        extract_even2_c(src, udst, vdst, chromWidth);
    }
}
| 914 | |
| 915 | static av_cold void rgb2rgb_init_c(void) |
| 916 | { |
| 917 | rgb15to16 = rgb15to16_c; |
| 918 | rgb15tobgr24 = rgb15tobgr24_c; |
| 919 | rgb15to32 = rgb15to32_c; |
| 920 | rgb16tobgr24 = rgb16tobgr24_c; |
| 921 | rgb16to32 = rgb16to32_c; |
| 922 | rgb16to15 = rgb16to15_c; |
| 923 | rgb24tobgr16 = rgb24tobgr16_c; |
| 924 | rgb24tobgr15 = rgb24tobgr15_c; |
| 925 | rgb24tobgr32 = rgb24tobgr32_c; |
| 926 | rgb32to16 = rgb32to16_c; |
| 927 | rgb32to15 = rgb32to15_c; |
| 928 | rgb32tobgr24 = rgb32tobgr24_c; |
| 929 | rgb24to15 = rgb24to15_c; |
| 930 | rgb24to16 = rgb24to16_c; |
| 931 | rgb24tobgr24 = rgb24tobgr24_c; |
| 932 | shuffle_bytes_2103 = shuffle_bytes_2103_c; |
| 933 | rgb32tobgr16 = rgb32tobgr16_c; |
| 934 | rgb32tobgr15 = rgb32tobgr15_c; |
| 935 | yv12toyuy2 = yv12toyuy2_c; |
| 936 | yv12touyvy = yv12touyvy_c; |
| 937 | yuv422ptoyuy2 = yuv422ptoyuy2_c; |
| 938 | yuv422ptouyvy = yuv422ptouyvy_c; |
| 939 | yuy2toyv12 = yuy2toyv12_c; |
| 940 | planar2x = planar2x_c; |
| 941 | ff_rgb24toyv12 = ff_rgb24toyv12_c; |
| 942 | interleaveBytes = interleaveBytes_c; |
| 943 | deinterleaveBytes = deinterleaveBytes_c; |
| 944 | vu9_to_vu12 = vu9_to_vu12_c; |
| 945 | yvu9_to_yuy2 = yvu9_to_yuy2_c; |
| 946 | |
| 947 | uyvytoyuv420 = uyvytoyuv420_c; |
| 948 | uyvytoyuv422 = uyvytoyuv422_c; |
| 949 | yuyvtoyuv420 = yuyvtoyuv420_c; |
| 950 | yuyvtoyuv422 = yuyvtoyuv422_c; |
| 951 | } |