X-Git-Url: https://git.piment-noir.org/?p=deb_ffmpeg.git;a=blobdiff_plain;f=ffmpeg%2Flibavcodec%2Fppc%2Fme_cmp.c;h=38a7ba147686f2ff16c861c5a77061f55268ff20;hp=18f2c6e499e36071aea1cc4fc9dbe3448719efab;hb=f6fa7814ccfe3e76514b36cf04f5cd3cb657c8cf;hpb=2ba45a602cbfa7b771effba9b11bb4245c21bc00

diff --git a/ffmpeg/libavcodec/ppc/me_cmp.c b/ffmpeg/libavcodec/ppc/me_cmp.c
index 18f2c6e..38a7ba1 100644
--- a/ffmpeg/libavcodec/ppc/me_cmp.c
+++ b/ffmpeg/libavcodec/ppc/me_cmp.c
@@ -35,26 +35,43 @@
 #include "libavcodec/me_cmp.h"
 
 #if HAVE_ALTIVEC
+
+#if HAVE_BIGENDIAN
+#define GET_PERM(per1, per2, pix) {\
+    per1 = vec_lvsl(0, pix);\
+    per2 = vec_add(per1, vec_splat_u8(1));\
+}
+#define LOAD_PIX(v, iv, pix, per1, per2) {\
+    vector unsigned char pix2l = vec_ld(0, pix);\
+    vector unsigned char pix2r = vec_ld(16, pix);\
+    v  = vec_perm(pix2l, pix2r, per1);\
+    iv = vec_perm(pix2l, pix2r, per2);\
+}
+#else
+#define GET_PERM(per1, per2, pix) {}
+#define LOAD_PIX(v, iv, pix, per1, per2) {\
+    v  = vec_vsx_ld(0, pix);\
+    iv = vec_vsx_ld(1, pix);\
+}
+#endif
 static int sad16_x2_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
-                            int line_size, int h)
+                            ptrdiff_t stride, int h)
 {
-    int i, s = 0;
+    int i;
+    int __attribute__((aligned(16))) s = 0;
     const vector unsigned char zero =
         (const vector unsigned char) vec_splat_u8(0);
-    vector unsigned char perm1 = vec_lvsl(0, pix2);
-    vector unsigned char perm2 = vec_add(perm1, vec_splat_u8(1));
     vector unsigned int sad = (vector unsigned int) vec_splat_u32(0);
     vector signed int sumdiffs;
+    vector unsigned char perm1, perm2, pix2v, pix2iv;
 
+    GET_PERM(perm1, perm2, pix2);
     for (i = 0; i < h; i++) {
         /* Read unaligned pixels into our vectors. The vectors are as follows:
          * pix1v: pix1[0] - pix1[15]
          * pix2v: pix2[0] - pix2[15]      pix2iv: pix2[1] - pix2[16] */
         vector unsigned char pix1v = vec_ld(0, pix1);
-        vector unsigned char pix2l = vec_ld(0, pix2);
-        vector unsigned char pix2r = vec_ld(16, pix2);
-        vector unsigned char pix2v = vec_perm(pix2l, pix2r, perm1);
-        vector unsigned char pix2iv = vec_perm(pix2l, pix2r, perm2);
+        LOAD_PIX(pix2v, pix2iv, pix2, perm1, perm2);
 
         /* Calculate the average vector. */
         vector unsigned char avgv = vec_avg(pix2v, pix2iv);
@@ -66,8 +83,8 @@ static int sad16_x2_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
         /* Add each 4 pixel group together and put 4 results into sad. */
         sad = vec_sum4s(t5, sad);
 
-        pix1 += line_size;
-        pix2 += line_size;
+        pix1 += stride;
+        pix2 += stride;
     }
     /* Sum up the four partial sums, and put the result into s. */
     sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);
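A note on the new load machinery, since every routine in this patch leans on it. Big-endian AltiVec has no unaligned vector load: vec_ld silently rounds the address down to a 16-byte boundary. GET_PERM/LOAD_PIX therefore load the two aligned blocks that straddle pix and merge them with vec_perm, using the permute vector returned by vec_lvsl; perm2 = perm1 + 1 selects the same window shifted one byte, giving pix[1]..pix[16] for the half-pel case. On little-endian POWER with VSX, vec_vsx_ld accepts any alignment, so the permute machinery compiles away. The single-vector helpers VEC_LD, VEC_MERGEH/VEC_MERGEL and unaligned_load used further down follow the same pattern and are defined outside this diff (presumably in the companion util_altivec.h changes). Below is a minimal scalar model of the big-endian path for readers without a PPC toolchain; the helper name load_pix_model is illustrative, not part of the patch:

/* Scalar model of the big-endian LOAD_PIX above; load_pix_model is a
 * hypothetical name used for illustration and is not part of FFmpeg. */
#include <stdint.h>
#include <string.h>

static void load_pix_model(const uint8_t *pix, uint8_t v[16], uint8_t iv[16])
{
    /* vec_ld ignores the low four address bits, so pix2l/pix2r are the two
     * aligned 16-byte blocks that cover pix[0]..pix[16]. */
    const uint8_t *base = (const uint8_t *)((uintptr_t)pix & ~(uintptr_t)15);
    unsigned off = (unsigned)((uintptr_t)pix & 15); /* what vec_lvsl encodes */
    uint8_t block[32];

    memcpy(block, base, 32);         /* pix2l = vec_ld(0), pix2r = vec_ld(16) */
    memcpy(v,  block + off,     16); /* vec_perm(pix2l, pix2r, perm1)         */
    memcpy(iv, block + off + 1, 16); /* perm2 = perm1 + 1: one byte further   */
}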
@@ -78,37 +95,33 @@ static int sad16_x2_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
 }
 
 static int sad16_y2_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
-                            int line_size, int h)
+                            ptrdiff_t stride, int h)
 {
-    int i, s = 0;
+    int i;
+    int __attribute__((aligned(16))) s = 0;
     const vector unsigned char zero =
         (const vector unsigned char) vec_splat_u8(0);
-    vector unsigned char perm = vec_lvsl(0, pix2);
     vector unsigned char pix1v, pix3v, avgv, t5;
     vector unsigned int sad = (vector unsigned int) vec_splat_u32(0);
     vector signed int sumdiffs;
-    uint8_t *pix3 = pix2 + line_size;
-    /* Due to the fact that pix3 = pix2 + line_size, the pix3 of one
+    uint8_t *pix3 = pix2 + stride;
+
+    /* Due to the fact that pix3 = pix2 + stride, the pix3 of one
      * iteration becomes pix2 in the next iteration. We can use this
      * fact to avoid a potentially expensive unaligned read, each
      * time around the loop.
      * Read unaligned pixels into our vectors. The vectors are as follows:
      * pix2v: pix2[0] - pix2[15]
      * Split the pixel vectors into shorts. */
-    vector unsigned char pix2l = vec_ld(0, pix2);
-    vector unsigned char pix2r = vec_ld(15, pix2);
-    vector unsigned char pix2v = vec_perm(pix2l, pix2r, perm);
+    vector unsigned char pix2v = VEC_LD(0, pix2);
 
     for (i = 0; i < h; i++) {
         /* Read unaligned pixels into our vectors. The vectors are as follows:
          * pix1v: pix1[0] - pix1[15]
          * pix3v: pix3[0] - pix3[15] */
         pix1v = vec_ld(0, pix1);
-
-        pix2l = vec_ld(0, pix3);
-        pix2r = vec_ld(15, pix3);
-        pix3v = vec_perm(pix2l, pix2r, perm);
+        pix3v = VEC_LD(0, pix3);
 
         /* Calculate the average vector. */
         avgv = vec_avg(pix2v, pix3v);
@@ -119,9 +132,9 @@ static int sad16_y2_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
         /* Add each 4 pixel group together and put 4 results into sad. */
         sad = vec_sum4s(t5, sad);
 
-        pix1 += line_size;
+        pix1 += stride;
         pix2v = pix3v;
-        pix3 += line_size;
+        pix3 += stride;
     }
 
     /* Sum up the four partial sums, and put the result into s. */
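The loop above pays for only one unaligned load per row: the row read as pix3v in one iteration is carried over as pix2v in the next (pix2v = pix3v). A scalar sketch of the same computation, with the reuse made explicit; sad16_y2_model is an illustrative name, not FFmpeg API:

#include <stddef.h>
#include <stdint.h>

static int sad16_y2_model(const uint8_t *pix1, const uint8_t *pix2_in,
                          ptrdiff_t stride, int h)
{
    const uint8_t *pix2 = pix2_in;
    const uint8_t *pix3 = pix2 + stride;
    int s = 0;

    for (int i = 0; i < h; i++) {
        for (int j = 0; j < 16; j++) {
            int avg = (pix2[j] + pix3[j] + 1) >> 1; /* vec_avg rounds up */
            int d   = pix1[j] - avg;
            s      += d < 0 ? -d : d;
        }
        pix1 += stride;
        pix2  = pix3;   /* scalar analogue of pix2v = pix3v: one load per row */
        pix3 += stride;
    }
    return s;
}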
@@ -132,43 +145,41 @@ static int sad16_y2_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
 }
 
 static int sad16_xy2_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
-                             int line_size, int h)
+                             ptrdiff_t stride, int h)
 {
-    int i, s = 0;
-    uint8_t *pix3 = pix2 + line_size;
+    int i;
+    int __attribute__((aligned(16))) s = 0;
+    uint8_t *pix3 = pix2 + stride;
     const vector unsigned char zero =
         (const vector unsigned char) vec_splat_u8(0);
     const vector unsigned short two =
         (const vector unsigned short) vec_splat_u16(2);
     vector unsigned char avgv, t5;
-    vector unsigned char perm1 = vec_lvsl(0, pix2);
-    vector unsigned char perm2 = vec_add(perm1, vec_splat_u8(1));
     vector unsigned char pix1v, pix3v, pix3iv;
     vector unsigned short pix3lv, pix3hv, pix3ilv, pix3ihv;
     vector unsigned short avghv, avglv;
     vector unsigned int sad = (vector unsigned int) vec_splat_u32(0);
     vector signed int sumdiffs;
+    vector unsigned char perm1, perm2, pix2v, pix2iv;
 
+    GET_PERM(perm1, perm2, pix2);
-    /* Due to the fact that pix3 = pix2 + line_size, the pix3 of one
+    /* Due to the fact that pix3 = pix2 + stride, the pix3 of one
      * iteration becomes pix2 in the next iteration. We can use this
      * fact to avoid a potentially expensive unaligned read, as well
      * as some splitting, and vector addition each time around the loop.
      * Read unaligned pixels into our vectors. The vectors are as follows:
      * pix2v: pix2[0] - pix2[15]      pix2iv: pix2[1] - pix2[16]
      * Split the pixel vectors into shorts. */
-    vector unsigned char pix2l = vec_ld(0, pix2);
-    vector unsigned char pix2r = vec_ld(16, pix2);
-    vector unsigned char pix2v = vec_perm(pix2l, pix2r, perm1);
-    vector unsigned char pix2iv = vec_perm(pix2l, pix2r, perm2);
-
+    LOAD_PIX(pix2v, pix2iv, pix2, perm1, perm2);
     vector unsigned short pix2hv =
-        (vector unsigned short) vec_mergeh(zero, pix2v);
+        (vector unsigned short) VEC_MERGEH(zero, pix2v);
     vector unsigned short pix2lv =
-        (vector unsigned short) vec_mergel(zero, pix2v);
+        (vector unsigned short) VEC_MERGEL(zero, pix2v);
     vector unsigned short pix2ihv =
-        (vector unsigned short) vec_mergeh(zero, pix2iv);
+        (vector unsigned short) VEC_MERGEH(zero, pix2iv);
     vector unsigned short pix2ilv =
-        (vector unsigned short) vec_mergel(zero, pix2iv);
+        (vector unsigned short) VEC_MERGEL(zero, pix2iv);
+
     vector unsigned short t1 = vec_add(pix2hv, pix2ihv);
     vector unsigned short t2 = vec_add(pix2lv, pix2ilv);
     vector unsigned short t3, t4;
@@ -178,11 +189,7 @@ static int sad16_xy2_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
          * pix1v: pix1[0] - pix1[15]
          * pix3v: pix3[0] - pix3[15]      pix3iv: pix3[1] - pix3[16] */
         pix1v = vec_ld(0, pix1);
-
-        pix2l = vec_ld(0, pix3);
-        pix2r = vec_ld(16, pix3);
-        pix3v = vec_perm(pix2l, pix2r, perm1);
-        pix3iv = vec_perm(pix2l, pix2r, perm2);
+        LOAD_PIX(pix3v, pix3iv, pix3, perm1, perm2);
 
         /* Note that AltiVec does have vec_avg, but this works on vector pairs
          * and rounds up. We could do avg(avg(a, b), avg(c, d)), but the
@@ -191,10 +198,10 @@ static int sad16_xy2_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
          * vectors of shorts and do the averaging by hand. */
 
         /* Split the pixel vectors into shorts. */
-        pix3hv = (vector unsigned short) vec_mergeh(zero, pix3v);
-        pix3lv = (vector unsigned short) vec_mergel(zero, pix3v);
-        pix3ihv = (vector unsigned short) vec_mergeh(zero, pix3iv);
-        pix3ilv = (vector unsigned short) vec_mergel(zero, pix3iv);
+        pix3hv = (vector unsigned short) VEC_MERGEH(zero, pix3v);
+        pix3lv = (vector unsigned short) VEC_MERGEL(zero, pix3v);
+        pix3ihv = (vector unsigned short) VEC_MERGEH(zero, pix3iv);
+        pix3ilv = (vector unsigned short) VEC_MERGEL(zero, pix3iv);
 
         /* Do the averaging on them. */
         t3 = vec_add(pix3hv, pix3ihv);
@@ -212,8 +219,8 @@ static int sad16_xy2_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
         /* Add each 4 pixel group together and put 4 results into sad. */
         sad = vec_sum4s(t5, sad);
 
-        pix1 += line_size;
-        pix3 += line_size;
+        pix1 += stride;
+        pix3 += stride;
         /* Transfer the calculated values for pix3 into pix2. */
         t1 = t3;
         t2 = t4;
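The comment in sad16_xy2 explains why vec_avg is avoided here: it rounds every pairwise average up, so chaining avg(avg(a, b), avg(c, d)) would accumulate rounding bias. Instead the code widens to shorts, adds the four neighbours plus the rounding constant two, and shifts right by two, which is exact. A scalar sketch of that per-pixel computation, with the same row-sum reuse (t1 = t3; t2 = t4;); sad16_xy2_model is an illustrative name only:

#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>

static int sad16_xy2_model(const uint8_t *pix1, const uint8_t *pix2_in,
                           ptrdiff_t stride, int h)
{
    const uint8_t *pix2 = pix2_in;
    const uint8_t *pix3 = pix2 + stride;
    int s = 0;

    for (int i = 0; i < h; i++) {
        for (int j = 0; j < 16; j++) {
            /* Exact half-pel average in both directions: no double rounding. */
            int avg = (pix2[j] + pix2[j + 1] + pix3[j] + pix3[j + 1] + 2) >> 2;
            s += abs(pix1[j] - avg);
        }
        pix1 += stride;
        pix2  = pix3;   /* reuse the previous row, as t1 = t3; t2 = t4; does */
        pix3 += stride;
    }
    return s;
}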
@@ -227,21 +234,19 @@ static int sad16_xy2_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
 }
 
 static int sad16_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
-                         int line_size, int h)
+                         ptrdiff_t stride, int h)
 {
-    int i, s;
+    int i;
+    int __attribute__((aligned(16))) s;
     const vector unsigned int zero =
         (const vector unsigned int) vec_splat_u32(0);
-    vector unsigned char perm = vec_lvsl(0, pix2);
     vector unsigned int sad = (vector unsigned int) vec_splat_u32(0);
     vector signed int sumdiffs;
 
     for (i = 0; i < h; i++) {
         /* Read potentially unaligned pixels into t1 and t2. */
-        vector unsigned char pix2l = vec_ld(0, pix2);
-        vector unsigned char pix2r = vec_ld(15, pix2);
-        vector unsigned char t1 = vec_ld(0, pix1);
-        vector unsigned char t2 = vec_perm(pix2l, pix2r, perm);
+        vector unsigned char t1 = vec_ld(0, pix1);
+        vector unsigned char t2 = VEC_LD(0, pix2);
 
         /* Calculate a sum of abs differences vector. */
         vector unsigned char t3 = vec_max(t1, t2);
@@ -251,8 +256,8 @@ static int sad16_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
         /* Add each 4 pixel group together and put 4 results into sad. */
         sad = vec_sum4s(t5, sad);
 
-        pix1 += line_size;
-        pix2 += line_size;
+        pix1 += stride;
+        pix2 += stride;
     }
 
     /* Sum up the four partial sums, and put the result into s. */
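The SAD kernel above (and sad8 below) computes per-byte absolute differences without widening or branching, using the identity that for unsigned operands |a - b| equals max(a, b) - min(a, b); vec_sum4s then folds each group of four byte differences into a 32-bit lane. The scalar equivalent of one element, for reference:

#include <stdint.h>

/* max(a, b) - min(a, b) == |a - b| for unsigned values; this is what
 * t5 = vec_sub(vec_max(t1, t2), vec_min(t1, t2)) computes per byte. */
static inline uint8_t absdiff_u8(uint8_t a, uint8_t b)
{
    uint8_t hi = a > b ? a : b;
    uint8_t lo = a > b ? b : a;
    return hi - lo;
}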
@@ -264,16 +269,15 @@ static int sad16_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
 }
 
 static int sad8_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
-                        int line_size, int h)
+                        ptrdiff_t stride, int h)
 {
-    int i, s;
+    int i;
+    int __attribute__((aligned(16))) s;
     const vector unsigned int zero =
         (const vector unsigned int) vec_splat_u32(0);
     const vector unsigned char permclear =
         (vector unsigned char)
         { 255, 255, 255, 255, 255, 255, 255, 255, 0, 0, 0, 0, 0, 0, 0, 0 };
-    vector unsigned char perm1 = vec_lvsl(0, pix1);
-    vector unsigned char perm2 = vec_lvsl(0, pix2);
     vector unsigned int sad = (vector unsigned int) vec_splat_u32(0);
     vector signed int sumdiffs;
@@ -281,14 +285,10 @@ static int sad8_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
         /* Read potentially unaligned pixels into t1 and t2.
          * Since we're reading 16 pixels, and actually only want 8,
          * mask out the last 8 pixels. The 0s don't change the sum. */
-        vector unsigned char pix1l = vec_ld(0, pix1);
-        vector unsigned char pix1r = vec_ld(7, pix1);
-        vector unsigned char pix2l = vec_ld(0, pix2);
-        vector unsigned char pix2r = vec_ld(7, pix2);
-        vector unsigned char t1 = vec_and(vec_perm(pix1l, pix1r, perm1),
-                                          permclear);
-        vector unsigned char t2 = vec_and(vec_perm(pix2l, pix2r, perm2),
-                                          permclear);
+        vector unsigned char pix1l = VEC_LD(0, pix1);
+        vector unsigned char pix2l = VEC_LD(0, pix2);
+        vector unsigned char t1 = vec_and(pix1l, permclear);
+        vector unsigned char t2 = vec_and(pix2l, permclear);
 
         /* Calculate a sum of abs differences vector. */
         vector unsigned char t3 = vec_max(t1, t2);
@@ -298,8 +298,8 @@ static int sad8_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
         /* Add each 4 pixel group together and put 4 results into sad. */
         sad = vec_sum4s(t5, sad);
 
-        pix1 += line_size;
-        pix2 += line_size;
+        pix1 += stride;
+        pix2 += stride;
     }
 
     /* Sum up the four partial sums, and put the result into s. */
@@ -313,16 +313,15 @@ static int sad8_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
 /* Sum of Squared Errors for an 8x8 block, AltiVec-enhanced.
  * It's the sad8_altivec code above w/ squaring added. */
 static int sse8_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
-                        int line_size, int h)
+                        ptrdiff_t stride, int h)
 {
-    int i, s;
+    int i;
+    int __attribute__((aligned(16))) s;
     const vector unsigned int zero =
         (const vector unsigned int) vec_splat_u32(0);
     const vector unsigned char permclear =
         (vector unsigned char)
         { 255, 255, 255, 255, 255, 255, 255, 255, 0, 0, 0, 0, 0, 0, 0, 0 };
-    vector unsigned char perm1 = vec_lvsl(0, pix1);
-    vector unsigned char perm2 = vec_lvsl(0, pix2);
     vector unsigned int sum = (vector unsigned int) vec_splat_u32(0);
     vector signed int sumsqr;
@@ -330,14 +329,8 @@ static int sse8_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
         /* Read potentially unaligned pixels into t1 and t2.
          * Since we're reading 16 pixels, and actually only want 8,
          * mask out the last 8 pixels. The 0s don't change the sum. */
-        vector unsigned char pix1l = vec_ld(0, pix1);
-        vector unsigned char pix1r = vec_ld(7, pix1);
-        vector unsigned char pix2l = vec_ld(0, pix2);
-        vector unsigned char pix2r = vec_ld(7, pix2);
-        vector unsigned char t1 = vec_and(vec_perm(pix1l, pix1r, perm1),
-                                          permclear);
-        vector unsigned char t2 = vec_and(vec_perm(pix2l, pix2r, perm2),
-                                          permclear);
+        vector unsigned char t1 = vec_and(VEC_LD(0, pix1), permclear);
+        vector unsigned char t2 = vec_and(VEC_LD(0, pix2), permclear);
 
         /* Since we want to use unsigned chars, we can take advantage
          * of the fact that abs(a - b) ^ 2 = (a - b) ^ 2. */
@@ -350,8 +343,8 @@ static int sse8_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
         /* Square the values and add them to our sum. */
         sum = vec_msum(t5, t5, sum);
 
-        pix1 += line_size;
-        pix2 += line_size;
+        pix1 += stride;
+        pix2 += stride;
     }
 
     /* Sum up the four partial sums, and put the result into s. */
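The 8-wide variants read a full 16-byte vector but mask the upper eight lanes to zero with permclear; zeros add nothing to a SAD and square to zero in an SSE, so no narrower load is needed. For the squared error, vec_msum(t5, t5, sum) multiplies each byte difference by itself and adds each group of four products into a 32-bit accumulator, relying on |a - b|^2 == (a - b)^2. A scalar sketch of sse8 under those observations; sse8_model is an illustrative name:

#include <stddef.h>
#include <stdint.h>

static int sse8_model(const uint8_t *pix1, const uint8_t *pix2,
                      ptrdiff_t stride, int h)
{
    int sum = 0;

    for (int i = 0; i < h; i++) {
        for (int j = 0; j < 8; j++) {   /* upper 8 lanes are masked to zero */
            int d = pix1[j] > pix2[j] ? pix1[j] - pix2[j] : pix2[j] - pix1[j];
            sum += d * d;               /* |a - b|^2 == (a - b)^2 */
        }
        pix1 += stride;
        pix2 += stride;
    }
    return sum;
}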
@@ -365,21 +358,19 @@ static int sse8_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
 /* Sum of Squared Errors for a 16x16 block, AltiVec-enhanced.
  * It's the sad16_altivec code above w/ squaring added. */
 static int sse16_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
-                         int line_size, int h)
+                         ptrdiff_t stride, int h)
 {
-    int i, s;
+    int i;
+    int __attribute__((aligned(16))) s;
     const vector unsigned int zero =
         (const vector unsigned int) vec_splat_u32(0);
-    vector unsigned char perm = vec_lvsl(0, pix2);
     vector unsigned int sum = (vector unsigned int) vec_splat_u32(0);
     vector signed int sumsqr;
 
     for (i = 0; i < h; i++) {
         /* Read potentially unaligned pixels into t1 and t2. */
-        vector unsigned char pix2l = vec_ld(0, pix2);
-        vector unsigned char pix2r = vec_ld(15, pix2);
         vector unsigned char t1 = vec_ld(0, pix1);
-        vector unsigned char t2 = vec_perm(pix2l, pix2r, perm);
+        vector unsigned char t2 = VEC_LD(0, pix2);
 
         /* Since we want to use unsigned chars, we can take advantage
          * of the fact that abs(a - b) ^ 2 = (a - b) ^ 2. */
@@ -392,22 +383,22 @@ static int sse16_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
         /* Square the values and add them to our sum. */
         sum = vec_msum(t5, t5, sum);
 
-        pix1 += line_size;
-        pix2 += line_size;
+        pix1 += stride;
+        pix2 += stride;
     }
 
     /* Sum up the four partial sums, and put the result into s. */
     sumsqr = vec_sums((vector signed int) sum, (vector signed int) zero);
     sumsqr = vec_splat(sumsqr, 3);
-    vec_ste(sumsqr, 0, &s);
+    vec_ste(sumsqr, 0, &s);
 
     return s;
 }
 
 static int hadamard8_diff8x8_altivec(MpegEncContext *s, uint8_t *dst,
-                                     uint8_t *src, int stride, int h)
+                                     uint8_t *src, ptrdiff_t stride, int h)
 {
-    int sum;
+    int __attribute__((aligned(16))) sum;
     register const vector unsigned char vzero =
         (const vector unsigned char) vec_splat_u8(0);
     register vector signed short temp0, temp1, temp2, temp3, temp4,
@@ -432,24 +423,19 @@ static int hadamard8_diff8x8_altivec(MpegEncContext *s, uint8_t *dst,
         { 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
           0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07 };
+
 #define ONEITERBUTTERFLY(i, res)                                            \
     {                                                                       \
-        register vector unsigned char src1 = vec_ld(stride * i, src);       \
-        register vector unsigned char src2 = vec_ld(stride * i + 15, src);  \
-        register vector unsigned char srcO =                                \
-            vec_perm(src1, src2, vec_lvsl(stride * i, src));                \
-        register vector unsigned char dst1 = vec_ld(stride * i, dst);       \
-        register vector unsigned char dst2 = vec_ld(stride * i + 15, dst);  \
-        register vector unsigned char dstO =                                \
-            vec_perm(dst1, dst2, vec_lvsl(stride * i, dst));                \
+        register vector unsigned char srcO = unaligned_load(stride * i, src); \
+        register vector unsigned char dstO = unaligned_load(stride * i, dst); \
                                                                             \
         /* Promote the unsigned chars to signed shorts. */                  \
         /* We're in the 8x8 function, we only care for the first 8. */      \
         register vector signed short srcV =                                 \
-            (vector signed short) vec_mergeh((vector signed char) vzero,    \
+            (vector signed short) VEC_MERGEH((vector signed char) vzero,    \
                                              (vector signed char) srcO);    \
         register vector signed short dstV =                                 \
-            (vector signed short) vec_mergeh((vector signed char) vzero,    \
+            (vector signed short) VEC_MERGEH((vector signed char) vzero,    \
                                              (vector signed char) dstO);    \
                                                                             \
         /* subtractions inside the first butterfly */                       \
@@ -461,6 +447,7 @@ static int hadamard8_diff8x8_altivec(MpegEncContext *s, uint8_t *dst,
         register vector signed short op3 = vec_perm(but2, but2, perm3);     \
         res = vec_mladd(but2, vprod3, op3);                                 \
     }
+
     ONEITERBUTTERFLY(0, temp0);
     ONEITERBUTTERFLY(1, temp1);
     ONEITERBUTTERFLY(2, temp2);
@@ -510,6 +497,7 @@ static int hadamard8_diff8x8_altivec(MpegEncContext *s, uint8_t *dst,
         vsum = vec_sum4s(vec_abs(line7C), vsum);
         vsum = vec_sums(vsum, (vector signed int) vzero);
         vsum = vec_splat(vsum, 3);
+
         vec_ste(vsum, 0, &sum);
     }
     return sum;
 }
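For the Hadamard functions, ONEITERBUTTERFLY loads one row of src and dst, forms their difference, and applies the first butterfly stage; the vec_perm/vec_mladd pairs with the vprod1..vprod3 sign vectors (partly outside this diff's context lines) implement the remaining stages as sign-flip multiply-adds, and the final score is the sum of absolute transform coefficients, visible above as vsum = vec_sum4s(vec_abs(...), vsum). In scalar terms, each row of differences (and, after a transpose, each column) goes through the three butterfly stages of an 8-point Hadamard transform. A minimal sketch of that scalar transform, assuming nothing beyond the diff; hadamard8_1d is an illustrative name:

#include <stdint.h>

/* Three butterfly stages of an 8-point Hadamard transform, in place. */
static void hadamard8_1d(int16_t d[8])
{
    for (int span = 1; span <= 4; span <<= 1)       /* stage widths 1, 2, 4 */
        for (int i = 0; i < 8; i += span << 1)
            for (int j = i; j < i + span; j++) {
                int16_t a = d[j], b = d[j + span];
                d[j]        = a + b;                /* butterfly: sum ...   */
                d[j + span] = a - b;                /* ... and difference   */
            }
}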
@@ -534,9 +522,9 @@ static int hadamard8_diff8x8_altivec(MpegEncContext *s, uint8_t *dst,
  * but xlc goes to around 660 on the regular C code... */
 static int hadamard8_diff16x8_altivec(MpegEncContext *s, uint8_t *dst,
-                                      uint8_t *src, int stride, int h)
+                                      uint8_t *src, ptrdiff_t stride, int h)
 {
-    int sum;
+    int __attribute__((aligned(16))) sum;
     register vector signed short
         temp0 __asm__ ("v0"),
         temp1 __asm__ ("v1"),
@@ -584,31 +572,23 @@ static int hadamard8_diff16x8_altivec(MpegEncContext *s, uint8_t *dst,
 
 #define ONEITERBUTTERFLY(i, res1, res2)                                     \
     {                                                                       \
-        register vector unsigned char src1 __asm__ ("v22") =                \
-            vec_ld(stride * i, src);                                        \
-        register vector unsigned char src2 __asm__ ("v23") =                \
-            vec_ld(stride * i + 16, src);                                   \
         register vector unsigned char srcO __asm__ ("v22") =                \
-            vec_perm(src1, src2, vec_lvsl(stride * i, src));                \
-        register vector unsigned char dst1 __asm__ ("v24") =                \
-            vec_ld(stride * i, dst);                                        \
-        register vector unsigned char dst2 __asm__ ("v25") =                \
-            vec_ld(stride * i + 16, dst);                                   \
+            unaligned_load(stride * i, src);                                \
         register vector unsigned char dstO __asm__ ("v23") =                \
-            vec_perm(dst1, dst2, vec_lvsl(stride * i, dst));                \
+            unaligned_load(stride * i, dst);                                \
                                                                             \
         /* Promote the unsigned chars to signed shorts. */                  \
         register vector signed short srcV __asm__ ("v24") =                 \
-            (vector signed short) vec_mergeh((vector signed char) vzero,    \
+            (vector signed short) VEC_MERGEH((vector signed char) vzero,    \
                                              (vector signed char) srcO);    \
         register vector signed short dstV __asm__ ("v25") =                 \
-            (vector signed short) vec_mergeh((vector signed char) vzero,    \
+            (vector signed short) VEC_MERGEH((vector signed char) vzero,    \
                                              (vector signed char) dstO);    \
         register vector signed short srcW __asm__ ("v26") =                 \
-            (vector signed short) vec_mergel((vector signed char) vzero,    \
+            (vector signed short) VEC_MERGEL((vector signed char) vzero,    \
                                              (vector signed char) srcO);    \
         register vector signed short dstW __asm__ ("v27") =                 \
-            (vector signed short) vec_mergel((vector signed char) vzero,    \
+            (vector signed short) VEC_MERGEL((vector signed char) vzero,    \
                                              (vector signed char) dstO);    \
                                                                             \
         /* subtractions inside the first butterfly */                       \
@@ -639,6 +619,7 @@ static int hadamard8_diff16x8_altivec(MpegEncContext *s, uint8_t *dst,
         res1 = vec_mladd(but2, vprod3, op3);                                \
         res2 = vec_mladd(but2S, vprod3, op3S);                              \
     }
+
     ONEITERBUTTERFLY(0, temp0, temp0S);
     ONEITERBUTTERFLY(1, temp1, temp1S);
     ONEITERBUTTERFLY(2, temp2, temp2S);
@@ -725,13 +706,14 @@ static int hadamard8_diff16x8_altivec(MpegEncContext *s, uint8_t *dst,
         vsum = vec_sum4s(vec_abs(line7CS), vsum);
         vsum = vec_sums(vsum, (vector signed int) vzero);
         vsum = vec_splat(vsum, 3);
+
         vec_ste(vsum, 0, &sum);
     }
     return sum;
 }
 
 static int hadamard8_diff16_altivec(MpegEncContext *s, uint8_t *dst,
-                                    uint8_t *src, int stride, int h)
+                                    uint8_t *src, ptrdiff_t stride, int h)
 {
     int score = hadamard8_diff16x8_altivec(s, dst, src, stride, 8);
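The diff is cut off inside hadamard8_diff16_altivec. For completeness of reading: in FFmpeg's tree the function simply runs the 16x8 kernel twice, advancing both pointers by eight rows for the second half when h == 16. A sketch of that shape, reconstructed from the upstream source rather than from this patch (hadamard8_diff16_model is an illustrative name):

/* Hedged reconstruction of the truncated tail, not text from this patch. */
static int hadamard8_diff16_model(MpegEncContext *s, uint8_t *dst,
                                  uint8_t *src, ptrdiff_t stride, int h)
{
    int score = hadamard8_diff16x8_altivec(s, dst, src, stride, 8);

    if (h == 16) {
        dst += 8 * stride;   /* second half: rows 8..15 */
        src += 8 * stride;
        score += hadamard8_diff16x8_altivec(s, dst, src, stride, 8);
    }
    return score;
}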