X-Git-Url: https://git.piment-noir.org/?p=deb_x265.git;a=blobdiff_plain;f=source%2Fcommon%2Fvec%2Fdct-sse41.cpp;h=81a7889d1b4c169f6e316fa0ac79128acc0ef472;hp=aa52709b087194d69231dc46f4e15e25dfa7f4b7;hb=b53f7c52d8280ab63876efd6eb292c21430ac607;hpb=5c9b45285dd64723ad1dac380b98a7b1f3095674 diff --git a/source/common/vec/dct-sse41.cpp b/source/common/vec/dct-sse41.cpp index aa52709..81a7889 100644 --- a/source/common/vec/dct-sse41.cpp +++ b/source/common/vec/dct-sse41.cpp @@ -36,7 +36,7 @@ using namespace x265; namespace { -void dequant_scaling(const int16_t* quantCoef, const int32_t *deQuantCoef, int32_t* coef, int num, int per, int shift) +void dequant_scaling(const int16_t* quantCoef, const int32_t *deQuantCoef, int16_t* coef, int num, int per, int shift) { X265_CHECK(num <= 32 * 32, "dequant num too large\n"); @@ -66,11 +66,7 @@ void dequant_scaling(const int16_t* quantCoef, const int32_t *deQuantCoef, int32 quantCoef2 = _mm_sra_epi32(_mm_add_epi32(_mm_mullo_epi32(quantCoef2, deQuantCoef2), IAdd), _mm_cvtsi32_si128(shift - per)); quantCoef12 = _mm_packs_epi32(quantCoef1, quantCoef2); - sign = _mm_srai_epi16(quantCoef12, 15); - quantCoef1 = _mm_unpacklo_epi16(quantCoef12, sign); - _mm_storeu_si128((__m128i*)(coef + n), quantCoef1); - quantCoef2 = _mm_unpackhi_epi16(quantCoef12, sign); - _mm_storeu_si128((__m128i*)(coef + n + 4), quantCoef2); + _mm_storeu_si128((__m128i*)(coef + n), quantCoef12); } } else @@ -100,11 +96,7 @@ void dequant_scaling(const int16_t* quantCoef, const int32_t *deQuantCoef, int32 quantCoef2 = _mm_sll_epi32(quantCoef2, _mm_cvtsi32_si128(per - shift)); quantCoef12 = _mm_packs_epi32(quantCoef1, quantCoef2); - sign = _mm_srai_epi16(quantCoef12, 15); - quantCoef1 = _mm_unpacklo_epi16(quantCoef12, sign); - _mm_storeu_si128((__m128i*)(coef + n), quantCoef1); - quantCoef2 = _mm_unpackhi_epi16(quantCoef12, sign); - _mm_storeu_si128((__m128i*)(coef + n + 4), quantCoef2); + _mm_storeu_si128((__m128i*)(coef + n), quantCoef12); } } }