X-Git-Url: https://git.piment-noir.org/?p=deb_x265.git;a=blobdiff_plain;f=source%2Fcommon%2Fyuv.cpp;fp=source%2Fcommon%2Fyuv.cpp;h=eedb5b2f6fa6e88c0b5e7dfc428e904b4d8045ba;hp=fffc2153742487b32073df0a93fee58401a005dc;hb=b53f7c52d8280ab63876efd6eb292c21430ac607;hpb=5c9b45285dd64723ad1dac380b98a7b1f3095674 diff --git a/source/common/yuv.cpp b/source/common/yuv.cpp index fffc215..eedb5b2 100644 --- a/source/common/yuv.cpp +++ b/source/common/yuv.cpp @@ -43,21 +43,31 @@ bool Yuv::create(uint32_t size, int csp) m_hChromaShift = CHROMA_H_SHIFT(csp); m_vChromaShift = CHROMA_V_SHIFT(csp); - // set width and height m_size = size; - m_csize = size >> m_hChromaShift; m_part = partitionFromSizes(size, size); - size_t sizeL = size * size; - size_t sizeC = sizeL >> (m_vChromaShift + m_hChromaShift); + if (csp == X265_CSP_I400) + { + CHECKED_MALLOC(m_buf[0], pixel, size * size + 8); + m_buf[1] = m_buf[2] = 0; + m_csize = MAX_INT; + return true; + } + else + { + m_csize = size >> m_hChromaShift; - X265_CHECK((sizeC & 15) == 0, "invalid size"); + size_t sizeL = size * size; + size_t sizeC = sizeL >> (m_vChromaShift + m_hChromaShift); - // memory allocation (padded for SIMD reads) - CHECKED_MALLOC(m_buf[0], pixel, sizeL + sizeC * 2 + 8); - m_buf[1] = m_buf[0] + sizeL; - m_buf[2] = m_buf[0] + sizeL + sizeC; - return true; + X265_CHECK((sizeC & 15) == 0, "invalid size"); + + // memory allocation (padded for SIMD reads) + CHECKED_MALLOC(m_buf[0], pixel, sizeL + sizeC * 2 + 8); + m_buf[1] = m_buf[0] + sizeL; + m_buf[2] = m_buf[0] + sizeL + sizeC; + return true; + } fail: return false; @@ -71,7 +81,6 @@ void Yuv::destroy() void Yuv::copyToPicYuv(PicYuv& dstPic, uint32_t cuAddr, uint32_t absPartIdx) const { pixel* dstY = dstPic.getLumaAddr(cuAddr, absPartIdx); - primitives.luma_copy_pp[m_part](dstY, dstPic.m_stride, m_buf[0], m_size); pixel* dstU = dstPic.getCbAddr(cuAddr, absPartIdx); @@ -82,29 +91,41 @@ void Yuv::copyToPicYuv(PicYuv& dstPic, uint32_t cuAddr, uint32_t absPartIdx) con void Yuv::copyFromPicYuv(const PicYuv& srcPic, uint32_t cuAddr, uint32_t absPartIdx) { - /* We cheat with const_cast internally because the get methods are not capable of - * returning const buffers and the primitives are not const aware, but we know - * this function does not modify srcPic */ - PicYuv& srcPicSafe = const_cast(srcPic); - pixel* srcY = srcPicSafe.getLumaAddr(cuAddr, absPartIdx); - + const pixel* srcY = srcPic.getLumaAddr(cuAddr, absPartIdx); primitives.luma_copy_pp[m_part](m_buf[0], m_size, srcY, srcPic.m_stride); - pixel* srcU = srcPicSafe.getCbAddr(cuAddr, absPartIdx); - pixel* srcV = srcPicSafe.getCrAddr(cuAddr, absPartIdx); - primitives.chroma[m_csp].copy_pp[m_part](m_buf[1], m_csize, srcU, srcPicSafe.m_strideC); - primitives.chroma[m_csp].copy_pp[m_part](m_buf[2], m_csize, srcV, srcPicSafe.m_strideC); + const pixel* srcU = srcPic.getCbAddr(cuAddr, absPartIdx); + const pixel* srcV = srcPic.getCrAddr(cuAddr, absPartIdx); + primitives.chroma[m_csp].copy_pp[m_part](m_buf[1], m_csize, srcU, srcPic.m_strideC); + primitives.chroma[m_csp].copy_pp[m_part](m_buf[2], m_csize, srcV, srcPic.m_strideC); } void Yuv::copyFromYuv(const Yuv& srcYuv) { - X265_CHECK(m_size <= srcYuv.m_size, "invalid size\n"); + X265_CHECK(m_size >= srcYuv.m_size, "invalid size\n"); primitives.luma_copy_pp[m_part](m_buf[0], m_size, srcYuv.m_buf[0], srcYuv.m_size); primitives.chroma[m_csp].copy_pp[m_part](m_buf[1], m_csize, srcYuv.m_buf[1], srcYuv.m_csize); primitives.chroma[m_csp].copy_pp[m_part](m_buf[2], m_csize, srcYuv.m_buf[2], srcYuv.m_csize); } +/* This version is intended for use by ME, which required FENC_STRIDE for luma fenc pixels */ +void Yuv::copyPUFromYuv(const Yuv& srcYuv, uint32_t absPartIdx, int partEnum, bool bChroma) +{ + X265_CHECK(m_size == FENC_STRIDE && m_size >= srcYuv.m_size, "PU buffer size mismatch\n"); + + const pixel* srcY = srcYuv.m_buf[0] + getAddrOffset(absPartIdx, srcYuv.m_size); + primitives.luma_copy_pp[partEnum](m_buf[0], m_size, srcY, srcYuv.m_size); + + if (bChroma) + { + const pixel* srcU = srcYuv.m_buf[1] + srcYuv.getChromaAddrOffset(absPartIdx); + const pixel* srcV = srcYuv.m_buf[2] + srcYuv.getChromaAddrOffset(absPartIdx); + primitives.chroma[m_csp].copy_pp[partEnum](m_buf[1], m_csize, srcU, srcYuv.m_csize); + primitives.chroma[m_csp].copy_pp[partEnum](m_buf[2], m_csize, srcV, srcYuv.m_csize); + } +} + void Yuv::copyToPartYuv(Yuv& dstYuv, uint32_t absPartIdx) const { pixel* dstY = dstYuv.getLumaAddr(absPartIdx); @@ -120,7 +141,6 @@ void Yuv::copyPartToYuv(Yuv& dstYuv, uint32_t absPartIdx) const { pixel* srcY = m_buf[0] + getAddrOffset(absPartIdx, m_size); pixel* dstY = dstYuv.m_buf[0]; - primitives.luma_copy_pp[dstYuv.m_part](dstY, dstYuv.m_size, srcY, m_size); pixel* srcU = m_buf[1] + getChromaAddrOffset(absPartIdx); @@ -144,21 +164,19 @@ void Yuv::addAvg(const ShortYuv& srcYuv0, const ShortYuv& srcYuv1, uint32_t absP if (bLuma) { - int16_t* srcY0 = const_cast(srcYuv0).getLumaAddr(absPartIdx); - int16_t* srcY1 = const_cast(srcYuv1).getLumaAddr(absPartIdx); + const int16_t* srcY0 = srcYuv0.getLumaAddr(absPartIdx); + const int16_t* srcY1 = srcYuv1.getLumaAddr(absPartIdx); pixel* dstY = getLumaAddr(absPartIdx); - primitives.luma_addAvg[part](srcY0, srcY1, dstY, srcYuv0.m_size, srcYuv1.m_size, m_size); } if (bChroma) { - int16_t* srcU0 = const_cast(srcYuv0).getCbAddr(absPartIdx); - int16_t* srcV0 = const_cast(srcYuv0).getCrAddr(absPartIdx); - int16_t* srcU1 = const_cast(srcYuv1).getCbAddr(absPartIdx); - int16_t* srcV1 = const_cast(srcYuv1).getCrAddr(absPartIdx); + const int16_t* srcU0 = srcYuv0.getCbAddr(absPartIdx); + const int16_t* srcV0 = srcYuv0.getCrAddr(absPartIdx); + const int16_t* srcU1 = srcYuv1.getCbAddr(absPartIdx); + const int16_t* srcV1 = srcYuv1.getCrAddr(absPartIdx); pixel* dstU = getCbAddr(absPartIdx); pixel* dstV = getCrAddr(absPartIdx); - primitives.chroma[m_csp].addAvg[part](srcU0, srcU1, dstU, srcYuv0.m_csize, srcYuv1.m_csize, m_csize); primitives.chroma[m_csp].addAvg[part](srcV0, srcV1, dstV, srcYuv0.m_csize, srcYuv1.m_csize, m_csize); } @@ -168,7 +186,7 @@ void Yuv::copyPartToPartLuma(Yuv& dstYuv, uint32_t absPartIdx, uint32_t log2Size { const pixel* src = getLumaAddr(absPartIdx); pixel* dst = dstYuv.getLumaAddr(absPartIdx); - primitives.square_copy_pp[log2Size - 2](dst, dstYuv.m_size, const_cast(src), m_size); + primitives.luma_copy_pp[log2Size - 2](dst, dstYuv.m_size, src, m_size); } void Yuv::copyPartToPartChroma(Yuv& dstYuv, uint32_t absPartIdx, uint32_t log2SizeL) const @@ -178,7 +196,6 @@ void Yuv::copyPartToPartChroma(Yuv& dstYuv, uint32_t absPartIdx, uint32_t log2Si const pixel* srcV = getCrAddr(absPartIdx); pixel* dstU = dstYuv.getCbAddr(absPartIdx); pixel* dstV = dstYuv.getCrAddr(absPartIdx); - - primitives.chroma[m_csp].copy_pp[part](dstU, dstYuv.m_csize, const_cast(srcU), m_csize); - primitives.chroma[m_csp].copy_pp[part](dstV, dstYuv.m_csize, const_cast(srcV), m_csize); + primitives.chroma[m_csp].copy_pp[part](dstU, dstYuv.m_csize, srcU, m_csize); + primitives.chroma[m_csp].copy_pp[part](dstV, dstYuv.m_csize, srcV, m_csize); }