m_hChromaShift = CHROMA_H_SHIFT(csp);
m_vChromaShift = CHROMA_V_SHIFT(csp);
- // set width and height
m_size = size;
- m_csize = size >> m_hChromaShift;
m_part = partitionFromSizes(size, size);
- size_t sizeL = size * size;
- size_t sizeC = sizeL >> (m_vChromaShift + m_hChromaShift);
+ if (csp == X265_CSP_I400)
+ {
+ CHECKED_MALLOC(m_buf[0], pixel, size * size + 8);
+ m_buf[1] = m_buf[2] = 0;
+ m_csize = MAX_INT;
+ return true;
+ }
+ else
+ {
+ m_csize = size >> m_hChromaShift;
- X265_CHECK((sizeC & 15) == 0, "invalid size");
+ size_t sizeL = size * size;
+ size_t sizeC = sizeL >> (m_vChromaShift + m_hChromaShift);
- // memory allocation (padded for SIMD reads)
- CHECKED_MALLOC(m_buf[0], pixel, sizeL + sizeC * 2 + 8);
- m_buf[1] = m_buf[0] + sizeL;
- m_buf[2] = m_buf[0] + sizeL + sizeC;
- return true;
+ X265_CHECK((sizeC & 15) == 0, "invalid size");
+
+ // memory allocation (padded for SIMD reads)
+ CHECKED_MALLOC(m_buf[0], pixel, sizeL + sizeC * 2 + 8);
+ m_buf[1] = m_buf[0] + sizeL;
+ m_buf[2] = m_buf[0] + sizeL + sizeC;
+ return true;
+ }
fail:
return false;
void Yuv::copyToPicYuv(PicYuv& dstPic, uint32_t cuAddr, uint32_t absPartIdx) const
{
pixel* dstY = dstPic.getLumaAddr(cuAddr, absPartIdx);
-
primitives.luma_copy_pp[m_part](dstY, dstPic.m_stride, m_buf[0], m_size);
pixel* dstU = dstPic.getCbAddr(cuAddr, absPartIdx);
void Yuv::copyFromPicYuv(const PicYuv& srcPic, uint32_t cuAddr, uint32_t absPartIdx)
{
- /* We cheat with const_cast internally because the get methods are not capable of
- * returning const buffers and the primitives are not const aware, but we know
- * this function does not modify srcPic */
- PicYuv& srcPicSafe = const_cast<PicYuv&>(srcPic);
- pixel* srcY = srcPicSafe.getLumaAddr(cuAddr, absPartIdx);
-
+ const pixel* srcY = srcPic.getLumaAddr(cuAddr, absPartIdx); // luma source is read straight through the const reference; the former const_cast alias is gone
primitives.luma_copy_pp[m_part](m_buf[0], m_size, srcY, srcPic.m_stride);
- pixel* srcU = srcPicSafe.getCbAddr(cuAddr, absPartIdx);
- pixel* srcV = srcPicSafe.getCrAddr(cuAddr, absPartIdx);
- primitives.chroma[m_csp].copy_pp[m_part](m_buf[1], m_csize, srcU, srcPicSafe.m_strideC);
- primitives.chroma[m_csp].copy_pp[m_part](m_buf[2], m_csize, srcV, srcPicSafe.m_strideC);
+ const pixel* srcU = srcPic.getCbAddr(cuAddr, absPartIdx); // Cb source for the same cuAddr/absPartIdx position
+ const pixel* srcV = srcPic.getCrAddr(cuAddr, absPartIdx); // Cr source for the same cuAddr/absPartIdx position
+ primitives.chroma[m_csp].copy_pp[m_part](m_buf[1], m_csize, srcU, srcPic.m_strideC); // NOTE(review): the X265_CSP_I400 allocation path sets m_buf[1] = m_buf[2] = 0 and m_csize = MAX_INT; confirm this chroma copy is never reached for that colour space
+ primitives.chroma[m_csp].copy_pp[m_part](m_buf[2], m_csize, srcV, srcPic.m_strideC); // Cr plane: same destination stride (m_csize) and source stride (m_strideC) as Cb
}
void Yuv::copyFromYuv(const Yuv& srcYuv)
{
- X265_CHECK(m_size <= srcYuv.m_size, "invalid size\n");
+ X265_CHECK(m_size >= srcYuv.m_size, "invalid size\n"); // NOTE(review): the copies below read an m_part-sized block from srcYuv using srcYuv.m_size as the source stride; confirm '>=' (rather than the previous '<=') is the intended bound when the two sizes differ
primitives.luma_copy_pp[m_part](m_buf[0], m_size, srcYuv.m_buf[0], srcYuv.m_size);
primitives.chroma[m_csp].copy_pp[m_part](m_buf[1], m_csize, srcYuv.m_buf[1], srcYuv.m_csize);
primitives.chroma[m_csp].copy_pp[m_part](m_buf[2], m_csize, srcYuv.m_buf[2], srcYuv.m_csize);
}
+/* Copy a single PU out of srcYuv into the start of this buffer. This variant
+ * exists for ME, which requires the luma fenc pixels to use FENC_STRIDE.
+ *
+ * srcYuv     - buffer the PU is read from
+ * absPartIdx - partition index used to find the PU's offset inside srcYuv
+ * partEnum   - index into the luma and chroma copy primitive tables
+ * bChroma    - when true, the Cb and Cr planes are copied after luma */
+void Yuv::copyPUFromYuv(const Yuv& srcYuv, uint32_t absPartIdx, int partEnum, bool bChroma)
+{
+    X265_CHECK(m_size == FENC_STRIDE && m_size >= srcYuv.m_size, "PU buffer size mismatch\n");
+
+    // luma: source is offset by absPartIdx, destination is the buffer origin
+    const pixel* lumaSrc = srcYuv.m_buf[0] + getAddrOffset(absPartIdx, srcYuv.m_size);
+    primitives.luma_copy_pp[partEnum](m_buf[0], m_size, lumaSrc, srcYuv.m_size);
+
+    if (!bChroma)
+        return;
+
+    // Cb and Cr are read at the same offset within their respective planes
+    const pixel* cbSrc = srcYuv.m_buf[1] + srcYuv.getChromaAddrOffset(absPartIdx);
+    const pixel* crSrc = srcYuv.m_buf[2] + srcYuv.getChromaAddrOffset(absPartIdx);
+
+    primitives.chroma[m_csp].copy_pp[partEnum](m_buf[1], m_csize, cbSrc, srcYuv.m_csize);
+    primitives.chroma[m_csp].copy_pp[partEnum](m_buf[2], m_csize, crSrc, srcYuv.m_csize);
+}
+
void Yuv::copyToPartYuv(Yuv& dstYuv, uint32_t absPartIdx) const
{
pixel* dstY = dstYuv.getLumaAddr(absPartIdx);
{
pixel* srcY = m_buf[0] + getAddrOffset(absPartIdx, m_size);
pixel* dstY = dstYuv.m_buf[0];
-
primitives.luma_copy_pp[dstYuv.m_part](dstY, dstYuv.m_size, srcY, m_size);
pixel* srcU = m_buf[1] + getChromaAddrOffset(absPartIdx);
if (bLuma)
{
- int16_t* srcY0 = const_cast<ShortYuv&>(srcYuv0).getLumaAddr(absPartIdx);
- int16_t* srcY1 = const_cast<ShortYuv&>(srcYuv1).getLumaAddr(absPartIdx);
+ const int16_t* srcY0 = srcYuv0.getLumaAddr(absPartIdx);
+ const int16_t* srcY1 = srcYuv1.getLumaAddr(absPartIdx);
pixel* dstY = getLumaAddr(absPartIdx);
-
primitives.luma_addAvg[part](srcY0, srcY1, dstY, srcYuv0.m_size, srcYuv1.m_size, m_size);
}
if (bChroma)
{
- int16_t* srcU0 = const_cast<ShortYuv&>(srcYuv0).getCbAddr(absPartIdx);
- int16_t* srcV0 = const_cast<ShortYuv&>(srcYuv0).getCrAddr(absPartIdx);
- int16_t* srcU1 = const_cast<ShortYuv&>(srcYuv1).getCbAddr(absPartIdx);
- int16_t* srcV1 = const_cast<ShortYuv&>(srcYuv1).getCrAddr(absPartIdx);
+ const int16_t* srcU0 = srcYuv0.getCbAddr(absPartIdx);
+ const int16_t* srcV0 = srcYuv0.getCrAddr(absPartIdx);
+ const int16_t* srcU1 = srcYuv1.getCbAddr(absPartIdx);
+ const int16_t* srcV1 = srcYuv1.getCrAddr(absPartIdx);
pixel* dstU = getCbAddr(absPartIdx);
pixel* dstV = getCrAddr(absPartIdx);
-
primitives.chroma[m_csp].addAvg[part](srcU0, srcU1, dstU, srcYuv0.m_csize, srcYuv1.m_csize, m_csize);
primitives.chroma[m_csp].addAvg[part](srcV0, srcV1, dstV, srcYuv0.m_csize, srcYuv1.m_csize, m_csize);
}
{
const pixel* src = getLumaAddr(absPartIdx);
pixel* dst = dstYuv.getLumaAddr(absPartIdx);
- primitives.square_copy_pp[log2Size - 2](dst, dstYuv.m_size, const_cast<pixel*>(src), m_size);
+ primitives.luma_copy_pp[log2Size - 2](dst, dstYuv.m_size, src, m_size);
}
void Yuv::copyPartToPartChroma(Yuv& dstYuv, uint32_t absPartIdx, uint32_t log2SizeL) const
const pixel* srcV = getCrAddr(absPartIdx);
pixel* dstU = dstYuv.getCbAddr(absPartIdx);
pixel* dstV = dstYuv.getCrAddr(absPartIdx);
-
- primitives.chroma[m_csp].copy_pp[part](dstU, dstYuv.m_csize, const_cast<pixel*>(srcU), m_csize);
- primitives.chroma[m_csp].copy_pp[part](dstV, dstYuv.m_csize, const_cast<pixel*>(srcV), m_csize);
+ primitives.chroma[m_csp].copy_pp[part](dstU, dstYuv.m_csize, srcU, m_csize);
+ primitives.chroma[m_csp].copy_pp[part](dstV, dstYuv.m_csize, srcV, m_csize);
}