Imported Upstream version 1.4+222+hg5f9f7194267b

[deb_x265.git] / source / common / yuv.cpp
diff --git a/source/common/yuv.cpp b/source/common/yuv.cpp

index fffc2153742487b32073df0a93fee58401a005dc..eedb5b2f6fa6e88c0b5e7dfc428e904b4d8045ba 100644 (file)
--- a/source/common/yuv.cpp
+++ b/source/common/yuv.cpp
@@ -43,21 +43,31 @@ bool Yuv::create(uint32_t size, int csp)
      m_hChromaShift = CHROMA_H_SHIFT(csp);
      m_vChromaShift = CHROMA_V_SHIFT(csp);
  
-    // set width and height
      m_size  = size;
-    m_csize = size >> m_hChromaShift;
      m_part = partitionFromSizes(size, size);
  
-    size_t sizeL = size * size;
-    size_t sizeC = sizeL >> (m_vChromaShift + m_hChromaShift);
+    if (csp == X265_CSP_I400)
+    {
+        CHECKED_MALLOC(m_buf[0], pixel, size * size + 8);
+        m_buf[1] = m_buf[2] = 0;
+        m_csize = MAX_INT;
+        return true;
+    }
+    else
+    {
+        m_csize = size >> m_hChromaShift;
  
-    X265_CHECK((sizeC & 15) == 0, "invalid size");
+        size_t sizeL = size * size;
+        size_t sizeC = sizeL >> (m_vChromaShift + m_hChromaShift);
  
-    // memory allocation (padded for SIMD reads)
-    CHECKED_MALLOC(m_buf[0], pixel, sizeL + sizeC * 2 + 8);
-    m_buf[1] = m_buf[0] + sizeL;
-    m_buf[2] = m_buf[0] + sizeL + sizeC;
-    return true;
+        X265_CHECK((sizeC & 15) == 0, "invalid size");
+
+        // memory allocation (padded for SIMD reads)
+        CHECKED_MALLOC(m_buf[0], pixel, sizeL + sizeC * 2 + 8);
+        m_buf[1] = m_buf[0] + sizeL;
+        m_buf[2] = m_buf[0] + sizeL + sizeC;
+        return true;
+    }
  
  fail:
      return false;
@@ -71,7 +81,6 @@ void Yuv::destroy()
  void Yuv::copyToPicYuv(PicYuv& dstPic, uint32_t cuAddr, uint32_t absPartIdx) const
  {
      pixel* dstY = dstPic.getLumaAddr(cuAddr, absPartIdx);
-
      primitives.luma_copy_pp[m_part](dstY, dstPic.m_stride, m_buf[0], m_size);
  
      pixel* dstU = dstPic.getCbAddr(cuAddr, absPartIdx);
@@ -82,29 +91,41 @@ void Yuv::copyToPicYuv(PicYuv& dstPic, uint32_t cuAddr, uint32_t absPartIdx) con
  
  void Yuv::copyFromPicYuv(const PicYuv& srcPic, uint32_t cuAddr, uint32_t absPartIdx)
  {
-    /* We cheat with const_cast internally because the get methods are not capable of
-     * returning const buffers and the primitives are not const aware, but we know
-     * this function does not modify srcPic */
-    PicYuv& srcPicSafe = const_cast<PicYuv&>(srcPic);
-    pixel* srcY = srcPicSafe.getLumaAddr(cuAddr, absPartIdx);
-
+    const pixel* srcY = srcPic.getLumaAddr(cuAddr, absPartIdx);
      primitives.luma_copy_pp[m_part](m_buf[0], m_size, srcY, srcPic.m_stride);
  
-    pixel* srcU = srcPicSafe.getCbAddr(cuAddr, absPartIdx);
-    pixel* srcV = srcPicSafe.getCrAddr(cuAddr, absPartIdx);
-    primitives.chroma[m_csp].copy_pp[m_part](m_buf[1], m_csize, srcU, srcPicSafe.m_strideC);
-    primitives.chroma[m_csp].copy_pp[m_part](m_buf[2], m_csize, srcV, srcPicSafe.m_strideC);
+    const pixel* srcU = srcPic.getCbAddr(cuAddr, absPartIdx);
+    const pixel* srcV = srcPic.getCrAddr(cuAddr, absPartIdx);
+    primitives.chroma[m_csp].copy_pp[m_part](m_buf[1], m_csize, srcU, srcPic.m_strideC);
+    primitives.chroma[m_csp].copy_pp[m_part](m_buf[2], m_csize, srcV, srcPic.m_strideC);
  }
  
  void Yuv::copyFromYuv(const Yuv& srcYuv)
  {
-    X265_CHECK(m_size <= srcYuv.m_size, "invalid size\n");
+    X265_CHECK(m_size >= srcYuv.m_size, "invalid size\n");
  
      primitives.luma_copy_pp[m_part](m_buf[0], m_size, srcYuv.m_buf[0], srcYuv.m_size);
      primitives.chroma[m_csp].copy_pp[m_part](m_buf[1], m_csize, srcYuv.m_buf[1], srcYuv.m_csize);
      primitives.chroma[m_csp].copy_pp[m_part](m_buf[2], m_csize, srcYuv.m_buf[2], srcYuv.m_csize);
  }
  
+/* This version is intended for use by ME, which requires FENC_STRIDE for luma fenc pixels */
+void Yuv::copyPUFromYuv(const Yuv& srcYuv, uint32_t absPartIdx, int partEnum, bool bChroma)
+{
+    X265_CHECK(m_size == FENC_STRIDE && m_size >= srcYuv.m_size, "PU buffer size mismatch\n");
+
+    const pixel* srcY = srcYuv.m_buf[0] + getAddrOffset(absPartIdx, srcYuv.m_size);
+    primitives.luma_copy_pp[partEnum](m_buf[0], m_size, srcY, srcYuv.m_size);
+
+    if (bChroma)
+    {
+        const pixel* srcU = srcYuv.m_buf[1] + srcYuv.getChromaAddrOffset(absPartIdx);
+        const pixel* srcV = srcYuv.m_buf[2] + srcYuv.getChromaAddrOffset(absPartIdx);
+        primitives.chroma[m_csp].copy_pp[partEnum](m_buf[1], m_csize, srcU, srcYuv.m_csize);
+        primitives.chroma[m_csp].copy_pp[partEnum](m_buf[2], m_csize, srcV, srcYuv.m_csize);
+    }
+}
+
  void Yuv::copyToPartYuv(Yuv& dstYuv, uint32_t absPartIdx) const
  {
      pixel* dstY = dstYuv.getLumaAddr(absPartIdx);
@@ -120,7 +141,6 @@ void Yuv::copyPartToYuv(Yuv& dstYuv, uint32_t absPartIdx) const
  {
      pixel* srcY = m_buf[0] + getAddrOffset(absPartIdx, m_size);
      pixel* dstY = dstYuv.m_buf[0];
-
      primitives.luma_copy_pp[dstYuv.m_part](dstY, dstYuv.m_size, srcY, m_size);
  
      pixel* srcU = m_buf[1] + getChromaAddrOffset(absPartIdx);
@@ -144,21 +164,19 @@ void Yuv::addAvg(const ShortYuv& srcYuv0, const ShortYuv& srcYuv1, uint32_t absP
  
      if (bLuma)
      {
-        int16_t* srcY0 = const_cast<ShortYuv&>(srcYuv0).getLumaAddr(absPartIdx);
-        int16_t* srcY1 = const_cast<ShortYuv&>(srcYuv1).getLumaAddr(absPartIdx);
+        const int16_t* srcY0 = srcYuv0.getLumaAddr(absPartIdx);
+        const int16_t* srcY1 = srcYuv1.getLumaAddr(absPartIdx);
          pixel* dstY = getLumaAddr(absPartIdx);
-
          primitives.luma_addAvg[part](srcY0, srcY1, dstY, srcYuv0.m_size, srcYuv1.m_size, m_size);
      }
      if (bChroma)
      {
-        int16_t* srcU0 = const_cast<ShortYuv&>(srcYuv0).getCbAddr(absPartIdx);
-        int16_t* srcV0 = const_cast<ShortYuv&>(srcYuv0).getCrAddr(absPartIdx);
-        int16_t* srcU1 = const_cast<ShortYuv&>(srcYuv1).getCbAddr(absPartIdx);
-        int16_t* srcV1 = const_cast<ShortYuv&>(srcYuv1).getCrAddr(absPartIdx);
+        const int16_t* srcU0 = srcYuv0.getCbAddr(absPartIdx);
+        const int16_t* srcV0 = srcYuv0.getCrAddr(absPartIdx);
+        const int16_t* srcU1 = srcYuv1.getCbAddr(absPartIdx);
+        const int16_t* srcV1 = srcYuv1.getCrAddr(absPartIdx);
          pixel* dstU = getCbAddr(absPartIdx);
          pixel* dstV = getCrAddr(absPartIdx);
-
          primitives.chroma[m_csp].addAvg[part](srcU0, srcU1, dstU, srcYuv0.m_csize, srcYuv1.m_csize, m_csize);
          primitives.chroma[m_csp].addAvg[part](srcV0, srcV1, dstV, srcYuv0.m_csize, srcYuv1.m_csize, m_csize);
      }
@@ -168,7 +186,7 @@ void Yuv::copyPartToPartLuma(Yuv& dstYuv, uint32_t absPartIdx, uint32_t log2Size
  {
      const pixel* src = getLumaAddr(absPartIdx);
      pixel* dst = dstYuv.getLumaAddr(absPartIdx);
-    primitives.square_copy_pp[log2Size - 2](dst, dstYuv.m_size, const_cast<pixel*>(src), m_size);
+    primitives.luma_copy_pp[log2Size - 2](dst, dstYuv.m_size, src, m_size);
  }
  
  void Yuv::copyPartToPartChroma(Yuv& dstYuv, uint32_t absPartIdx, uint32_t log2SizeL) const
@@ -178,7 +196,6 @@ void Yuv::copyPartToPartChroma(Yuv& dstYuv, uint32_t absPartIdx, uint32_t log2Si
      const pixel* srcV = getCrAddr(absPartIdx);
      pixel* dstU = dstYuv.getCbAddr(absPartIdx);
      pixel* dstV = dstYuv.getCrAddr(absPartIdx);
-
-    primitives.chroma[m_csp].copy_pp[part](dstU, dstYuv.m_csize, const_cast<pixel*>(srcU), m_csize);
-    primitives.chroma[m_csp].copy_pp[part](dstV, dstYuv.m_csize, const_cast<pixel*>(srcV), m_csize);
+    primitives.chroma[m_csp].copy_pp[part](dstU, dstYuv.m_csize, srcU, m_csize);
+    primitives.chroma[m_csp].copy_pp[part](dstV, dstYuv.m_csize, srcV, m_csize);
  }