Imported Upstream version 1.4+222+hg5f9f7194267b
[deb_x265.git] / source / common / yuv.cpp
CommitLineData
72b9787e
JB
1/*****************************************************************************
2 * Copyright (C) 2014 x265 project
3 *
4 * Authors: Steve Borho <steve@borho.org>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
19 *
20 * This program is also available under a commercial proprietary license.
21 * For more information, contact us at license @ x265.com.
22 *****************************************************************************/
23
24
25#include "common.h"
26#include "yuv.h"
27#include "shortyuv.h"
28#include "picyuv.h"
29#include "primitives.h"
30
31using namespace x265;
32
33Yuv::Yuv()
34{
35 m_buf[0] = NULL;
36 m_buf[1] = NULL;
37 m_buf[2] = NULL;
38}
39
40bool Yuv::create(uint32_t size, int csp)
41{
42 m_csp = csp;
43 m_hChromaShift = CHROMA_H_SHIFT(csp);
44 m_vChromaShift = CHROMA_V_SHIFT(csp);
45
72b9787e 46 m_size = size;
72b9787e
JB
47 m_part = partitionFromSizes(size, size);
48
b53f7c52
JB
49 if (csp == X265_CSP_I400)
50 {
51 CHECKED_MALLOC(m_buf[0], pixel, size * size + 8);
52 m_buf[1] = m_buf[2] = 0;
53 m_csize = MAX_INT;
54 return true;
55 }
56 else
57 {
58 m_csize = size >> m_hChromaShift;
72b9787e 59
b53f7c52
JB
60 size_t sizeL = size * size;
61 size_t sizeC = sizeL >> (m_vChromaShift + m_hChromaShift);
72b9787e 62
b53f7c52
JB
63 X265_CHECK((sizeC & 15) == 0, "invalid size");
64
65 // memory allocation (padded for SIMD reads)
66 CHECKED_MALLOC(m_buf[0], pixel, sizeL + sizeC * 2 + 8);
67 m_buf[1] = m_buf[0] + sizeL;
68 m_buf[2] = m_buf[0] + sizeL + sizeC;
69 return true;
70 }
72b9787e
JB
71
72fail:
73 return false;
74}
75
76void Yuv::destroy()
77{
78 X265_FREE(m_buf[0]);
79}
80
81void Yuv::copyToPicYuv(PicYuv& dstPic, uint32_t cuAddr, uint32_t absPartIdx) const
82{
83 pixel* dstY = dstPic.getLumaAddr(cuAddr, absPartIdx);
72b9787e
JB
84 primitives.luma_copy_pp[m_part](dstY, dstPic.m_stride, m_buf[0], m_size);
85
86 pixel* dstU = dstPic.getCbAddr(cuAddr, absPartIdx);
87 pixel* dstV = dstPic.getCrAddr(cuAddr, absPartIdx);
88 primitives.chroma[m_csp].copy_pp[m_part](dstU, dstPic.m_strideC, m_buf[1], m_csize);
89 primitives.chroma[m_csp].copy_pp[m_part](dstV, dstPic.m_strideC, m_buf[2], m_csize);
90}
91
92void Yuv::copyFromPicYuv(const PicYuv& srcPic, uint32_t cuAddr, uint32_t absPartIdx)
93{
b53f7c52 94 const pixel* srcY = srcPic.getLumaAddr(cuAddr, absPartIdx);
72b9787e
JB
95 primitives.luma_copy_pp[m_part](m_buf[0], m_size, srcY, srcPic.m_stride);
96
b53f7c52
JB
97 const pixel* srcU = srcPic.getCbAddr(cuAddr, absPartIdx);
98 const pixel* srcV = srcPic.getCrAddr(cuAddr, absPartIdx);
99 primitives.chroma[m_csp].copy_pp[m_part](m_buf[1], m_csize, srcU, srcPic.m_strideC);
100 primitives.chroma[m_csp].copy_pp[m_part](m_buf[2], m_csize, srcV, srcPic.m_strideC);
72b9787e
JB
101}
102
103void Yuv::copyFromYuv(const Yuv& srcYuv)
104{
b53f7c52 105 X265_CHECK(m_size >= srcYuv.m_size, "invalid size\n");
72b9787e
JB
106
107 primitives.luma_copy_pp[m_part](m_buf[0], m_size, srcYuv.m_buf[0], srcYuv.m_size);
108 primitives.chroma[m_csp].copy_pp[m_part](m_buf[1], m_csize, srcYuv.m_buf[1], srcYuv.m_csize);
109 primitives.chroma[m_csp].copy_pp[m_part](m_buf[2], m_csize, srcYuv.m_buf[2], srcYuv.m_csize);
110}
111
b53f7c52
JB
112/* This version is intended for use by ME, which required FENC_STRIDE for luma fenc pixels */
113void Yuv::copyPUFromYuv(const Yuv& srcYuv, uint32_t absPartIdx, int partEnum, bool bChroma)
114{
115 X265_CHECK(m_size == FENC_STRIDE && m_size >= srcYuv.m_size, "PU buffer size mismatch\n");
116
117 const pixel* srcY = srcYuv.m_buf[0] + getAddrOffset(absPartIdx, srcYuv.m_size);
118 primitives.luma_copy_pp[partEnum](m_buf[0], m_size, srcY, srcYuv.m_size);
119
120 if (bChroma)
121 {
122 const pixel* srcU = srcYuv.m_buf[1] + srcYuv.getChromaAddrOffset(absPartIdx);
123 const pixel* srcV = srcYuv.m_buf[2] + srcYuv.getChromaAddrOffset(absPartIdx);
124 primitives.chroma[m_csp].copy_pp[partEnum](m_buf[1], m_csize, srcU, srcYuv.m_csize);
125 primitives.chroma[m_csp].copy_pp[partEnum](m_buf[2], m_csize, srcV, srcYuv.m_csize);
126 }
127}
128
72b9787e
JB
129void Yuv::copyToPartYuv(Yuv& dstYuv, uint32_t absPartIdx) const
130{
131 pixel* dstY = dstYuv.getLumaAddr(absPartIdx);
132 primitives.luma_copy_pp[m_part](dstY, dstYuv.m_size, m_buf[0], m_size);
133
134 pixel* dstU = dstYuv.getCbAddr(absPartIdx);
135 pixel* dstV = dstYuv.getCrAddr(absPartIdx);
136 primitives.chroma[m_csp].copy_pp[m_part](dstU, dstYuv.m_csize, m_buf[1], m_csize);
137 primitives.chroma[m_csp].copy_pp[m_part](dstV, dstYuv.m_csize, m_buf[2], m_csize);
138}
139
140void Yuv::copyPartToYuv(Yuv& dstYuv, uint32_t absPartIdx) const
141{
142 pixel* srcY = m_buf[0] + getAddrOffset(absPartIdx, m_size);
143 pixel* dstY = dstYuv.m_buf[0];
72b9787e
JB
144 primitives.luma_copy_pp[dstYuv.m_part](dstY, dstYuv.m_size, srcY, m_size);
145
146 pixel* srcU = m_buf[1] + getChromaAddrOffset(absPartIdx);
147 pixel* srcV = m_buf[2] + getChromaAddrOffset(absPartIdx);
148 pixel* dstU = dstYuv.m_buf[1];
149 pixel* dstV = dstYuv.m_buf[2];
150 primitives.chroma[m_csp].copy_pp[dstYuv.m_part](dstU, dstYuv.m_csize, srcU, m_csize);
151 primitives.chroma[m_csp].copy_pp[dstYuv.m_part](dstV, dstYuv.m_csize, srcV, m_csize);
152}
153
154void Yuv::addClip(const Yuv& srcYuv0, const ShortYuv& srcYuv1, uint32_t log2SizeL)
155{
156 primitives.luma_add_ps[log2SizeL - 2](m_buf[0], m_size, srcYuv0.m_buf[0], srcYuv1.m_buf[0], srcYuv0.m_size, srcYuv1.m_size);
157 primitives.chroma[m_csp].add_ps[log2SizeL - 2](m_buf[1], m_csize, srcYuv0.m_buf[1], srcYuv1.m_buf[1], srcYuv0.m_csize, srcYuv1.m_csize);
158 primitives.chroma[m_csp].add_ps[log2SizeL - 2](m_buf[2], m_csize, srcYuv0.m_buf[2], srcYuv1.m_buf[2], srcYuv0.m_csize, srcYuv1.m_csize);
159}
160
161void Yuv::addAvg(const ShortYuv& srcYuv0, const ShortYuv& srcYuv1, uint32_t absPartIdx, uint32_t width, uint32_t height, bool bLuma, bool bChroma)
162{
163 int part = partitionFromSizes(width, height);
164
165 if (bLuma)
166 {
b53f7c52
JB
167 const int16_t* srcY0 = srcYuv0.getLumaAddr(absPartIdx);
168 const int16_t* srcY1 = srcYuv1.getLumaAddr(absPartIdx);
72b9787e 169 pixel* dstY = getLumaAddr(absPartIdx);
72b9787e
JB
170 primitives.luma_addAvg[part](srcY0, srcY1, dstY, srcYuv0.m_size, srcYuv1.m_size, m_size);
171 }
172 if (bChroma)
173 {
b53f7c52
JB
174 const int16_t* srcU0 = srcYuv0.getCbAddr(absPartIdx);
175 const int16_t* srcV0 = srcYuv0.getCrAddr(absPartIdx);
176 const int16_t* srcU1 = srcYuv1.getCbAddr(absPartIdx);
177 const int16_t* srcV1 = srcYuv1.getCrAddr(absPartIdx);
72b9787e
JB
178 pixel* dstU = getCbAddr(absPartIdx);
179 pixel* dstV = getCrAddr(absPartIdx);
72b9787e
JB
180 primitives.chroma[m_csp].addAvg[part](srcU0, srcU1, dstU, srcYuv0.m_csize, srcYuv1.m_csize, m_csize);
181 primitives.chroma[m_csp].addAvg[part](srcV0, srcV1, dstV, srcYuv0.m_csize, srcYuv1.m_csize, m_csize);
182 }
183}
184
185void Yuv::copyPartToPartLuma(Yuv& dstYuv, uint32_t absPartIdx, uint32_t log2Size) const
186{
187 const pixel* src = getLumaAddr(absPartIdx);
188 pixel* dst = dstYuv.getLumaAddr(absPartIdx);
b53f7c52 189 primitives.luma_copy_pp[log2Size - 2](dst, dstYuv.m_size, src, m_size);
72b9787e
JB
190}
191
192void Yuv::copyPartToPartChroma(Yuv& dstYuv, uint32_t absPartIdx, uint32_t log2SizeL) const
193{
194 int part = partitionFromLog2Size(log2SizeL);
195 const pixel* srcU = getCbAddr(absPartIdx);
196 const pixel* srcV = getCrAddr(absPartIdx);
197 pixel* dstU = dstYuv.getCbAddr(absPartIdx);
198 pixel* dstV = dstYuv.getCrAddr(absPartIdx);
b53f7c52
JB
199 primitives.chroma[m_csp].copy_pp[part](dstU, dstYuv.m_csize, srcU, m_csize);
200 primitives.chroma[m_csp].copy_pp[part](dstV, dstYuv.m_csize, srcV, m_csize);
72b9787e 201}