/*****************************************************************************
 * Copyright (C) 2014 x265 project
 *
 * Authors: Steve Borho <steve@borho.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
 *
 * This program is also available under a commercial proprietary license.
 * For more information, contact us at license @ x265.com.
 *****************************************************************************/

#include "common.h"
#include "yuv.h"
#include "shortyuv.h"
#include "picyuv.h"
#include "primitives.h"

using namespace x265;

Yuv::Yuv()
{
    m_buf[0] = NULL;
    m_buf[1] = NULL;
    m_buf[2] = NULL;
}

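/* Allocate a CU-sized buffer: one luma plane plus, for color formats with
 * chroma, two chroma planes carved from the same allocation. A minimal usage
 * sketch (assuming a 64x64 CTU and 4:2:0 input; real call sites live in the
 * analysis code, not here):
 *
 *     Yuv yuv;
 *     if (yuv.create(64, X265_CSP_I420))
 *     {
 *         // ... fill and use the buffer ...
 *         yuv.destroy();
 *     }
 */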
bool Yuv::create(uint32_t size, int csp)
{
    m_csp = csp;
    m_hChromaShift = CHROMA_H_SHIFT(csp);
    m_vChromaShift = CHROMA_V_SHIFT(csp);

    m_size = size;
    m_part = partitionFromSizes(size, size);

    if (csp == X265_CSP_I400)
    {
        CHECKED_MALLOC(m_buf[0], pixel, size * size + 8);
        m_buf[1] = m_buf[2] = NULL;
        m_csize = MAX_INT; // deliberately invalid; i400 has no chroma planes
        return true;
    }
    else
    {
        m_csize = size >> m_hChromaShift;

        size_t sizeL = size * size;
        size_t sizeC = sizeL >> (m_vChromaShift + m_hChromaShift);

        X265_CHECK((sizeC & 15) == 0, "invalid size\n");

        // one allocation for all three planes (padded for SIMD reads)
        CHECKED_MALLOC(m_buf[0], pixel, sizeL + sizeC * 2 + 8);
        m_buf[1] = m_buf[0] + sizeL;
        m_buf[2] = m_buf[0] + sizeL + sizeC;
        return true;
    }

fail:
    return false;
}

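/* Free the single allocation made by create(); m_buf[1] and m_buf[2] either
 * alias into m_buf[0] or are NULL, so only the first pointer is freed */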
void Yuv::destroy()
{
    X265_FREE(m_buf[0]);
}

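/* Store this CU-sized buffer into a full picture (typically the
 * reconstructed frame) at the given CU address and partition offset */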
void Yuv::copyToPicYuv(PicYuv& dstPic, uint32_t cuAddr, uint32_t absPartIdx) const
{
    pixel* dstY = dstPic.getLumaAddr(cuAddr, absPartIdx);
    primitives.luma_copy_pp[m_part](dstY, dstPic.m_stride, m_buf[0], m_size);

    pixel* dstU = dstPic.getCbAddr(cuAddr, absPartIdx);
    pixel* dstV = dstPic.getCrAddr(cuAddr, absPartIdx);
    primitives.chroma[m_csp].copy_pp[m_part](dstU, dstPic.m_strideC, m_buf[1], m_csize);
    primitives.chroma[m_csp].copy_pp[m_part](dstV, dstPic.m_strideC, m_buf[2], m_csize);
}

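/* Load this CU-sized buffer from a full picture (typically the source
 * frame) at the given CU address and partition offset */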
void Yuv::copyFromPicYuv(const PicYuv& srcPic, uint32_t cuAddr, uint32_t absPartIdx)
{
    const pixel* srcY = srcPic.getLumaAddr(cuAddr, absPartIdx);
    primitives.luma_copy_pp[m_part](m_buf[0], m_size, srcY, srcPic.m_stride);

    const pixel* srcU = srcPic.getCbAddr(cuAddr, absPartIdx);
    const pixel* srcV = srcPic.getCrAddr(cuAddr, absPartIdx);
    primitives.chroma[m_csp].copy_pp[m_part](m_buf[1], m_csize, srcU, srcPic.m_strideC);
    primitives.chroma[m_csp].copy_pp[m_part](m_buf[2], m_csize, srcV, srcPic.m_strideC);
}

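/* Copy a whole (equal or smaller) Yuv into the top-left corner of this
 * buffer */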
void Yuv::copyFromYuv(const Yuv& srcYuv)
{
    X265_CHECK(m_size >= srcYuv.m_size, "invalid size\n");

    /* copy at the source's partition size; this buffer may be larger */
    primitives.luma_copy_pp[srcYuv.m_part](m_buf[0], m_size, srcYuv.m_buf[0], srcYuv.m_size);
    primitives.chroma[m_csp].copy_pp[srcYuv.m_part](m_buf[1], m_csize, srcYuv.m_buf[1], srcYuv.m_csize);
    primitives.chroma[m_csp].copy_pp[srcYuv.m_part](m_buf[2], m_csize, srcYuv.m_buf[2], srcYuv.m_csize);
}

/* This version is intended for use by motion estimation, which requires the
 * luma fenc pixels to have a stride of FENC_STRIDE */
void Yuv::copyPUFromYuv(const Yuv& srcYuv, uint32_t absPartIdx, int partEnum, bool bChroma)
{
    X265_CHECK(m_size == FENC_STRIDE && m_size >= srcYuv.m_size, "PU buffer size mismatch\n");

    const pixel* srcY = srcYuv.m_buf[0] + getAddrOffset(absPartIdx, srcYuv.m_size);
    primitives.luma_copy_pp[partEnum](m_buf[0], m_size, srcY, srcYuv.m_size);

    if (bChroma)
    {
        const pixel* srcU = srcYuv.m_buf[1] + srcYuv.getChromaAddrOffset(absPartIdx);
        const pixel* srcV = srcYuv.m_buf[2] + srcYuv.getChromaAddrOffset(absPartIdx);
        primitives.chroma[m_csp].copy_pp[partEnum](m_buf[1], m_csize, srcU, srcYuv.m_csize);
        primitives.chroma[m_csp].copy_pp[partEnum](m_buf[2], m_csize, srcV, srcYuv.m_csize);
    }
}

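/* Copy this entire buffer into a sub-block of a larger Yuv, at the offset
 * given by absPartIdx */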
void Yuv::copyToPartYuv(Yuv& dstYuv, uint32_t absPartIdx) const
{
    pixel* dstY = dstYuv.getLumaAddr(absPartIdx);
    primitives.luma_copy_pp[m_part](dstY, dstYuv.m_size, m_buf[0], m_size);

    pixel* dstU = dstYuv.getCbAddr(absPartIdx);
    pixel* dstV = dstYuv.getCrAddr(absPartIdx);
    primitives.chroma[m_csp].copy_pp[m_part](dstU, dstYuv.m_csize, m_buf[1], m_csize);
    primitives.chroma[m_csp].copy_pp[m_part](dstV, dstYuv.m_csize, m_buf[2], m_csize);
}

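/* Copy the sub-block of this buffer at absPartIdx into the top-left corner
 * of a smaller Yuv */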
void Yuv::copyPartToYuv(Yuv& dstYuv, uint32_t absPartIdx) const
{
    const pixel* srcY = m_buf[0] + getAddrOffset(absPartIdx, m_size);
    pixel* dstY = dstYuv.m_buf[0];
    primitives.luma_copy_pp[dstYuv.m_part](dstY, dstYuv.m_size, srcY, m_size);

    const pixel* srcU = m_buf[1] + getChromaAddrOffset(absPartIdx);
    const pixel* srcV = m_buf[2] + getChromaAddrOffset(absPartIdx);
    pixel* dstU = dstYuv.m_buf[1];
    pixel* dstV = dstYuv.m_buf[2];
    primitives.chroma[m_csp].copy_pp[dstYuv.m_part](dstU, dstYuv.m_csize, srcU, m_csize);
    primitives.chroma[m_csp].copy_pp[dstYuv.m_part](dstV, dstYuv.m_csize, srcV, m_csize);
}

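/* Reconstruct: add a 16-bit residual (srcYuv1) to a prediction (srcYuv0)
 * and clip the sums to the valid pixel range, writing into this buffer */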
void Yuv::addClip(const Yuv& srcYuv0, const ShortYuv& srcYuv1, uint32_t log2SizeL)
{
    primitives.luma_add_ps[log2SizeL - 2](m_buf[0], m_size, srcYuv0.m_buf[0], srcYuv1.m_buf[0], srcYuv0.m_size, srcYuv1.m_size);
    primitives.chroma[m_csp].add_ps[log2SizeL - 2](m_buf[1], m_csize, srcYuv0.m_buf[1], srcYuv1.m_buf[1], srcYuv0.m_csize, srcYuv1.m_csize);
    primitives.chroma[m_csp].add_ps[log2SizeL - 2](m_buf[2], m_csize, srcYuv0.m_buf[2], srcYuv1.m_buf[2], srcYuv0.m_csize, srcYuv1.m_csize);
}

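/* Bidirectional prediction: average two 16-bit intermediate prediction
 * buffers into final pixel values; luma and chroma may each be skipped */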
void Yuv::addAvg(const ShortYuv& srcYuv0, const ShortYuv& srcYuv1, uint32_t absPartIdx, uint32_t width, uint32_t height, bool bLuma, bool bChroma)
{
    int part = partitionFromSizes(width, height);

    if (bLuma)
    {
        const int16_t* srcY0 = srcYuv0.getLumaAddr(absPartIdx);
        const int16_t* srcY1 = srcYuv1.getLumaAddr(absPartIdx);
        pixel* dstY = getLumaAddr(absPartIdx);
        primitives.luma_addAvg[part](srcY0, srcY1, dstY, srcYuv0.m_size, srcYuv1.m_size, m_size);
    }
    if (bChroma)
    {
        const int16_t* srcU0 = srcYuv0.getCbAddr(absPartIdx);
        const int16_t* srcV0 = srcYuv0.getCrAddr(absPartIdx);
        const int16_t* srcU1 = srcYuv1.getCbAddr(absPartIdx);
        const int16_t* srcV1 = srcYuv1.getCrAddr(absPartIdx);
        pixel* dstU = getCbAddr(absPartIdx);
        pixel* dstV = getCrAddr(absPartIdx);
        primitives.chroma[m_csp].addAvg[part](srcU0, srcU1, dstU, srcYuv0.m_csize, srcYuv1.m_csize, m_csize);
        primitives.chroma[m_csp].addAvg[part](srcV0, srcV1, dstV, srcYuv0.m_csize, srcYuv1.m_csize, m_csize);
    }
}

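/* Copy one luma block, located at the same partition offset in both
 * buffers */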
void Yuv::copyPartToPartLuma(Yuv& dstYuv, uint32_t absPartIdx, uint32_t log2Size) const
{
    const pixel* src = getLumaAddr(absPartIdx);
    pixel* dst = dstYuv.getLumaAddr(absPartIdx);
    primitives.luma_copy_pp[log2Size - 2](dst, dstYuv.m_size, src, m_size);
}

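/* Chroma counterpart of copyPartToPartLuma; the block size is given as a
 * luma log2 size and the csp-specific primitive scales it for chroma */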
void Yuv::copyPartToPartChroma(Yuv& dstYuv, uint32_t absPartIdx, uint32_t log2SizeL) const
{
    int part = partitionFromLog2Size(log2SizeL);
    const pixel* srcU = getCbAddr(absPartIdx);
    const pixel* srcV = getCrAddr(absPartIdx);
    pixel* dstU = dstYuv.getCbAddr(absPartIdx);
    pixel* dstV = dstYuv.getCrAddr(absPartIdx);
    primitives.chroma[m_csp].copy_pp[part](dstU, dstYuv.m_csize, srcU, m_csize);
    primitives.chroma[m_csp].copy_pp[part](dstV, dstYuv.m_csize, srcV, m_csize);
}