fffc2153742487b32073df0a93fee58401a005dc
[deb_x265.git] / source / common / yuv.cpp
1 /*****************************************************************************
2 * Copyright (C) 2014 x265 project
3 *
4 * Authors: Steve Borho <steve@borho.org>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
19 *
20 * This program is also available under a commercial proprietary license.
21 * For more information, contact us at license @ x265.com.
22 *****************************************************************************/
23
24
25 #include "common.h"
26 #include "yuv.h"
27 #include "shortyuv.h"
28 #include "picyuv.h"
29 #include "primitives.h"
30
31 using namespace x265;
32
33 Yuv::Yuv()
34 {
35 m_buf[0] = NULL;
36 m_buf[1] = NULL;
37 m_buf[2] = NULL;
38 }
39
40 bool Yuv::create(uint32_t size, int csp)
41 {
42 m_csp = csp;
43 m_hChromaShift = CHROMA_H_SHIFT(csp);
44 m_vChromaShift = CHROMA_V_SHIFT(csp);
45
46 // set width and height
47 m_size = size;
48 m_csize = size >> m_hChromaShift;
49 m_part = partitionFromSizes(size, size);
50
51 size_t sizeL = size * size;
52 size_t sizeC = sizeL >> (m_vChromaShift + m_hChromaShift);
53
54 X265_CHECK((sizeC & 15) == 0, "invalid size");
55
56 // memory allocation (padded for SIMD reads)
57 CHECKED_MALLOC(m_buf[0], pixel, sizeL + sizeC * 2 + 8);
58 m_buf[1] = m_buf[0] + sizeL;
59 m_buf[2] = m_buf[0] + sizeL + sizeC;
60 return true;
61
62 fail:
63 return false;
64 }
65
66 void Yuv::destroy()
67 {
68 X265_FREE(m_buf[0]);
69 }
70
71 void Yuv::copyToPicYuv(PicYuv& dstPic, uint32_t cuAddr, uint32_t absPartIdx) const
72 {
73 pixel* dstY = dstPic.getLumaAddr(cuAddr, absPartIdx);
74
75 primitives.luma_copy_pp[m_part](dstY, dstPic.m_stride, m_buf[0], m_size);
76
77 pixel* dstU = dstPic.getCbAddr(cuAddr, absPartIdx);
78 pixel* dstV = dstPic.getCrAddr(cuAddr, absPartIdx);
79 primitives.chroma[m_csp].copy_pp[m_part](dstU, dstPic.m_strideC, m_buf[1], m_csize);
80 primitives.chroma[m_csp].copy_pp[m_part](dstV, dstPic.m_strideC, m_buf[2], m_csize);
81 }
82
83 void Yuv::copyFromPicYuv(const PicYuv& srcPic, uint32_t cuAddr, uint32_t absPartIdx)
84 {
85 /* We cheat with const_cast internally because the get methods are not capable of
86 * returning const buffers and the primitives are not const aware, but we know
87 * this function does not modify srcPic */
88 PicYuv& srcPicSafe = const_cast<PicYuv&>(srcPic);
89 pixel* srcY = srcPicSafe.getLumaAddr(cuAddr, absPartIdx);
90
91 primitives.luma_copy_pp[m_part](m_buf[0], m_size, srcY, srcPic.m_stride);
92
93 pixel* srcU = srcPicSafe.getCbAddr(cuAddr, absPartIdx);
94 pixel* srcV = srcPicSafe.getCrAddr(cuAddr, absPartIdx);
95 primitives.chroma[m_csp].copy_pp[m_part](m_buf[1], m_csize, srcU, srcPicSafe.m_strideC);
96 primitives.chroma[m_csp].copy_pp[m_part](m_buf[2], m_csize, srcV, srcPicSafe.m_strideC);
97 }
98
99 void Yuv::copyFromYuv(const Yuv& srcYuv)
100 {
101 X265_CHECK(m_size <= srcYuv.m_size, "invalid size\n");
102
103 primitives.luma_copy_pp[m_part](m_buf[0], m_size, srcYuv.m_buf[0], srcYuv.m_size);
104 primitives.chroma[m_csp].copy_pp[m_part](m_buf[1], m_csize, srcYuv.m_buf[1], srcYuv.m_csize);
105 primitives.chroma[m_csp].copy_pp[m_part](m_buf[2], m_csize, srcYuv.m_buf[2], srcYuv.m_csize);
106 }
107
108 void Yuv::copyToPartYuv(Yuv& dstYuv, uint32_t absPartIdx) const
109 {
110 pixel* dstY = dstYuv.getLumaAddr(absPartIdx);
111 primitives.luma_copy_pp[m_part](dstY, dstYuv.m_size, m_buf[0], m_size);
112
113 pixel* dstU = dstYuv.getCbAddr(absPartIdx);
114 pixel* dstV = dstYuv.getCrAddr(absPartIdx);
115 primitives.chroma[m_csp].copy_pp[m_part](dstU, dstYuv.m_csize, m_buf[1], m_csize);
116 primitives.chroma[m_csp].copy_pp[m_part](dstV, dstYuv.m_csize, m_buf[2], m_csize);
117 }
118
119 void Yuv::copyPartToYuv(Yuv& dstYuv, uint32_t absPartIdx) const
120 {
121 pixel* srcY = m_buf[0] + getAddrOffset(absPartIdx, m_size);
122 pixel* dstY = dstYuv.m_buf[0];
123
124 primitives.luma_copy_pp[dstYuv.m_part](dstY, dstYuv.m_size, srcY, m_size);
125
126 pixel* srcU = m_buf[1] + getChromaAddrOffset(absPartIdx);
127 pixel* srcV = m_buf[2] + getChromaAddrOffset(absPartIdx);
128 pixel* dstU = dstYuv.m_buf[1];
129 pixel* dstV = dstYuv.m_buf[2];
130 primitives.chroma[m_csp].copy_pp[dstYuv.m_part](dstU, dstYuv.m_csize, srcU, m_csize);
131 primitives.chroma[m_csp].copy_pp[dstYuv.m_part](dstV, dstYuv.m_csize, srcV, m_csize);
132 }
133
134 void Yuv::addClip(const Yuv& srcYuv0, const ShortYuv& srcYuv1, uint32_t log2SizeL)
135 {
136 primitives.luma_add_ps[log2SizeL - 2](m_buf[0], m_size, srcYuv0.m_buf[0], srcYuv1.m_buf[0], srcYuv0.m_size, srcYuv1.m_size);
137 primitives.chroma[m_csp].add_ps[log2SizeL - 2](m_buf[1], m_csize, srcYuv0.m_buf[1], srcYuv1.m_buf[1], srcYuv0.m_csize, srcYuv1.m_csize);
138 primitives.chroma[m_csp].add_ps[log2SizeL - 2](m_buf[2], m_csize, srcYuv0.m_buf[2], srcYuv1.m_buf[2], srcYuv0.m_csize, srcYuv1.m_csize);
139 }
140
141 void Yuv::addAvg(const ShortYuv& srcYuv0, const ShortYuv& srcYuv1, uint32_t absPartIdx, uint32_t width, uint32_t height, bool bLuma, bool bChroma)
142 {
143 int part = partitionFromSizes(width, height);
144
145 if (bLuma)
146 {
147 int16_t* srcY0 = const_cast<ShortYuv&>(srcYuv0).getLumaAddr(absPartIdx);
148 int16_t* srcY1 = const_cast<ShortYuv&>(srcYuv1).getLumaAddr(absPartIdx);
149 pixel* dstY = getLumaAddr(absPartIdx);
150
151 primitives.luma_addAvg[part](srcY0, srcY1, dstY, srcYuv0.m_size, srcYuv1.m_size, m_size);
152 }
153 if (bChroma)
154 {
155 int16_t* srcU0 = const_cast<ShortYuv&>(srcYuv0).getCbAddr(absPartIdx);
156 int16_t* srcV0 = const_cast<ShortYuv&>(srcYuv0).getCrAddr(absPartIdx);
157 int16_t* srcU1 = const_cast<ShortYuv&>(srcYuv1).getCbAddr(absPartIdx);
158 int16_t* srcV1 = const_cast<ShortYuv&>(srcYuv1).getCrAddr(absPartIdx);
159 pixel* dstU = getCbAddr(absPartIdx);
160 pixel* dstV = getCrAddr(absPartIdx);
161
162 primitives.chroma[m_csp].addAvg[part](srcU0, srcU1, dstU, srcYuv0.m_csize, srcYuv1.m_csize, m_csize);
163 primitives.chroma[m_csp].addAvg[part](srcV0, srcV1, dstV, srcYuv0.m_csize, srcYuv1.m_csize, m_csize);
164 }
165 }
166
167 void Yuv::copyPartToPartLuma(Yuv& dstYuv, uint32_t absPartIdx, uint32_t log2Size) const
168 {
169 const pixel* src = getLumaAddr(absPartIdx);
170 pixel* dst = dstYuv.getLumaAddr(absPartIdx);
171 primitives.square_copy_pp[log2Size - 2](dst, dstYuv.m_size, const_cast<pixel*>(src), m_size);
172 }
173
174 void Yuv::copyPartToPartChroma(Yuv& dstYuv, uint32_t absPartIdx, uint32_t log2SizeL) const
175 {
176 int part = partitionFromLog2Size(log2SizeL);
177 const pixel* srcU = getCbAddr(absPartIdx);
178 const pixel* srcV = getCrAddr(absPartIdx);
179 pixel* dstU = dstYuv.getCbAddr(absPartIdx);
180 pixel* dstV = dstYuv.getCrAddr(absPartIdx);
181
182 primitives.chroma[m_csp].copy_pp[part](dstU, dstYuv.m_csize, const_cast<pixel*>(srcU), m_csize);
183 primitives.chroma[m_csp].copy_pp[part](dstV, dstYuv.m_csize, const_cast<pixel*>(srcV), m_csize);
184 }