Commit | Line | Data |
---|---|---|
72b9787e JB |
1 | /***************************************************************************** |
2 | * Copyright (C) 2014 x265 project | |
3 | * | |
4 | * Authors: Steve Borho <steve@borho.org> | |
5 | * | |
6 | * This program is free software; you can redistribute it and/or modify | |
7 | * it under the terms of the GNU General Public License as published by | |
8 | * the Free Software Foundation; either version 2 of the License, or | |
9 | * (at your option) any later version. | |
10 | * | |
11 | * This program is distributed in the hope that it will be useful, | |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | * GNU General Public License for more details. | |
15 | * | |
16 | * You should have received a copy of the GNU General Public License | |
17 | * along with this program; if not, write to the Free Software | |
18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. | |
19 | * | |
20 | * This program is also available under a commercial proprietary license. | |
21 | * For more information, contact us at license @ x265.com. | |
22 | *****************************************************************************/ | |
23 | ||
24 | ||
25 | #include "common.h" | |
26 | #include "yuv.h" | |
27 | #include "shortyuv.h" | |
28 | #include "picyuv.h" | |
29 | #include "primitives.h" | |
30 | ||
31 | using namespace x265; | |
32 | ||
33 | Yuv::Yuv() | |
34 | { | |
35 | m_buf[0] = NULL; | |
36 | m_buf[1] = NULL; | |
37 | m_buf[2] = NULL; | |
38 | } | |
39 | ||
40 | bool Yuv::create(uint32_t size, int csp) | |
41 | { | |
42 | m_csp = csp; | |
43 | m_hChromaShift = CHROMA_H_SHIFT(csp); | |
44 | m_vChromaShift = CHROMA_V_SHIFT(csp); | |
45 | ||
72b9787e | 46 | m_size = size; |
72b9787e JB |
47 | m_part = partitionFromSizes(size, size); |
48 | ||
b53f7c52 JB |
49 | if (csp == X265_CSP_I400) |
50 | { | |
51 | CHECKED_MALLOC(m_buf[0], pixel, size * size + 8); | |
52 | m_buf[1] = m_buf[2] = 0; | |
53 | m_csize = MAX_INT; | |
54 | return true; | |
55 | } | |
56 | else | |
57 | { | |
58 | m_csize = size >> m_hChromaShift; | |
72b9787e | 59 | |
b53f7c52 JB |
60 | size_t sizeL = size * size; |
61 | size_t sizeC = sizeL >> (m_vChromaShift + m_hChromaShift); | |
72b9787e | 62 | |
b53f7c52 JB |
63 | X265_CHECK((sizeC & 15) == 0, "invalid size"); |
64 | ||
65 | // memory allocation (padded for SIMD reads) | |
66 | CHECKED_MALLOC(m_buf[0], pixel, sizeL + sizeC * 2 + 8); | |
67 | m_buf[1] = m_buf[0] + sizeL; | |
68 | m_buf[2] = m_buf[0] + sizeL + sizeC; | |
69 | return true; | |
70 | } | |
72b9787e JB |
71 | |
72 | fail: | |
73 | return false; | |
74 | } | |
75 | ||
76 | void Yuv::destroy() | |
77 | { | |
78 | X265_FREE(m_buf[0]); | |
79 | } | |
80 | ||
81 | void Yuv::copyToPicYuv(PicYuv& dstPic, uint32_t cuAddr, uint32_t absPartIdx) const | |
82 | { | |
83 | pixel* dstY = dstPic.getLumaAddr(cuAddr, absPartIdx); | |
72b9787e JB |
84 | primitives.luma_copy_pp[m_part](dstY, dstPic.m_stride, m_buf[0], m_size); |
85 | ||
86 | pixel* dstU = dstPic.getCbAddr(cuAddr, absPartIdx); | |
87 | pixel* dstV = dstPic.getCrAddr(cuAddr, absPartIdx); | |
88 | primitives.chroma[m_csp].copy_pp[m_part](dstU, dstPic.m_strideC, m_buf[1], m_csize); | |
89 | primitives.chroma[m_csp].copy_pp[m_part](dstV, dstPic.m_strideC, m_buf[2], m_csize); | |
90 | } | |
91 | ||
92 | void Yuv::copyFromPicYuv(const PicYuv& srcPic, uint32_t cuAddr, uint32_t absPartIdx) | |
93 | { | |
b53f7c52 | 94 | const pixel* srcY = srcPic.getLumaAddr(cuAddr, absPartIdx); |
72b9787e JB |
95 | primitives.luma_copy_pp[m_part](m_buf[0], m_size, srcY, srcPic.m_stride); |
96 | ||
b53f7c52 JB |
97 | const pixel* srcU = srcPic.getCbAddr(cuAddr, absPartIdx); |
98 | const pixel* srcV = srcPic.getCrAddr(cuAddr, absPartIdx); | |
99 | primitives.chroma[m_csp].copy_pp[m_part](m_buf[1], m_csize, srcU, srcPic.m_strideC); | |
100 | primitives.chroma[m_csp].copy_pp[m_part](m_buf[2], m_csize, srcV, srcPic.m_strideC); | |
72b9787e JB |
101 | } |
102 | ||
103 | void Yuv::copyFromYuv(const Yuv& srcYuv) | |
104 | { | |
b53f7c52 | 105 | X265_CHECK(m_size >= srcYuv.m_size, "invalid size\n"); |
72b9787e JB |
106 | |
107 | primitives.luma_copy_pp[m_part](m_buf[0], m_size, srcYuv.m_buf[0], srcYuv.m_size); | |
108 | primitives.chroma[m_csp].copy_pp[m_part](m_buf[1], m_csize, srcYuv.m_buf[1], srcYuv.m_csize); | |
109 | primitives.chroma[m_csp].copy_pp[m_part](m_buf[2], m_csize, srcYuv.m_buf[2], srcYuv.m_csize); | |
110 | } | |
111 | ||
b53f7c52 JB |
112 | /* This version is intended for use by ME, which required FENC_STRIDE for luma fenc pixels */ |
113 | void Yuv::copyPUFromYuv(const Yuv& srcYuv, uint32_t absPartIdx, int partEnum, bool bChroma) | |
114 | { | |
115 | X265_CHECK(m_size == FENC_STRIDE && m_size >= srcYuv.m_size, "PU buffer size mismatch\n"); | |
116 | ||
117 | const pixel* srcY = srcYuv.m_buf[0] + getAddrOffset(absPartIdx, srcYuv.m_size); | |
118 | primitives.luma_copy_pp[partEnum](m_buf[0], m_size, srcY, srcYuv.m_size); | |
119 | ||
120 | if (bChroma) | |
121 | { | |
122 | const pixel* srcU = srcYuv.m_buf[1] + srcYuv.getChromaAddrOffset(absPartIdx); | |
123 | const pixel* srcV = srcYuv.m_buf[2] + srcYuv.getChromaAddrOffset(absPartIdx); | |
124 | primitives.chroma[m_csp].copy_pp[partEnum](m_buf[1], m_csize, srcU, srcYuv.m_csize); | |
125 | primitives.chroma[m_csp].copy_pp[partEnum](m_buf[2], m_csize, srcV, srcYuv.m_csize); | |
126 | } | |
127 | } | |
128 | ||
72b9787e JB |
129 | void Yuv::copyToPartYuv(Yuv& dstYuv, uint32_t absPartIdx) const |
130 | { | |
131 | pixel* dstY = dstYuv.getLumaAddr(absPartIdx); | |
132 | primitives.luma_copy_pp[m_part](dstY, dstYuv.m_size, m_buf[0], m_size); | |
133 | ||
134 | pixel* dstU = dstYuv.getCbAddr(absPartIdx); | |
135 | pixel* dstV = dstYuv.getCrAddr(absPartIdx); | |
136 | primitives.chroma[m_csp].copy_pp[m_part](dstU, dstYuv.m_csize, m_buf[1], m_csize); | |
137 | primitives.chroma[m_csp].copy_pp[m_part](dstV, dstYuv.m_csize, m_buf[2], m_csize); | |
138 | } | |
139 | ||
140 | void Yuv::copyPartToYuv(Yuv& dstYuv, uint32_t absPartIdx) const | |
141 | { | |
142 | pixel* srcY = m_buf[0] + getAddrOffset(absPartIdx, m_size); | |
143 | pixel* dstY = dstYuv.m_buf[0]; | |
72b9787e JB |
144 | primitives.luma_copy_pp[dstYuv.m_part](dstY, dstYuv.m_size, srcY, m_size); |
145 | ||
146 | pixel* srcU = m_buf[1] + getChromaAddrOffset(absPartIdx); | |
147 | pixel* srcV = m_buf[2] + getChromaAddrOffset(absPartIdx); | |
148 | pixel* dstU = dstYuv.m_buf[1]; | |
149 | pixel* dstV = dstYuv.m_buf[2]; | |
150 | primitives.chroma[m_csp].copy_pp[dstYuv.m_part](dstU, dstYuv.m_csize, srcU, m_csize); | |
151 | primitives.chroma[m_csp].copy_pp[dstYuv.m_part](dstV, dstYuv.m_csize, srcV, m_csize); | |
152 | } | |
153 | ||
154 | void Yuv::addClip(const Yuv& srcYuv0, const ShortYuv& srcYuv1, uint32_t log2SizeL) | |
155 | { | |
156 | primitives.luma_add_ps[log2SizeL - 2](m_buf[0], m_size, srcYuv0.m_buf[0], srcYuv1.m_buf[0], srcYuv0.m_size, srcYuv1.m_size); | |
157 | primitives.chroma[m_csp].add_ps[log2SizeL - 2](m_buf[1], m_csize, srcYuv0.m_buf[1], srcYuv1.m_buf[1], srcYuv0.m_csize, srcYuv1.m_csize); | |
158 | primitives.chroma[m_csp].add_ps[log2SizeL - 2](m_buf[2], m_csize, srcYuv0.m_buf[2], srcYuv1.m_buf[2], srcYuv0.m_csize, srcYuv1.m_csize); | |
159 | } | |
160 | ||
161 | void Yuv::addAvg(const ShortYuv& srcYuv0, const ShortYuv& srcYuv1, uint32_t absPartIdx, uint32_t width, uint32_t height, bool bLuma, bool bChroma) | |
162 | { | |
163 | int part = partitionFromSizes(width, height); | |
164 | ||
165 | if (bLuma) | |
166 | { | |
b53f7c52 JB |
167 | const int16_t* srcY0 = srcYuv0.getLumaAddr(absPartIdx); |
168 | const int16_t* srcY1 = srcYuv1.getLumaAddr(absPartIdx); | |
72b9787e | 169 | pixel* dstY = getLumaAddr(absPartIdx); |
72b9787e JB |
170 | primitives.luma_addAvg[part](srcY0, srcY1, dstY, srcYuv0.m_size, srcYuv1.m_size, m_size); |
171 | } | |
172 | if (bChroma) | |
173 | { | |
b53f7c52 JB |
174 | const int16_t* srcU0 = srcYuv0.getCbAddr(absPartIdx); |
175 | const int16_t* srcV0 = srcYuv0.getCrAddr(absPartIdx); | |
176 | const int16_t* srcU1 = srcYuv1.getCbAddr(absPartIdx); | |
177 | const int16_t* srcV1 = srcYuv1.getCrAddr(absPartIdx); | |
72b9787e JB |
178 | pixel* dstU = getCbAddr(absPartIdx); |
179 | pixel* dstV = getCrAddr(absPartIdx); | |
72b9787e JB |
180 | primitives.chroma[m_csp].addAvg[part](srcU0, srcU1, dstU, srcYuv0.m_csize, srcYuv1.m_csize, m_csize); |
181 | primitives.chroma[m_csp].addAvg[part](srcV0, srcV1, dstV, srcYuv0.m_csize, srcYuv1.m_csize, m_csize); | |
182 | } | |
183 | } | |
184 | ||
185 | void Yuv::copyPartToPartLuma(Yuv& dstYuv, uint32_t absPartIdx, uint32_t log2Size) const | |
186 | { | |
187 | const pixel* src = getLumaAddr(absPartIdx); | |
188 | pixel* dst = dstYuv.getLumaAddr(absPartIdx); | |
b53f7c52 | 189 | primitives.luma_copy_pp[log2Size - 2](dst, dstYuv.m_size, src, m_size); |
72b9787e JB |
190 | } |
191 | ||
192 | void Yuv::copyPartToPartChroma(Yuv& dstYuv, uint32_t absPartIdx, uint32_t log2SizeL) const | |
193 | { | |
194 | int part = partitionFromLog2Size(log2SizeL); | |
195 | const pixel* srcU = getCbAddr(absPartIdx); | |
196 | const pixel* srcV = getCrAddr(absPartIdx); | |
197 | pixel* dstU = dstYuv.getCbAddr(absPartIdx); | |
198 | pixel* dstV = dstYuv.getCrAddr(absPartIdx); | |
b53f7c52 JB |
199 | primitives.chroma[m_csp].copy_pp[part](dstU, dstYuv.m_csize, srcU, m_csize); |
200 | primitives.chroma[m_csp].copy_pp[part](dstV, dstYuv.m_csize, srcV, m_csize); | |
72b9787e | 201 | } |