1 /*****************************************************************************
2 * Copyright (C) 2014 x265 project
4 * Authors: Steve Borho <steve@borho.org>
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
20 * This program is also available under a commercial proprietary license.
21 * For more information, contact us at license @ x265.com.
22 *****************************************************************************/
29 #include "primitives.h"
40 bool Yuv::create(uint32_t size
, int csp
)
43 m_hChromaShift
= CHROMA_H_SHIFT(csp
);
44 m_vChromaShift
= CHROMA_V_SHIFT(csp
);
46 // set width and height
48 m_csize
= size
>> m_hChromaShift
;
49 m_part
= partitionFromSizes(size
, size
);
51 size_t sizeL
= size
* size
;
52 size_t sizeC
= sizeL
>> (m_vChromaShift
+ m_hChromaShift
);
54 X265_CHECK((sizeC
& 15) == 0, "invalid size");
56 // memory allocation (padded for SIMD reads)
57 CHECKED_MALLOC(m_buf
[0], pixel
, sizeL
+ sizeC
* 2 + 8);
58 m_buf
[1] = m_buf
[0] + sizeL
;
59 m_buf
[2] = m_buf
[0] + sizeL
+ sizeC
;
71 void Yuv::copyToPicYuv(PicYuv
& dstPic
, uint32_t cuAddr
, uint32_t absPartIdx
) const
73 pixel
* dstY
= dstPic
.getLumaAddr(cuAddr
, absPartIdx
);
75 primitives
.luma_copy_pp
[m_part
](dstY
, dstPic
.m_stride
, m_buf
[0], m_size
);
77 pixel
* dstU
= dstPic
.getCbAddr(cuAddr
, absPartIdx
);
78 pixel
* dstV
= dstPic
.getCrAddr(cuAddr
, absPartIdx
);
79 primitives
.chroma
[m_csp
].copy_pp
[m_part
](dstU
, dstPic
.m_strideC
, m_buf
[1], m_csize
);
80 primitives
.chroma
[m_csp
].copy_pp
[m_part
](dstV
, dstPic
.m_strideC
, m_buf
[2], m_csize
);
83 void Yuv::copyFromPicYuv(const PicYuv
& srcPic
, uint32_t cuAddr
, uint32_t absPartIdx
)
85 /* We cheat with const_cast internally because the get methods are not capable of
86 * returning const buffers and the primitives are not const aware, but we know
87 * this function does not modify srcPic */
88 PicYuv
& srcPicSafe
= const_cast<PicYuv
&>(srcPic
);
89 pixel
* srcY
= srcPicSafe
.getLumaAddr(cuAddr
, absPartIdx
);
91 primitives
.luma_copy_pp
[m_part
](m_buf
[0], m_size
, srcY
, srcPic
.m_stride
);
93 pixel
* srcU
= srcPicSafe
.getCbAddr(cuAddr
, absPartIdx
);
94 pixel
* srcV
= srcPicSafe
.getCrAddr(cuAddr
, absPartIdx
);
95 primitives
.chroma
[m_csp
].copy_pp
[m_part
](m_buf
[1], m_csize
, srcU
, srcPicSafe
.m_strideC
);
96 primitives
.chroma
[m_csp
].copy_pp
[m_part
](m_buf
[2], m_csize
, srcV
, srcPicSafe
.m_strideC
);
99 void Yuv::copyFromYuv(const Yuv
& srcYuv
)
101 X265_CHECK(m_size
<= srcYuv
.m_size
, "invalid size\n");
103 primitives
.luma_copy_pp
[m_part
](m_buf
[0], m_size
, srcYuv
.m_buf
[0], srcYuv
.m_size
);
104 primitives
.chroma
[m_csp
].copy_pp
[m_part
](m_buf
[1], m_csize
, srcYuv
.m_buf
[1], srcYuv
.m_csize
);
105 primitives
.chroma
[m_csp
].copy_pp
[m_part
](m_buf
[2], m_csize
, srcYuv
.m_buf
[2], srcYuv
.m_csize
);
108 void Yuv::copyToPartYuv(Yuv
& dstYuv
, uint32_t absPartIdx
) const
110 pixel
* dstY
= dstYuv
.getLumaAddr(absPartIdx
);
111 primitives
.luma_copy_pp
[m_part
](dstY
, dstYuv
.m_size
, m_buf
[0], m_size
);
113 pixel
* dstU
= dstYuv
.getCbAddr(absPartIdx
);
114 pixel
* dstV
= dstYuv
.getCrAddr(absPartIdx
);
115 primitives
.chroma
[m_csp
].copy_pp
[m_part
](dstU
, dstYuv
.m_csize
, m_buf
[1], m_csize
);
116 primitives
.chroma
[m_csp
].copy_pp
[m_part
](dstV
, dstYuv
.m_csize
, m_buf
[2], m_csize
);
119 void Yuv::copyPartToYuv(Yuv
& dstYuv
, uint32_t absPartIdx
) const
121 pixel
* srcY
= m_buf
[0] + getAddrOffset(absPartIdx
, m_size
);
122 pixel
* dstY
= dstYuv
.m_buf
[0];
124 primitives
.luma_copy_pp
[dstYuv
.m_part
](dstY
, dstYuv
.m_size
, srcY
, m_size
);
126 pixel
* srcU
= m_buf
[1] + getChromaAddrOffset(absPartIdx
);
127 pixel
* srcV
= m_buf
[2] + getChromaAddrOffset(absPartIdx
);
128 pixel
* dstU
= dstYuv
.m_buf
[1];
129 pixel
* dstV
= dstYuv
.m_buf
[2];
130 primitives
.chroma
[m_csp
].copy_pp
[dstYuv
.m_part
](dstU
, dstYuv
.m_csize
, srcU
, m_csize
);
131 primitives
.chroma
[m_csp
].copy_pp
[dstYuv
.m_part
](dstV
, dstYuv
.m_csize
, srcV
, m_csize
);
134 void Yuv::addClip(const Yuv
& srcYuv0
, const ShortYuv
& srcYuv1
, uint32_t log2SizeL
)
136 primitives
.luma_add_ps
[log2SizeL
- 2](m_buf
[0], m_size
, srcYuv0
.m_buf
[0], srcYuv1
.m_buf
[0], srcYuv0
.m_size
, srcYuv1
.m_size
);
137 primitives
.chroma
[m_csp
].add_ps
[log2SizeL
- 2](m_buf
[1], m_csize
, srcYuv0
.m_buf
[1], srcYuv1
.m_buf
[1], srcYuv0
.m_csize
, srcYuv1
.m_csize
);
138 primitives
.chroma
[m_csp
].add_ps
[log2SizeL
- 2](m_buf
[2], m_csize
, srcYuv0
.m_buf
[2], srcYuv1
.m_buf
[2], srcYuv0
.m_csize
, srcYuv1
.m_csize
);
141 void Yuv::addAvg(const ShortYuv
& srcYuv0
, const ShortYuv
& srcYuv1
, uint32_t absPartIdx
, uint32_t width
, uint32_t height
, bool bLuma
, bool bChroma
)
143 int part
= partitionFromSizes(width
, height
);
147 int16_t* srcY0
= const_cast<ShortYuv
&>(srcYuv0
).getLumaAddr(absPartIdx
);
148 int16_t* srcY1
= const_cast<ShortYuv
&>(srcYuv1
).getLumaAddr(absPartIdx
);
149 pixel
* dstY
= getLumaAddr(absPartIdx
);
151 primitives
.luma_addAvg
[part
](srcY0
, srcY1
, dstY
, srcYuv0
.m_size
, srcYuv1
.m_size
, m_size
);
155 int16_t* srcU0
= const_cast<ShortYuv
&>(srcYuv0
).getCbAddr(absPartIdx
);
156 int16_t* srcV0
= const_cast<ShortYuv
&>(srcYuv0
).getCrAddr(absPartIdx
);
157 int16_t* srcU1
= const_cast<ShortYuv
&>(srcYuv1
).getCbAddr(absPartIdx
);
158 int16_t* srcV1
= const_cast<ShortYuv
&>(srcYuv1
).getCrAddr(absPartIdx
);
159 pixel
* dstU
= getCbAddr(absPartIdx
);
160 pixel
* dstV
= getCrAddr(absPartIdx
);
162 primitives
.chroma
[m_csp
].addAvg
[part
](srcU0
, srcU1
, dstU
, srcYuv0
.m_csize
, srcYuv1
.m_csize
, m_csize
);
163 primitives
.chroma
[m_csp
].addAvg
[part
](srcV0
, srcV1
, dstV
, srcYuv0
.m_csize
, srcYuv1
.m_csize
, m_csize
);
167 void Yuv::copyPartToPartLuma(Yuv
& dstYuv
, uint32_t absPartIdx
, uint32_t log2Size
) const
169 const pixel
* src
= getLumaAddr(absPartIdx
);
170 pixel
* dst
= dstYuv
.getLumaAddr(absPartIdx
);
171 primitives
.square_copy_pp
[log2Size
- 2](dst
, dstYuv
.m_size
, const_cast<pixel
*>(src
), m_size
);
174 void Yuv::copyPartToPartChroma(Yuv
& dstYuv
, uint32_t absPartIdx
, uint32_t log2SizeL
) const
176 int part
= partitionFromLog2Size(log2SizeL
);
177 const pixel
* srcU
= getCbAddr(absPartIdx
);
178 const pixel
* srcV
= getCrAddr(absPartIdx
);
179 pixel
* dstU
= dstYuv
.getCbAddr(absPartIdx
);
180 pixel
* dstV
= dstYuv
.getCrAddr(absPartIdx
);
182 primitives
.chroma
[m_csp
].copy_pp
[part
](dstU
, dstYuv
.m_csize
, const_cast<pixel
*>(srcU
), m_csize
);
183 primitives
.chroma
[m_csp
].copy_pp
[part
](dstV
, dstYuv
.m_csize
, const_cast<pixel
*>(srcV
), m_csize
);