eedb5b2f6fa6e88c0b5e7dfc428e904b4d8045ba
1 /*****************************************************************************
2 * Copyright (C) 2014 x265 project
4 * Authors: Steve Borho <steve@borho.org>
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
20 * This program is also available under a commercial proprietary license.
21 * For more information, contact us at license @ x265.com.
22 *****************************************************************************/
29 #include "primitives.h"
/* Set up this Yuv buffer for a square block of `size` x `size` luma pixels
 * in color space `csp`.
 *
 * Visible effects:
 *  - m_hChromaShift / m_vChromaShift are derived from csp through the
 *    CHROMA_H_SHIFT / CHROMA_V_SHIFT macros.
 *  - m_part is the partition index returned by partitionFromSizes(size, size).
 *  - m_csize is size >> m_hChromaShift.
 *  - m_buf[0..2] are assigned from CHECKED_MALLOC allocations (see below).
 *
 * NOTE(review): this listing is missing lines (braces, the branch structure
 * around the X265_CSP_I400 test, the return statements and whatever label
 * CHECKED_MALLOC jumps to on failure). The exact control flow and the
 * returned bool therefore cannot be confirmed from here. */
40 bool Yuv::create(uint32_t size
, int csp
)
43 m_hChromaShift
= CHROMA_H_SHIFT(csp
);
44 m_vChromaShift
= CHROMA_V_SHIFT(csp
);
47 m_part
= partitionFromSizes(size
, size
);
// X265_CSP_I400 test: the statements that follow allocate only m_buf[0]
// (size * size pixels plus 8 extra) and zero the two chroma pointers.
// Presumably they form the body of this `if` -- TODO confirm, the braces
// are not visible.
49 if (csp
== X265_CSP_I400
)
51 CHECKED_MALLOC(m_buf
[0], pixel
, size
* size
+ 8);
52 m_buf
[1] = m_buf
[2] = 0;
58 m_csize
= size
>> m_hChromaShift
;
// sizeL: pixel count of the luma plane. sizeC: sizeL shifted right by the
// sum of both chroma shifts, used below as the size of each chroma plane.
60 size_t sizeL
= size
* size
;
61 size_t sizeC
= sizeL
>> (m_vChromaShift
+ m_hChromaShift
);
// Checks that sizeC is a multiple of 16.
63 X265_CHECK((sizeC
& 15) == 0, "invalid size");
65 // memory allocation (padded for SIMD reads)
// One allocation of sizeL + 2 * sizeC + 8 pixels holds all three planes;
// m_buf[1] and m_buf[2] point into it at offsets sizeL and sizeL + sizeC.
66 CHECKED_MALLOC(m_buf
[0], pixel
, sizeL
+ sizeC
* 2 + 8);
67 m_buf
[1] = m_buf
[0] + sizeL
;
68 m_buf
[2] = m_buf
[0] + sizeL
+ sizeC
;
/* Copy all three planes of this buffer into the picture `dstPic`.
 *
 * The destination pointers come from dstPic.getLumaAddr / getCbAddr /
 * getCrAddr, each called with (cuAddr, absPartIdx). Every copy is done by a
 * copy primitive indexed with this buffer's m_part: luma_copy_pp for luma,
 * chroma[m_csp].copy_pp for Cb and Cr. Each call passes the destination
 * pointer and its stride (dstPic.m_stride or dstPic.m_strideC) first,
 * followed by this buffer's plane and its stride (m_size or m_csize).
 *
 * NOTE(review): the function's braces are not visible in this listing. */
81 void Yuv::copyToPicYuv(PicYuv
& dstPic
, uint32_t cuAddr
, uint32_t absPartIdx
) const
83 pixel
* dstY
= dstPic
.getLumaAddr(cuAddr
, absPartIdx
);
84 primitives
.luma_copy_pp
[m_part
](dstY
, dstPic
.m_stride
, m_buf
[0], m_size
);
// Both chroma planes share the picture's chroma stride (m_strideC).
86 pixel
* dstU
= dstPic
.getCbAddr(cuAddr
, absPartIdx
);
87 pixel
* dstV
= dstPic
.getCrAddr(cuAddr
, absPartIdx
);
88 primitives
.chroma
[m_csp
].copy_pp
[m_part
](dstU
, dstPic
.m_strideC
, m_buf
[1], m_csize
);
89 primitives
.chroma
[m_csp
].copy_pp
[m_part
](dstV
, dstPic
.m_strideC
, m_buf
[2], m_csize
);
/* Fill all three planes of this buffer from the picture `srcPic`.
 *
 * Mirror image of copyToPicYuv: the source pointers come from
 * srcPic.getLumaAddr / getCbAddr / getCrAddr called with
 * (cuAddr, absPartIdx), and the destination is this buffer's m_buf[0..2]
 * with strides m_size (luma) and m_csize (chroma). The copy primitives are
 * indexed with this buffer's m_part.
 *
 * NOTE(review): the function's braces are not visible in this listing. */
92 void Yuv::copyFromPicYuv(const PicYuv
& srcPic
, uint32_t cuAddr
, uint32_t absPartIdx
)
94 const pixel
* srcY
= srcPic
.getLumaAddr(cuAddr
, absPartIdx
);
95 primitives
.luma_copy_pp
[m_part
](m_buf
[0], m_size
, srcY
, srcPic
.m_stride
);
// Cb and Cr are read with the picture's chroma stride (m_strideC).
97 const pixel
* srcU
= srcPic
.getCbAddr(cuAddr
, absPartIdx
);
98 const pixel
* srcV
= srcPic
.getCrAddr(cuAddr
, absPartIdx
);
99 primitives
.chroma
[m_csp
].copy_pp
[m_part
](m_buf
[1], m_csize
, srcU
, srcPic
.m_strideC
);
100 primitives
.chroma
[m_csp
].copy_pp
[m_part
](m_buf
[2], m_csize
, srcV
, srcPic
.m_strideC
);
/* Copy the three planes of `srcYuv` into this buffer, starting at the
 * beginning of each plane.
 *
 * The copy primitives are indexed with THIS buffer's m_part, and the two
 * buffers may have different strides: the destination uses m_size / m_csize,
 * the source uses srcYuv.m_size / srcYuv.m_csize. The X265_CHECK states the
 * expectation that this buffer's m_size is at least srcYuv.m_size.
 *
 * NOTE(review): the function's braces are not visible in this listing. */
103 void Yuv::copyFromYuv(const Yuv
& srcYuv
)
105 X265_CHECK(m_size
>= srcYuv
.m_size
, "invalid size\n");
107 primitives
.luma_copy_pp
[m_part
](m_buf
[0], m_size
, srcYuv
.m_buf
[0], srcYuv
.m_size
);
108 primitives
.chroma
[m_csp
].copy_pp
[m_part
](m_buf
[1], m_csize
, srcYuv
.m_buf
[1], srcYuv
.m_csize
);
109 primitives
.chroma
[m_csp
].copy_pp
[m_part
](m_buf
[2], m_csize
, srcYuv
.m_buf
[2], srcYuv
.m_csize
);
/* Copy one prediction-unit sized region out of `srcYuv` into the start of
 * this buffer's planes.
 *
 *  - absPartIdx selects the source position: the luma offset is
 *    getAddrOffset(absPartIdx, srcYuv.m_size), the chroma offset is
 *    srcYuv.getChromaAddrOffset(absPartIdx).
 *  - partEnum indexes the copy primitives and so selects the block size.
 *  - The X265_CHECK expects this buffer's m_size to equal FENC_STRIDE and to
 *    be at least srcYuv.m_size.
 *
 * NOTE(review): bChroma is not referenced anywhere in the visible text, and
 * the embedded line numbers jump from 118 to 122. Presumably the chroma
 * copies below are guarded by `if (bChroma)` in the real file -- confirm
 * against the complete source. */
112 /* This version is intended for use by ME, which required FENC_STRIDE for luma fenc pixels */
113 void Yuv::copyPUFromYuv(const Yuv
& srcYuv
, uint32_t absPartIdx
, int partEnum
, bool bChroma
)
115 X265_CHECK(m_size
== FENC_STRIDE
&& m_size
>= srcYuv
.m_size
, "PU buffer size mismatch\n");
117 const pixel
* srcY
= srcYuv
.m_buf
[0] + getAddrOffset(absPartIdx
, srcYuv
.m_size
);
118 primitives
.luma_copy_pp
[partEnum
](m_buf
[0], m_size
, srcY
, srcYuv
.m_size
);
// Chroma part: same partEnum index, chroma strides (m_csize) on both sides.
122 const pixel
* srcU
= srcYuv
.m_buf
[1] + srcYuv
.getChromaAddrOffset(absPartIdx
);
123 const pixel
* srcV
= srcYuv
.m_buf
[2] + srcYuv
.getChromaAddrOffset(absPartIdx
);
124 primitives
.chroma
[m_csp
].copy_pp
[partEnum
](m_buf
[1], m_csize
, srcU
, srcYuv
.m_csize
);
125 primitives
.chroma
[m_csp
].copy_pp
[partEnum
](m_buf
[2], m_csize
, srcV
, srcYuv
.m_csize
);
/* Copy this buffer, from the start of each plane, into `dstYuv` at the
 * position addressed by absPartIdx.
 *
 * Destination pointers come from dstYuv.getLumaAddr / getCbAddr / getCrAddr
 * called with absPartIdx. The copy primitives are indexed with this
 * (source) buffer's m_part; strides are dstYuv.m_size / dstYuv.m_csize on
 * the destination side and m_size / m_csize on the source side.
 *
 * NOTE(review): the function's braces are not visible in this listing. */
129 void Yuv::copyToPartYuv(Yuv
& dstYuv
, uint32_t absPartIdx
) const
131 pixel
* dstY
= dstYuv
.getLumaAddr(absPartIdx
);
132 primitives
.luma_copy_pp
[m_part
](dstY
, dstYuv
.m_size
, m_buf
[0], m_size
);
134 pixel
* dstU
= dstYuv
.getCbAddr(absPartIdx
);
135 pixel
* dstV
= dstYuv
.getCrAddr(absPartIdx
);
136 primitives
.chroma
[m_csp
].copy_pp
[m_part
](dstU
, dstYuv
.m_csize
, m_buf
[1], m_csize
);
137 primitives
.chroma
[m_csp
].copy_pp
[m_part
](dstV
, dstYuv
.m_csize
, m_buf
[2], m_csize
);
/* Copy the region of this buffer addressed by absPartIdx into the start of
 * each plane of `dstYuv`.
 *
 * Counterpart of copyToPartYuv: here the SOURCE is offset
 * (getAddrOffset(absPartIdx, m_size) for luma, getChromaAddrOffset(absPartIdx)
 * for chroma) and the destination is dstYuv.m_buf[0..2] unoffset. The copy
 * primitives are indexed with dstYuv.m_part, i.e. the destination decides
 * the block size. The chroma primitive table is selected with this buffer's
 * m_csp.
 *
 * NOTE(review): the function's braces are not visible in this listing. */
140 void Yuv::copyPartToYuv(Yuv
& dstYuv
, uint32_t absPartIdx
) const
142 pixel
* srcY
= m_buf
[0] + getAddrOffset(absPartIdx
, m_size
);
143 pixel
* dstY
= dstYuv
.m_buf
[0];
144 primitives
.luma_copy_pp
[dstYuv
.m_part
](dstY
, dstYuv
.m_size
, srcY
, m_size
);
146 pixel
* srcU
= m_buf
[1] + getChromaAddrOffset(absPartIdx
);
147 pixel
* srcV
= m_buf
[2] + getChromaAddrOffset(absPartIdx
);
148 pixel
* dstU
= dstYuv
.m_buf
[1];
149 pixel
* dstV
= dstYuv
.m_buf
[2];
150 primitives
.chroma
[m_csp
].copy_pp
[dstYuv
.m_part
](dstU
, dstYuv
.m_csize
, srcU
, m_csize
);
151 primitives
.chroma
[m_csp
].copy_pp
[dstYuv
.m_part
](dstV
, dstYuv
.m_csize
, srcV
, m_csize
);
/* Combine the pixel buffer `srcYuv0` with the ShortYuv buffer `srcYuv1`
 * and store the result in this buffer, plane by plane.
 *
 * Each plane is handled by an add_ps primitive (luma_add_ps for luma,
 * chroma[m_csp].add_ps for Cb and Cr). The argument order is: destination
 * plane, destination stride, srcYuv0 plane, srcYuv1 plane, srcYuv0 stride,
 * srcYuv1 stride. All three calls index the primitive table with
 * log2SizeL - 2, so the chroma calls use the luma log2 size as well.
 *
 * The arithmetic itself lives in the primitives; judging by the names this
 * is presumably "prediction + residual, clipped to the pixel range" --
 * confirm against the primitive definitions.
 *
 * NOTE(review): the function's braces are not visible in this listing. */
154 void Yuv::addClip(const Yuv
& srcYuv0
, const ShortYuv
& srcYuv1
, uint32_t log2SizeL
)
156 primitives
.luma_add_ps
[log2SizeL
- 2](m_buf
[0], m_size
, srcYuv0
.m_buf
[0], srcYuv1
.m_buf
[0], srcYuv0
.m_size
, srcYuv1
.m_size
);
157 primitives
.chroma
[m_csp
].add_ps
[log2SizeL
- 2](m_buf
[1], m_csize
, srcYuv0
.m_buf
[1], srcYuv1
.m_buf
[1], srcYuv0
.m_csize
, srcYuv1
.m_csize
);
158 primitives
.chroma
[m_csp
].add_ps
[log2SizeL
- 2](m_buf
[2], m_csize
, srcYuv0
.m_buf
[2], srcYuv1
.m_buf
[2], srcYuv0
.m_csize
, srcYuv1
.m_csize
);
/* Combine two ShortYuv buffers into this buffer through the addAvg
 * primitives, for the width x height region addressed by absPartIdx.
 *
 * The primitive index is partitionFromSizes(width, height) and is used for
 * luma and chroma alike. All source and destination pointers are taken at
 * the same absPartIdx (getLumaAddr / getCbAddr / getCrAddr on srcYuv0,
 * srcYuv1 and this buffer). Each primitive call receives: source 0, source 1,
 * destination, source 0 stride, source 1 stride, destination stride.
 *
 * NOTE(review): bLuma and bChroma are not referenced in the visible text,
 * and the embedded line numbers skip 164-166 and 171-173. Presumably the
 * luma section is guarded by `if (bLuma)` and the chroma section by
 * `if (bChroma)` in the real file -- confirm against the complete source. */
161 void Yuv::addAvg(const ShortYuv
& srcYuv0
, const ShortYuv
& srcYuv1
, uint32_t absPartIdx
, uint32_t width
, uint32_t height
, bool bLuma
, bool bChroma
)
163 int part
= partitionFromSizes(width
, height
);
// Luma plane.
167 const int16_t* srcY0
= srcYuv0
.getLumaAddr(absPartIdx
);
168 const int16_t* srcY1
= srcYuv1
.getLumaAddr(absPartIdx
);
169 pixel
* dstY
= getLumaAddr(absPartIdx
);
170 primitives
.luma_addAvg
[part
](srcY0
, srcY1
, dstY
, srcYuv0
.m_size
, srcYuv1
.m_size
, m_size
);
// Chroma planes (Cb then Cr), using the chroma strides (m_csize).
174 const int16_t* srcU0
= srcYuv0
.getCbAddr(absPartIdx
);
175 const int16_t* srcV0
= srcYuv0
.getCrAddr(absPartIdx
);
176 const int16_t* srcU1
= srcYuv1
.getCbAddr(absPartIdx
);
177 const int16_t* srcV1
= srcYuv1
.getCrAddr(absPartIdx
);
178 pixel
* dstU
= getCbAddr(absPartIdx
);
179 pixel
* dstV
= getCrAddr(absPartIdx
);
180 primitives
.chroma
[m_csp
].addAvg
[part
](srcU0
, srcU1
, dstU
, srcYuv0
.m_csize
, srcYuv1
.m_csize
, m_csize
);
181 primitives
.chroma
[m_csp
].addAvg
[part
](srcV0
, srcV1
, dstV
, srcYuv0
.m_csize
, srcYuv1
.m_csize
, m_csize
);
/* Copy a luma block from this buffer to `dstYuv`, using the same
 * absPartIdx position in both buffers.
 *
 * The copy primitive is luma_copy_pp indexed with log2Size - 2; the strides
 * are dstYuv.m_size (destination) and m_size (source). Chroma planes are not
 * touched by this function.
 *
 * NOTE(review): the function's braces are not visible in this listing. */
185 void Yuv::copyPartToPartLuma(Yuv
& dstYuv
, uint32_t absPartIdx
, uint32_t log2Size
) const
187 const pixel
* src
= getLumaAddr(absPartIdx
);
188 pixel
* dst
= dstYuv
.getLumaAddr(absPartIdx
);
189 primitives
.luma_copy_pp
[log2Size
- 2](dst
, dstYuv
.m_size
, src
, m_size
);
/* Copy the Cb and Cr blocks from this buffer to `dstYuv`, using the same
 * absPartIdx position in both buffers.
 *
 * The primitive index is partitionFromLog2Size(log2SizeL), i.e. it is
 * derived from the luma log2 size passed by the caller. Both planes go
 * through chroma[m_csp].copy_pp with strides dstYuv.m_csize (destination)
 * and m_csize (source). The luma plane is not touched by this function.
 *
 * NOTE(review): the function's braces are not visible in this listing. */
192 void Yuv::copyPartToPartChroma(Yuv
& dstYuv
, uint32_t absPartIdx
, uint32_t log2SizeL
) const
194 int part
= partitionFromLog2Size(log2SizeL
);
195 const pixel
* srcU
= getCbAddr(absPartIdx
);
196 const pixel
* srcV
= getCrAddr(absPartIdx
);
197 pixel
* dstU
= dstYuv
.getCbAddr(absPartIdx
);
198 pixel
* dstV
= dstYuv
.getCrAddr(absPartIdx
);
199 primitives
.chroma
[m_csp
].copy_pp
[part
](dstU
, dstYuv
.m_csize
, srcU
, m_csize
);
200 primitives
.chroma
[m_csp
].copy_pp
[part
](dstV
, dstYuv
.m_csize
, srcV
, m_csize
);