1 /*****************************************************************************
2 * Copyright (C) 2014 x265 project
4 * Authors: Steve Borho <steve@borho.org>
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
20 * This program is also available under a commercial proprietary license.
21 * For more information, contact us at license @ x265.com.
22 *****************************************************************************/
27 #include "primitives.h"
/* Record the picture geometry and allocate the three plane buffers.
 *
 * picWidth / picHeight are stored as-is; picCsp is only used here to derive
 * the horizontal and vertical chroma shifts.  Each plane is allocated with a
 * margin on all four sides, and m_picOrg[] is set to the first pixel inside
 * that margin.
 *
 * NOTE(review): the body braces, the return statements and whatever
 * CHECKED_MALLOC does on allocation failure are not visible in this listing;
 * confirm them against the full file before relying on this description. */
47 bool PicYuv::create(uint32_t picWidth
, uint32_t picHeight
, uint32_t picCsp
)
49 m_picWidth
= picWidth
;
50 m_picHeight
= picHeight
;
51 m_hChromaShift
= CHROMA_H_SHIFT(picCsp
);
52 m_vChromaShift
= CHROMA_V_SHIFT(picCsp
);
/* picture size in units of g_maxCUSize, rounded up */
55 uint32_t numCuInWidth
= (m_picWidth
+ g_maxCUSize
- 1) / g_maxCUSize
;
56 uint32_t numCuInHeight
= (m_picHeight
+ g_maxCUSize
- 1) / g_maxCUSize
;
58 m_lumaMarginX
= g_maxCUSize
+ 32; // search margin and 8-tap filter half-length, padded for 32-byte alignment
59 m_lumaMarginY
= g_maxCUSize
+ 16; // margin for 8-tap filter and infinite padding
/* luma stride: CU-aligned width plus the horizontal margin on both sides */
60 m_stride
= (numCuInWidth
* g_maxCUSize
) + (m_lumaMarginX
<< 1);
/* the horizontal chroma margin is NOT reduced by the chroma shift; only the
 * vertical one is */
62 m_chromaMarginX
= m_lumaMarginX
; // keep 16-byte alignment for chroma CTUs
63 m_chromaMarginY
= m_lumaMarginY
>> m_vChromaShift
;
/* chroma stride: CU-aligned width scaled by the horizontal chroma shift, plus
 * the horizontal margin on both sides */
65 m_strideC
= ((numCuInWidth
* g_maxCUSize
) >> m_hChromaShift
) + (m_chromaMarginX
* 2);
/* CU-aligned picture height in luma rows */
66 int maxHeight
= numCuInHeight
* g_maxCUSize
;
/* plane 0 uses the luma stride and margins; planes 1 and 2 share the chroma
 * stride and margins.  Each buffer covers the CU-aligned height plus the
 * vertical margin above and below. */
68 CHECKED_MALLOC(m_picBuf
[0], pixel
, m_stride
* (maxHeight
+ (m_lumaMarginY
* 2)));
69 CHECKED_MALLOC(m_picBuf
[1], pixel
, m_strideC
* ((maxHeight
>> m_vChromaShift
) + (m_chromaMarginY
* 2)));
70 CHECKED_MALLOC(m_picBuf
[2], pixel
, m_strideC
* ((maxHeight
>> m_vChromaShift
) + (m_chromaMarginY
* 2)));
/* origin of each plane: skip the top margin rows, then the left margin */
72 m_picOrg
[0] = m_picBuf
[0] + m_lumaMarginY
* m_stride
+ m_lumaMarginX
;
73 m_picOrg
[1] = m_picBuf
[1] + m_chromaMarginY
* m_strideC
+ m_chromaMarginX
;
74 m_picOrg
[2] = m_picBuf
[2] + m_chromaMarginY
* m_strideC
+ m_chromaMarginX
;
82 /* the first picture allocated by the encoder will be asked to generate these
83 * offset arrays. Once generated, they will be provided to all future PicYuv
84 * allocated by the same encoder. */
/* Fills four lookup tables of pixel offsets:
 *   m_cuOffsetY / m_cuOffsetC - one entry per CU of the picture, indexed by
 *                               cuRow * sps.numCuInWidth + cuCol
 *   m_buOffsetY / m_buOffsetC - one entry per partition index inside a CU,
 *                               numPartitions = 1 << (g_maxFullDepth * 2)
 * The Y tables use m_stride; the C tables use m_strideC and the chroma shifts.
 *
 * NOTE(review): the body braces, the return statements and the
 * CHECKED_MALLOC failure path are not visible in this listing. */
85 bool PicYuv::createOffsets(const SPS
& sps
)
87 uint32_t numPartitions
= 1 << (g_maxFullDepth
* 2);
88 CHECKED_MALLOC(m_cuOffsetY
, intptr_t, sps
.numCuInWidth
* sps
.numCuInHeight
);
89 CHECKED_MALLOC(m_cuOffsetC
, intptr_t, sps
.numCuInWidth
* sps
.numCuInHeight
);
90 for (uint32_t cuRow
= 0; cuRow
< sps
.numCuInHeight
; cuRow
++)
92 for (uint32_t cuCol
= 0; cuCol
< sps
.numCuInWidth
; cuCol
++)
/* luma: cuRow CU-heights down, cuCol CU-widths across */
94 m_cuOffsetY
[cuRow
* sps
.numCuInWidth
+ cuCol
] = m_stride
* cuRow
* g_maxCUSize
+ cuCol
* g_maxCUSize
;
/* chroma: same position with the CU size scaled by the chroma shifts */
95 m_cuOffsetC
[cuRow
* sps
.numCuInWidth
+ cuCol
] = m_strideC
* cuRow
* (g_maxCUSize
>> m_vChromaShift
) + cuCol
* (g_maxCUSize
>> m_hChromaShift
);
99 CHECKED_MALLOC(m_buOffsetY
, intptr_t, (size_t)numPartitions
);
100 CHECKED_MALLOC(m_buOffsetC
, intptr_t, (size_t)numPartitions
);
101 for (uint32_t idx
= 0; idx
< numPartitions
; ++idx
)
/* x / y come from the g_zscanToPelX / g_zscanToPelY tables for this index */
103 intptr_t x
= g_zscanToPelX
[idx
];
104 intptr_t y
= g_zscanToPelY
[idx
];
105 m_buOffsetY
[idx
] = m_stride
* y
+ x
;
106 m_buOffsetC
[idx
] = m_strideC
* (y
>> m_vChromaShift
) + (x
>> m_hChromaShift
);
115 void PicYuv::destroy()
117 X265_FREE(m_picBuf
[0]);
118 X265_FREE(m_picBuf
[1]);
119 X265_FREE(m_picBuf
[2]);
/* Overview of the visible steps:
 *   1. width / height are the encoded size minus the caller's padx / pady.
 *   2. padx / pady are then replaced by the amount needed to reach a multiple
 *      of 16 whenever the remainder is non-zero.
 *   3. The three input planes are copied to m_picOrg[0..2] through one of
 *      three paths chosen by pic.bitDepth:
 *        - below X265_DEPTH : primitives.planecopy_cp from 8-bit input, with
 *                             shift = max(0, X265_DEPTH - pic.bitDepth)
 *        - exactly 8        : per-pixel cast from 8-bit input
 *        - otherwise        : primitives.planecopy_sp from 16-bit input, with
 *                             shift = max(0, pic.bitDepth - X265_DEPTH) and
 *                             mask = (1 << X265_DEPTH) - 1
 *   4. The last valid column is replicated into the padx columns to its right.
 *   5. The last valid row is copied into the pady rows below it.
 *
 * NOTE(review): this listing is missing the body braces and several
 * statements (for example any guards around steps 4 and 5 and the row pointer
 * increments of the destination planes); confirm against the full file. */
122 /* Copy pixels from an x265_picture into internal PicYuv instance.
123 * Shift pixels as necessary, mask off bits above X265_DEPTH for safety. */
124 void PicYuv::copyFromPicture(const x265_picture
& pic
, int padx
, int pady
)
126 /* m_picWidth is the width that is being encoded, padx indicates how many
127 * of those pixels are padding to reach multiple of MinCU(4) size.
129 * Internally, we need to extend rows out to a multiple of 16 for lowres
130 * downscale and other operations. But those padding pixels are never
133 * The same applies to m_picHeight and pady */
135 /* width and height - without padsize (input picture raw width and height) */
136 int width
= m_picWidth
- padx
;
137 int height
= m_picHeight
- pady
;
139 /* internal pad to multiple of 16x16 blocks */
140 uint8_t rem
= width
& 15;
142 padx
= rem
? 16 - rem
: padx
;
/* NOTE(review): as listed, rem still holds (width & 15) at this point, so
 * pady is derived from the width remainder; no recomputation of rem from
 * height is visible between these two statements - confirm against the full
 * file. */
144 pady
= rem
? 16 - rem
: pady
;
146 /* add one more row and col of pad for downscale interpolation, fixes
147 * warnings from valgrind about using uninitialized pixels */
/* path 1: input has fewer bits than the internal pixel depth */
151 if (pic
.bitDepth
< X265_DEPTH
)
153 pixel
*yPixel
= m_picOrg
[0];
154 pixel
*uPixel
= m_picOrg
[1];
155 pixel
*vPixel
= m_picOrg
[2];
157 uint8_t *yChar
= (uint8_t*)pic
.planes
[0];
158 uint8_t *uChar
= (uint8_t*)pic
.planes
[1];
159 uint8_t *vChar
= (uint8_t*)pic
.planes
[2];
160 int shift
= X265_MAX(0, X265_DEPTH
- pic
.bitDepth
);
/* pic.stride[] is divided by the source element size before being passed on;
 * chroma planes use the width / height scaled by the chroma shifts */
162 primitives
.planecopy_cp(yChar
, pic
.stride
[0] / sizeof(*yChar
), yPixel
, m_stride
, width
, height
, shift
);
163 primitives
.planecopy_cp(uChar
, pic
.stride
[1] / sizeof(*uChar
), uPixel
, m_strideC
, width
>> m_hChromaShift
, height
>> m_vChromaShift
, shift
);
164 primitives
.planecopy_cp(vChar
, pic
.stride
[2] / sizeof(*vChar
), vPixel
, m_strideC
, width
>> m_hChromaShift
, height
>> m_vChromaShift
, shift
);
/* path 2: 8-bit input copied pixel by pixel with a plain cast */
166 else if (pic
.bitDepth
== 8)
168 pixel
*yPixel
= m_picOrg
[0];
169 pixel
*uPixel
= m_picOrg
[1];
170 pixel
*vPixel
= m_picOrg
[2];
172 uint8_t *yChar
= (uint8_t*)pic
.planes
[0];
173 uint8_t *uChar
= (uint8_t*)pic
.planes
[1];
174 uint8_t *vChar
= (uint8_t*)pic
.planes
[2];
176 for (int r
= 0; r
< height
; r
++)
178 for (int c
= 0; c
< width
; c
++)
180 yPixel
[c
] = (pixel
)yChar
[c
];
/* advance the source row pointer by one input row */
184 yChar
+= pic
.stride
[0] / sizeof(*yChar
);
187 for (int r
= 0; r
< height
>> m_vChromaShift
; r
++)
189 for (int c
= 0; c
< width
>> m_hChromaShift
; c
++)
191 uPixel
[c
] = (pixel
)uChar
[c
];
192 vPixel
[c
] = (pixel
)vChar
[c
];
197 uChar
+= pic
.stride
[1] / sizeof(*uChar
);
198 vChar
+= pic
.stride
[2] / sizeof(*vChar
);
/* path 3: input planes are read as 16-bit samples */
201 else /* pic.bitDepth > 8 */
203 pixel
*yPixel
= m_picOrg
[0];
204 pixel
*uPixel
= m_picOrg
[1];
205 pixel
*vPixel
= m_picOrg
[2];
207 uint16_t *yShort
= (uint16_t*)pic
.planes
[0];
208 uint16_t *uShort
= (uint16_t*)pic
.planes
[1];
209 uint16_t *vShort
= (uint16_t*)pic
.planes
[2];
211 /* defensive programming, mask off bits that are supposed to be zero */
212 uint16_t mask
= (1 << X265_DEPTH
) - 1;
213 int shift
= X265_MAX(0, pic
.bitDepth
- X265_DEPTH
);
215 /* shift and mask pixels to final size */
217 primitives
.planecopy_sp(yShort
, pic
.stride
[0] / sizeof(*yShort
), yPixel
, m_stride
, width
, height
, shift
, mask
);
218 primitives
.planecopy_sp(uShort
, pic
.stride
[1] / sizeof(*uShort
), uPixel
, m_strideC
, width
>> m_hChromaShift
, height
>> m_vChromaShift
, shift
, mask
);
219 primitives
.planecopy_sp(vShort
, pic
.stride
[2] / sizeof(*vShort
), vPixel
, m_strideC
, width
>> m_hChromaShift
, height
>> m_vChromaShift
, shift
, mask
);
222 /* extend the right edge if width was not multiple of the minimum CU size */
225 pixel
*Y
= m_picOrg
[0];
226 pixel
*U
= m_picOrg
[1];
227 pixel
*V
= m_picOrg
[2];
229 for (int r
= 0; r
< height
; r
++)
231 for (int x
= 0; x
< padx
; x
++)
/* replicate the last valid luma pixel of the row */
233 Y
[width
+ x
] = Y
[width
- 1];
239 for (int r
= 0; r
< height
>> m_vChromaShift
; r
++)
241 for (int x
= 0; x
< padx
>> m_hChromaShift
; x
++)
/* same replication for both chroma planes, in chroma coordinates */
243 U
[(width
>> m_hChromaShift
) + x
] = U
[(width
>> m_hChromaShift
) - 1];
244 V
[(width
>> m_hChromaShift
) + x
] = V
[(width
>> m_hChromaShift
) - 1];
252 /* extend the bottom if height was not multiple of the minimum CU size */
/* Y / U / V point at the last valid row of each plane */
255 pixel
*Y
= m_picOrg
[0] + (height
- 1) * m_stride
;
256 pixel
*U
= m_picOrg
[1] + ((height
>> m_vChromaShift
) - 1) * m_strideC
;
257 pixel
*V
= m_picOrg
[2] + ((height
>> m_vChromaShift
) - 1) * m_strideC
;
/* copy that row, including its right-hand padding, into each padding row */
259 for (int i
= 1; i
<= pady
; i
++)
261 memcpy(Y
+ i
* m_stride
, Y
, (width
+ padx
) * sizeof(pixel
));
264 for (int j
= 1; j
<= pady
>> m_vChromaShift
; j
++)
266 memcpy(U
+ j
* m_strideC
, U
, ((width
+ padx
) >> m_hChromaShift
) * sizeof(pixel
));
267 memcpy(V
+ j
* m_strideC
, V
, ((width
+ padx
) >> m_hChromaShift
) * sizeof(pixel
));
274 template<uint32_t OUTPUT_BITDEPTH_DIV8
>
275 static void md5_block(MD5Context
& md5
, const pixel
* plane
, uint32_t n
)
277 /* create a 64 byte buffer for packing pixel's into */
278 uint8_t buf
[64 / OUTPUT_BITDEPTH_DIV8
][OUTPUT_BITDEPTH_DIV8
];
280 for (uint32_t i
= 0; i
< n
; i
++)
282 pixel pel
= plane
[i
];
283 /* perform bitdepth and endian conversion */
284 for (uint32_t d
= 0; d
< OUTPUT_BITDEPTH_DIV8
; d
++)
285 buf
[i
][d
] = (uint8_t)(pel
>> (d
* 8));
288 MD5Update(&md5
, (uint8_t*)buf
, n
* OUTPUT_BITDEPTH_DIV8
);
291 /* Update md5 with all samples in plane in raster order, each sample
292 * is adjusted to OUTPUT_BITDEPTH_DIV8 */
/* Each row of `width` samples is hashed in chunks of N samples through
 * md5_block, followed by one final md5_block call for the remaining
 * (width % N) samples.  Rows are `stride` pixels apart.
 *
 * NOTE(review): the definition of N and the body braces are not visible in
 * this listing; per the comment below, N must not exceed the capacity of
 * md5_block's packing buffer. */
293 template<uint32_t OUTPUT_BITDEPTH_DIV8
>
294 static void md5_plane(MD5Context
& md5
, const pixel
* plane
, uint32_t width
, uint32_t height
, intptr_t stride
)
296 /* N is the number of samples to process per md5 update.
297 * All N samples must fit in buf */
/* width_less_modN is the part of a row that divides evenly into N-sample
 * chunks; width_modN is what is left over */
299 uint32_t width_modN
= width
% N
;
300 uint32_t width_less_modN
= width
- width_modN
;
302 for (uint32_t y
= 0; y
< height
; y
++)
304 /* convert pel's into uint32_t chars in little endian byte order.
305 * NB, for 8bit data, data is truncated to 8bits. */
306 for (uint32_t x
= 0; x
< width_less_modN
; x
+= N
)
307 md5_block
<OUTPUT_BITDEPTH_DIV8
>(md5
, &plane
[y
* stride
+ x
], N
);
309 /* mop up any of the remaining line */
310 md5_block
<OUTPUT_BITDEPTH_DIV8
>(md5
, &plane
[y
* stride
+ width_less_modN
], width_modN
);
/* Fold every sample of a width x height plane into the running 16-bit CRC in
 * crcVal (updated in place).  Bits are consumed most significant first, one
 * bit per step: the register is shifted left, the new bit is added, the
 * result is kept to 16 bits and XORed with 0x1021 when the bit shifted out
 * was set.  Bits 7..0 of each sample are processed first, then bits 15..8.
 *
 * NOTE(review): the declarations of crcMsb / bitVal / bitIdx, the body braces
 * and the condition guarding the second byte are not visible in this listing;
 * the existing comment says the second byte applies only when the bit depth
 * is greater than 8. */
314 void updateCRC(const pixel
* plane
, uint32_t& crcVal
, uint32_t height
, uint32_t width
, intptr_t stride
)
320 for (uint32_t y
= 0; y
< height
; y
++)
322 for (uint32_t x
= 0; x
< width
; x
++)
324 // take CRC of first pictureData byte
325 for (bitIdx
= 0; bitIdx
< 8; bitIdx
++)
327 crcMsb
= (crcVal
>> 15) & 1;
328 bitVal
= (plane
[y
* stride
+ x
] >> (7 - bitIdx
)) & 1;
329 crcVal
= (((crcVal
<< 1) + bitVal
) & 0xffff) ^ (crcMsb
* 0x1021);
333 #pragma warning(disable: 4127) // conditional expression is constant
335 // take CRC of second pictureData byte if bit depth is greater than 8-bits
338 for (bitIdx
= 0; bitIdx
< 8; bitIdx
++)
340 crcMsb
= (crcVal
>> 15) & 1;
341 bitVal
= (plane
[y
* stride
+ x
] >> (15 - bitIdx
)) & 1;
342 crcVal
= (((crcVal
<< 1) + bitVal
) & 0xffff) ^ (crcMsb
* 0x1021);
/* Finalise a running CRC and store it in the digest.
 *
 * Sixteen zero bits are shifted through the 16-bit register (XOR with 0x1021
 * whenever the bit shifted out is set), then the result is written big-endian
 * to digest[0] and digest[1].  crcVal is updated in place and holds the final
 * 16-bit value on return; digest[2..15] are left untouched. */
void crcFinish(uint32_t& crcVal, uint8_t digest[16])
{
    uint32_t crcMsb;

    for (int bitIdx = 0; bitIdx < 16; bitIdx++)
    {
        crcMsb = (crcVal >> 15) & 1;
        crcVal = ((crcVal << 1) & 0xffff) ^ (crcMsb * 0x1021);
    }

    digest[0] = (crcVal >> 8) & 0xff;
    digest[1] = crcVal & 0xff;
}
/* Add the samples of `height` rows of a plane, starting at row
 * (row * cuHeight), to the running 32-bit sum in checksumVal (updated in
 * place).  Each sample byte is XORed with a mask built from its coordinates
 * before being added: the low bytes and the bits above the low byte of x and
 * y are XORed together and truncated to 8 bits.  The low byte of the sample
 * is added first; the second addition uses the sample shifted right by 8
 * (written as >> 7 >> 1).
 *
 * NOTE(review): the declaration of xor_mask, the body braces and the
 * condition guarding the second addition are not visible in this listing. */
363 void updateChecksum(const pixel
* plane
, uint32_t& checksumVal
, uint32_t height
, uint32_t width
, intptr_t stride
, int row
, uint32_t cuHeight
)
367 for (uint32_t y
= row
* cuHeight
; y
< ((row
* cuHeight
) + height
); y
++)
369 for (uint32_t x
= 0; x
< width
; x
++)
371 xor_mask
= (uint8_t)((x
& 0xff) ^ (y
& 0xff) ^ (x
>> 8) ^ (y
>> 8));
372 checksumVal
= (checksumVal
+ ((plane
[y
* stride
+ x
] & 0xff) ^ xor_mask
)) & 0xffffffff;
375 checksumVal
= (checksumVal
+ ((plane
[y
* stride
+ x
] >> 7 >> 1) ^ xor_mask
)) & 0xffffffff;
/* Store a 32-bit checksum in the digest, most significant byte first.
 * Only digest[0..3] are written; digest[4..15] are left untouched. */
void checksumFinish(uint32_t checksum, uint8_t digest[16])
{
    digest[0] = (checksum >> 24) & 0xff;
    digest[1] = (checksum >> 16) & 0xff;
    digest[2] = (checksum >> 8) & 0xff;
    digest[3] = checksum & 0xff;
}
388 void updateMD5Plane(MD5Context
& md5
, const pixel
* plane
, uint32_t width
, uint32_t height
, intptr_t stride
)
390 /* choose an md5_plane packing function based on the system bitdepth */
391 typedef void(*MD5PlaneFunc
)(MD5Context
&, const pixel
*, uint32_t, uint32_t, intptr_t);
392 MD5PlaneFunc md5_plane_func
;
393 md5_plane_func
= X265_DEPTH
<= 8 ? (MD5PlaneFunc
)md5_plane
<1> : (MD5PlaneFunc
)md5_plane
<2>;
395 md5_plane_func(md5
, plane
, width
, height
, stride
);