Commit | Line | Data |
---|---|---|
72b9787e JB |
1 | /***************************************************************************** |
2 | * Copyright (C) 2013 x265 project | |
3 | * | |
4 | * Authors: Deepthi Nandakumar <deepthi@multicorewareinc.com> | |
5 | * | |
6 | * This program is free software; you can redistribute it and/or modify | |
7 | * it under the terms of the GNU General Public License as published by | |
8 | * the Free Software Foundation; either version 2 of the License, or | |
9 | * (at your option) any later version. | |
10 | * | |
11 | * This program is distributed in the hope that it will be useful, | |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | * GNU General Public License for more details. | |
15 | * | |
16 | * You should have received a copy of the GNU General Public License | |
17 | * along with this program; if not, write to the Free Software | |
18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. | |
19 | * | |
20 | * This program is also available under a commercial proprietary license. | |
21 | * For more information, contact us at license @ x265.com. | |
22 | *****************************************************************************/ | |
23 | ||
24 | #include "common.h" | |
25 | #include "slice.h" | |
26 | #include "framedata.h" | |
27 | #include "picyuv.h" | |
28 | #include "predict.h" | |
29 | #include "primitives.h" | |
30 | ||
31 | using namespace x265; | |
32 | ||
33 | namespace | |
34 | { | |
35 | inline pixel weightBidir(int w0, int16_t P0, int w1, int16_t P1, int round, int shift, int offset) | |
36 | { | |
37 | return Clip((w0 * (P0 + IF_INTERNAL_OFFS) + w1 * (P1 + IF_INTERNAL_OFFS) + round + (offset << (shift - 1))) >> shift); | |
38 | } | |
39 | } | |
40 | ||
41 | Predict::Predict() | |
42 | { | |
43 | m_predBuf = NULL; | |
44 | m_refAbove = NULL; | |
45 | m_refAboveFlt = NULL; | |
46 | m_refLeft = NULL; | |
47 | m_refLeftFlt = NULL; | |
48 | m_immedVals = NULL; | |
49 | } | |
50 | ||
51 | Predict::~Predict() | |
52 | { | |
53 | X265_FREE(m_predBuf); | |
54 | X265_FREE(m_refAbove); | |
55 | X265_FREE(m_immedVals); | |
56 | m_predShortYuv[0].destroy(); | |
57 | m_predShortYuv[1].destroy(); | |
58 | } | |
59 | ||
60 | bool Predict::allocBuffers(int csp) | |
61 | { | |
62 | m_csp = csp; | |
63 | m_hChromaShift = CHROMA_H_SHIFT(csp); | |
64 | m_vChromaShift = CHROMA_V_SHIFT(csp); | |
65 | ||
66 | int predBufHeight = ((MAX_CU_SIZE + 2) << 4); | |
67 | int predBufStride = ((MAX_CU_SIZE + 8) << 4); | |
68 | CHECKED_MALLOC(m_predBuf, pixel, predBufStride * predBufHeight); | |
69 | CHECKED_MALLOC(m_immedVals, int16_t, 64 * (64 + NTAPS_LUMA - 1)); | |
70 | CHECKED_MALLOC(m_refAbove, pixel, 12 * MAX_CU_SIZE); | |
71 | ||
72 | m_refAboveFlt = m_refAbove + 3 * MAX_CU_SIZE; | |
73 | m_refLeft = m_refAboveFlt + 3 * MAX_CU_SIZE; | |
74 | m_refLeftFlt = m_refLeft + 3 * MAX_CU_SIZE; | |
75 | ||
76 | return m_predShortYuv[0].create(MAX_CU_SIZE, csp) && m_predShortYuv[1].create(MAX_CU_SIZE, csp); | |
77 | ||
78 | fail: | |
79 | return false; | |
80 | } | |
81 | ||
82 | void Predict::predIntraLumaAng(uint32_t dirMode, pixel* dst, intptr_t stride, uint32_t log2TrSize) | |
83 | { | |
84 | int tuSize = 1 << log2TrSize; | |
85 | ||
86 | pixel *refLft, *refAbv; | |
87 | ||
88 | if (!(g_intraFilterFlags[dirMode] & tuSize)) | |
89 | { | |
90 | refLft = m_refLeft + tuSize - 1; | |
91 | refAbv = m_refAbove + tuSize - 1; | |
92 | } | |
93 | else | |
94 | { | |
95 | refLft = m_refLeftFlt + tuSize - 1; | |
96 | refAbv = m_refAboveFlt + tuSize - 1; | |
97 | } | |
98 | ||
99 | bool bFilter = log2TrSize <= 4; | |
100 | int sizeIdx = log2TrSize - 2; | |
101 | X265_CHECK(sizeIdx >= 0 && sizeIdx < 4, "intra block size is out of range\n"); | |
102 | primitives.intra_pred[dirMode][sizeIdx](dst, stride, refLft, refAbv, dirMode, bFilter); | |
103 | } | |
104 | ||
105 | void Predict::predIntraChromaAng(pixel* src, uint32_t dirMode, pixel* dst, intptr_t stride, uint32_t log2TrSizeC, int chFmt) | |
106 | { | |
107 | int tuSize = 1 << log2TrSizeC; | |
108 | int tuSize2 = tuSize << 1; | |
109 | ||
110 | // Create the prediction | |
111 | const int bufOffset = tuSize - 1; | |
112 | pixel buf0[3 * MAX_CU_SIZE]; | |
113 | pixel buf1[3 * MAX_CU_SIZE]; | |
114 | pixel* above; | |
115 | pixel* left = buf0 + bufOffset; | |
116 | ||
117 | int limit = (dirMode <= 25 && dirMode >= 11) ? (tuSize + 1 + 1) : (tuSize2 + 1); | |
118 | for (int k = 0; k < limit; k++) | |
119 | left[k] = src[k * ADI_BUF_STRIDE]; | |
120 | ||
121 | if (chFmt == X265_CSP_I444 && (g_intraFilterFlags[dirMode] & tuSize)) | |
122 | { | |
123 | // generate filtered intra prediction samples | |
124 | buf0[bufOffset - 1] = src[1]; | |
125 | left = buf1 + bufOffset; | |
126 | for (int i = 0; i < tuSize2; i++) | |
127 | left[i] = (buf0[bufOffset + i - 1] + 2 * buf0[bufOffset + i] + buf0[bufOffset + i + 1] + 2) >> 2; | |
128 | left[tuSize2] = buf0[bufOffset + tuSize2]; | |
129 | ||
130 | above = buf0 + bufOffset; | |
131 | above[0] = left[0]; | |
132 | for (int i = 1; i < tuSize2; i++) | |
133 | above[i] = (src[i - 1] + 2 * src[i] + src[i + 1] + 2) >> 2; | |
134 | above[tuSize2] = src[tuSize2]; | |
135 | } | |
136 | else | |
137 | { | |
138 | above = buf1 + bufOffset; | |
139 | memcpy(above, src, (tuSize2 + 1) * sizeof(pixel)); | |
140 | } | |
141 | ||
142 | int sizeIdx = log2TrSizeC - 2; | |
143 | X265_CHECK(sizeIdx >= 0 && sizeIdx < 4, "intra block size is out of range\n"); | |
144 | primitives.intra_pred[dirMode][sizeIdx](dst, stride, left, above, dirMode, 0); | |
145 | } | |
146 | ||
147 | void Predict::initMotionCompensation(const CUData& cu, const CUGeom& cuGeom, int partIdx) | |
148 | { | |
149 | m_predSlice = cu.m_slice; | |
150 | cu.getPartIndexAndSize(partIdx, m_puAbsPartIdx, m_puWidth, m_puHeight); | |
151 | m_ctuAddr = cu.m_cuAddr; | |
152 | m_cuAbsPartIdx = cuGeom.encodeIdx; | |
153 | } | |
154 | ||
155 | void Predict::prepMotionCompensation(const CUData& cu, const CUGeom& cuGeom, int partIdx) | |
156 | { | |
157 | initMotionCompensation(cu, cuGeom, partIdx); | |
158 | ||
159 | m_refIdx0 = cu.m_refIdx[0][m_puAbsPartIdx]; | |
160 | m_clippedMv[0] = cu.m_mv[0][m_puAbsPartIdx]; | |
161 | m_refIdx1 = cu.m_refIdx[1][m_puAbsPartIdx]; | |
162 | m_clippedMv[1] = cu.m_mv[1][m_puAbsPartIdx]; | |
163 | cu.clipMv(m_clippedMv[0]); | |
164 | cu.clipMv(m_clippedMv[1]); | |
165 | } | |
166 | ||
167 | void Predict::motionCompensation(Yuv& predYuv, bool bLuma, bool bChroma) | |
168 | { | |
169 | if (m_predSlice->isInterP()) | |
170 | { | |
171 | /* P Slice */ | |
172 | WeightValues wv0[3]; | |
173 | X265_CHECK(m_refIdx0 >= 0, "invalid P refidx\n"); | |
174 | X265_CHECK(m_refIdx0 < m_predSlice->m_numRefIdx[0], "P refidx out of range\n"); | |
175 | const WeightParam *wp0 = m_predSlice->m_weightPredTable[0][m_refIdx0]; | |
176 | ||
177 | if (m_predSlice->m_pps->bUseWeightPred && wp0->bPresentFlag) | |
178 | { | |
179 | for (int plane = 0; plane < 3; plane++) | |
180 | { | |
181 | wv0[plane].w = wp0[plane].inputWeight; | |
182 | wv0[plane].offset = wp0[plane].inputOffset * (1 << (X265_DEPTH - 8)); | |
183 | wv0[plane].shift = wp0[plane].log2WeightDenom; | |
184 | wv0[plane].round = wp0[plane].log2WeightDenom >= 1 ? 1 << (wp0[plane].log2WeightDenom - 1) : 0; | |
185 | } | |
186 | ||
187 | ShortYuv& shortYuv = m_predShortYuv[0]; | |
188 | ||
189 | if (bLuma) | |
190 | predInterLumaShort(shortYuv, *m_predSlice->m_refPicList[0][m_refIdx0]->m_reconPicYuv, m_clippedMv[0]); | |
191 | if (bChroma) | |
192 | predInterChromaShort(shortYuv, *m_predSlice->m_refPicList[0][m_refIdx0]->m_reconPicYuv, m_clippedMv[0]); | |
193 | ||
194 | addWeightUni(predYuv, shortYuv, wv0, bLuma, bChroma); | |
195 | } | |
196 | else | |
197 | { | |
198 | if (bLuma) | |
199 | predInterLumaPixel(predYuv, *m_predSlice->m_refPicList[0][m_refIdx0]->m_reconPicYuv, m_clippedMv[0]); | |
200 | if (bChroma) | |
201 | predInterChromaPixel(predYuv, *m_predSlice->m_refPicList[0][m_refIdx0]->m_reconPicYuv, m_clippedMv[0]); | |
202 | } | |
203 | } | |
204 | else | |
205 | { | |
206 | /* B Slice */ | |
207 | ||
208 | WeightValues wv0[3], wv1[3]; | |
209 | const WeightParam *pwp0, *pwp1; | |
210 | ||
211 | if (m_predSlice->m_pps->bUseWeightedBiPred) | |
212 | { | |
213 | pwp0 = m_refIdx0 >= 0 ? m_predSlice->m_weightPredTable[0][m_refIdx0] : NULL; | |
214 | pwp1 = m_refIdx1 >= 0 ? m_predSlice->m_weightPredTable[1][m_refIdx1] : NULL; | |
215 | ||
216 | if (pwp0 && pwp1 && (pwp0->bPresentFlag || pwp1->bPresentFlag)) | |
217 | { | |
218 | /* biprediction weighting */ | |
219 | for (int plane = 0; plane < 3; plane++) | |
220 | { | |
221 | wv0[plane].w = pwp0[plane].inputWeight; | |
222 | wv0[plane].o = pwp0[plane].inputOffset * (1 << (X265_DEPTH - 8)); | |
223 | wv0[plane].shift = pwp0[plane].log2WeightDenom; | |
224 | wv0[plane].round = 1 << pwp0[plane].log2WeightDenom; | |
225 | ||
226 | wv1[plane].w = pwp1[plane].inputWeight; | |
227 | wv1[plane].o = pwp1[plane].inputOffset * (1 << (X265_DEPTH - 8)); | |
228 | wv1[plane].shift = wv0[plane].shift; | |
229 | wv1[plane].round = wv0[plane].round; | |
230 | } | |
231 | } | |
232 | else | |
233 | { | |
234 | /* uniprediction weighting, always outputs to wv0 */ | |
235 | const WeightParam* pwp = (m_refIdx0 >= 0) ? pwp0 : pwp1; | |
236 | for (int plane = 0; plane < 3; plane++) | |
237 | { | |
238 | wv0[plane].w = pwp[plane].inputWeight; | |
239 | wv0[plane].offset = pwp[plane].inputOffset * (1 << (X265_DEPTH - 8)); | |
240 | wv0[plane].shift = pwp[plane].log2WeightDenom; | |
241 | wv0[plane].round = pwp[plane].log2WeightDenom >= 1 ? 1 << (pwp[plane].log2WeightDenom - 1) : 0; | |
242 | } | |
243 | } | |
244 | } | |
245 | else | |
246 | pwp0 = pwp1 = NULL; | |
247 | ||
248 | if (m_refIdx0 >= 0 && m_refIdx1 >= 0) | |
249 | { | |
250 | /* Biprediction */ | |
251 | X265_CHECK(m_refIdx0 < m_predSlice->m_numRefIdx[0], "bidir refidx0 out of range\n"); | |
252 | X265_CHECK(m_refIdx1 < m_predSlice->m_numRefIdx[1], "bidir refidx1 out of range\n"); | |
253 | ||
254 | if (bLuma) | |
255 | { | |
256 | predInterLumaShort(m_predShortYuv[0], *m_predSlice->m_refPicList[0][m_refIdx0]->m_reconPicYuv, m_clippedMv[0]); | |
257 | predInterLumaShort(m_predShortYuv[1], *m_predSlice->m_refPicList[1][m_refIdx1]->m_reconPicYuv, m_clippedMv[1]); | |
258 | } | |
259 | if (bChroma) | |
260 | { | |
261 | predInterChromaShort(m_predShortYuv[0], *m_predSlice->m_refPicList[0][m_refIdx0]->m_reconPicYuv, m_clippedMv[0]); | |
262 | predInterChromaShort(m_predShortYuv[1], *m_predSlice->m_refPicList[1][m_refIdx1]->m_reconPicYuv, m_clippedMv[1]); | |
263 | } | |
264 | ||
265 | if (pwp0 && pwp1 && (pwp0->bPresentFlag || pwp1->bPresentFlag)) | |
266 | addWeightBi(predYuv, m_predShortYuv[0], m_predShortYuv[1], wv0, wv1, bLuma, bChroma); | |
267 | else | |
268 | predYuv.addAvg(m_predShortYuv[0], m_predShortYuv[1], m_puAbsPartIdx, m_puWidth, m_puHeight, bLuma, bChroma); | |
269 | } | |
270 | else if (m_refIdx0 >= 0) | |
271 | { | |
272 | /* uniprediction to L0 */ | |
273 | X265_CHECK(m_refIdx0 < m_predSlice->m_numRefIdx[0], "unidir refidx0 out of range\n"); | |
274 | ||
275 | if (pwp0 && pwp0->bPresentFlag) | |
276 | { | |
277 | ShortYuv& shortYuv = m_predShortYuv[0]; | |
278 | ||
279 | if (bLuma) | |
280 | predInterLumaShort(shortYuv, *m_predSlice->m_refPicList[0][m_refIdx0]->m_reconPicYuv, m_clippedMv[0]); | |
281 | if (bChroma) | |
282 | predInterChromaShort(shortYuv, *m_predSlice->m_refPicList[0][m_refIdx0]->m_reconPicYuv, m_clippedMv[0]); | |
283 | ||
284 | addWeightUni(predYuv, shortYuv, wv0, bLuma, bChroma); | |
285 | } | |
286 | else | |
287 | { | |
288 | if (bLuma) | |
289 | predInterLumaPixel(predYuv, *m_predSlice->m_refPicList[0][m_refIdx0]->m_reconPicYuv, m_clippedMv[0]); | |
290 | if (bChroma) | |
291 | predInterChromaPixel(predYuv, *m_predSlice->m_refPicList[0][m_refIdx0]->m_reconPicYuv, m_clippedMv[0]); | |
292 | } | |
293 | } | |
294 | else | |
295 | { | |
296 | /* uniprediction to L1 */ | |
297 | X265_CHECK(m_refIdx1 >= 0, "refidx1 was not positive\n"); | |
298 | X265_CHECK(m_refIdx1 < m_predSlice->m_numRefIdx[1], "unidir refidx1 out of range\n"); | |
299 | ||
300 | if (pwp1 && pwp1->bPresentFlag) | |
301 | { | |
302 | ShortYuv& shortYuv = m_predShortYuv[0]; | |
303 | ||
304 | if (bLuma) | |
305 | predInterLumaShort(shortYuv, *m_predSlice->m_refPicList[1][m_refIdx1]->m_reconPicYuv, m_clippedMv[1]); | |
306 | if (bChroma) | |
307 | predInterChromaShort(shortYuv, *m_predSlice->m_refPicList[1][m_refIdx1]->m_reconPicYuv, m_clippedMv[1]); | |
308 | ||
309 | addWeightUni(predYuv, shortYuv, wv0, bLuma, bChroma); | |
310 | } | |
311 | else | |
312 | { | |
313 | if (bLuma) | |
314 | predInterLumaPixel(predYuv, *m_predSlice->m_refPicList[1][m_refIdx1]->m_reconPicYuv, m_clippedMv[1]); | |
315 | if (bChroma) | |
316 | predInterChromaPixel(predYuv, *m_predSlice->m_refPicList[1][m_refIdx1]->m_reconPicYuv, m_clippedMv[1]); | |
317 | } | |
318 | } | |
319 | } | |
320 | } | |
321 | ||
322 | void Predict::predInterLumaPixel(Yuv& dstYuv, const PicYuv& refPic, const MV& mv) const | |
323 | { | |
324 | pixel *dst = dstYuv.getLumaAddr(m_puAbsPartIdx); | |
325 | intptr_t dstStride = dstYuv.m_size; | |
326 | ||
327 | intptr_t srcStride = refPic.m_stride; | |
328 | intptr_t srcOffset = (mv.x >> 2) + (mv.y >> 2) * srcStride; | |
329 | int partEnum = partitionFromSizes(m_puWidth, m_puHeight); | |
330 | pixel* src = const_cast<PicYuv&>(refPic).getLumaAddr(m_ctuAddr, m_cuAbsPartIdx + m_puAbsPartIdx) + srcOffset; | |
331 | ||
332 | int xFrac = mv.x & 0x3; | |
333 | int yFrac = mv.y & 0x3; | |
334 | ||
335 | if (!(yFrac | xFrac)) | |
336 | primitives.luma_copy_pp[partEnum](dst, dstStride, src, srcStride); | |
337 | else if (!yFrac) | |
338 | primitives.luma_hpp[partEnum](src, srcStride, dst, dstStride, xFrac); | |
339 | else if (!xFrac) | |
340 | primitives.luma_vpp[partEnum](src, srcStride, dst, dstStride, yFrac); | |
341 | else | |
342 | { | |
343 | int tmpStride = m_puWidth; | |
344 | int filterSize = NTAPS_LUMA; | |
345 | int halfFilterSize = (filterSize >> 1); | |
346 | primitives.luma_hps[partEnum](src, srcStride, m_immedVals, tmpStride, xFrac, 1); | |
347 | primitives.luma_vsp[partEnum](m_immedVals + (halfFilterSize - 1) * tmpStride, tmpStride, dst, dstStride, yFrac); | |
348 | } | |
349 | } | |
350 | ||
351 | void Predict::predInterLumaShort(ShortYuv& dstSYuv, const PicYuv& refPic, const MV& mv) const | |
352 | { | |
353 | int16_t *dst = dstSYuv.getLumaAddr(m_puAbsPartIdx); | |
354 | int dstStride = dstSYuv.m_size; | |
355 | ||
356 | intptr_t srcStride = refPic.m_stride; | |
357 | intptr_t srcOffset = (mv.x >> 2) + (mv.y >> 2) * srcStride; | |
358 | pixel *src = const_cast<PicYuv&>(refPic).getLumaAddr(m_ctuAddr, m_cuAbsPartIdx + m_puAbsPartIdx) + srcOffset; | |
359 | ||
360 | int xFrac = mv.x & 0x3; | |
361 | int yFrac = mv.y & 0x3; | |
362 | ||
363 | int partEnum = partitionFromSizes(m_puWidth, m_puHeight); | |
364 | ||
365 | X265_CHECK((m_puWidth % 4) + (m_puHeight % 4) == 0, "width or height not divisible by 4\n"); | |
366 | X265_CHECK(dstStride == MAX_CU_SIZE, "stride expected to be max cu size\n"); | |
367 | ||
368 | if (!(yFrac | xFrac)) | |
369 | primitives.luma_p2s(src, srcStride, dst, m_puWidth, m_puHeight); | |
370 | else if (!yFrac) | |
371 | primitives.luma_hps[partEnum](src, srcStride, dst, dstStride, xFrac, 0); | |
372 | else if (!xFrac) | |
373 | primitives.luma_vps[partEnum](src, srcStride, dst, dstStride, yFrac); | |
374 | else | |
375 | { | |
376 | int tmpStride = m_puWidth; | |
377 | int filterSize = NTAPS_LUMA; | |
378 | int halfFilterSize = (filterSize >> 1); | |
379 | primitives.luma_hps[partEnum](src, srcStride, m_immedVals, tmpStride, xFrac, 1); | |
380 | primitives.luma_vss[partEnum](m_immedVals + (halfFilterSize - 1) * tmpStride, tmpStride, dst, dstStride, yFrac); | |
381 | } | |
382 | } | |
383 | ||
384 | void Predict::predInterChromaPixel(Yuv& dstYuv, const PicYuv& refPic, const MV& mv) const | |
385 | { | |
386 | intptr_t dstStride = dstYuv.m_csize; | |
387 | intptr_t refStride = refPic.m_strideC; | |
388 | ||
389 | int shiftHor = (2 + m_hChromaShift); | |
390 | int shiftVer = (2 + m_vChromaShift); | |
391 | ||
392 | intptr_t refOffset = (mv.x >> shiftHor) + (mv.y >> shiftVer) * refStride; | |
393 | ||
394 | pixel* refCb = const_cast<PicYuv&>(refPic).getCbAddr(m_ctuAddr, m_cuAbsPartIdx + m_puAbsPartIdx) + refOffset; | |
395 | pixel* refCr = const_cast<PicYuv&>(refPic).getCrAddr(m_ctuAddr, m_cuAbsPartIdx + m_puAbsPartIdx) + refOffset; | |
396 | ||
397 | pixel* dstCb = dstYuv.getCbAddr(m_puAbsPartIdx); | |
398 | pixel* dstCr = dstYuv.getCrAddr(m_puAbsPartIdx); | |
399 | ||
400 | int xFrac = mv.x & ((1 << shiftHor) - 1); | |
401 | int yFrac = mv.y & ((1 << shiftVer) - 1); | |
402 | ||
403 | int partEnum = partitionFromSizes(m_puWidth, m_puHeight); | |
404 | ||
405 | if (!(yFrac | xFrac)) | |
406 | { | |
407 | primitives.chroma[m_csp].copy_pp[partEnum](dstCb, dstStride, refCb, refStride); | |
408 | primitives.chroma[m_csp].copy_pp[partEnum](dstCr, dstStride, refCr, refStride); | |
409 | } | |
410 | else if (!yFrac) | |
411 | { | |
412 | primitives.chroma[m_csp].filter_hpp[partEnum](refCb, refStride, dstCb, dstStride, xFrac << (1 - m_hChromaShift)); | |
413 | primitives.chroma[m_csp].filter_hpp[partEnum](refCr, refStride, dstCr, dstStride, xFrac << (1 - m_hChromaShift)); | |
414 | } | |
415 | else if (!xFrac) | |
416 | { | |
417 | primitives.chroma[m_csp].filter_vpp[partEnum](refCb, refStride, dstCb, dstStride, yFrac << (1 - m_vChromaShift)); | |
418 | primitives.chroma[m_csp].filter_vpp[partEnum](refCr, refStride, dstCr, dstStride, yFrac << (1 - m_vChromaShift)); | |
419 | } | |
420 | else | |
421 | { | |
422 | int extStride = m_puWidth >> m_hChromaShift; | |
423 | int filterSize = NTAPS_CHROMA; | |
424 | int halfFilterSize = (filterSize >> 1); | |
425 | ||
426 | primitives.chroma[m_csp].filter_hps[partEnum](refCb, refStride, m_immedVals, extStride, xFrac << (1 - m_hChromaShift), 1); | |
427 | primitives.chroma[m_csp].filter_vsp[partEnum](m_immedVals + (halfFilterSize - 1) * extStride, extStride, dstCb, dstStride, yFrac << (1 - m_vChromaShift)); | |
428 | ||
429 | primitives.chroma[m_csp].filter_hps[partEnum](refCr, refStride, m_immedVals, extStride, xFrac << (1 - m_hChromaShift), 1); | |
430 | primitives.chroma[m_csp].filter_vsp[partEnum](m_immedVals + (halfFilterSize - 1) * extStride, extStride, dstCr, dstStride, yFrac << (1 - m_vChromaShift)); | |
431 | } | |
432 | } | |
433 | ||
434 | void Predict::predInterChromaShort(ShortYuv& dstSYuv, const PicYuv& refPic, const MV& mv) const | |
435 | { | |
436 | intptr_t refStride = refPic.m_strideC; | |
437 | intptr_t dstStride = dstSYuv.m_csize; | |
438 | ||
439 | int shiftHor = (2 + m_hChromaShift); | |
440 | int shiftVer = (2 + m_vChromaShift); | |
441 | ||
442 | intptr_t refOffset = (mv.x >> shiftHor) + (mv.y >> shiftVer) * refStride; | |
443 | ||
444 | pixel* refCb = const_cast<PicYuv&>(refPic).getCbAddr(m_ctuAddr, m_cuAbsPartIdx + m_puAbsPartIdx) + refOffset; | |
445 | pixel* refCr = const_cast<PicYuv&>(refPic).getCrAddr(m_ctuAddr, m_cuAbsPartIdx + m_puAbsPartIdx) + refOffset; | |
446 | ||
447 | int16_t* dstCb = dstSYuv.getCbAddr(m_puAbsPartIdx); | |
448 | int16_t* dstCr = dstSYuv.getCrAddr(m_puAbsPartIdx); | |
449 | ||
450 | int xFrac = mv.x & ((1 << shiftHor) - 1); | |
451 | int yFrac = mv.y & ((1 << shiftVer) - 1); | |
452 | ||
453 | int partEnum = partitionFromSizes(m_puWidth, m_puHeight); | |
454 | ||
455 | uint32_t cxWidth = m_puWidth >> m_hChromaShift; | |
456 | uint32_t cxHeight = m_puHeight >> m_vChromaShift; | |
457 | ||
458 | X265_CHECK(((cxWidth | cxHeight) % 2) == 0, "chroma block size expected to be multiple of 2\n"); | |
459 | ||
460 | if (!(yFrac | xFrac)) | |
461 | { | |
462 | primitives.chroma_p2s[m_csp](refCb, refStride, dstCb, cxWidth, cxHeight); | |
463 | primitives.chroma_p2s[m_csp](refCr, refStride, dstCr, cxWidth, cxHeight); | |
464 | } | |
465 | else if (!yFrac) | |
466 | { | |
467 | primitives.chroma[m_csp].filter_hps[partEnum](refCb, refStride, dstCb, dstStride, xFrac << (1 - m_hChromaShift), 0); | |
468 | primitives.chroma[m_csp].filter_hps[partEnum](refCr, refStride, dstCr, dstStride, xFrac << (1 - m_hChromaShift), 0); | |
469 | } | |
470 | else if (!xFrac) | |
471 | { | |
472 | primitives.chroma[m_csp].filter_vps[partEnum](refCb, refStride, dstCb, dstStride, yFrac << (1 - m_vChromaShift)); | |
473 | primitives.chroma[m_csp].filter_vps[partEnum](refCr, refStride, dstCr, dstStride, yFrac << (1 - m_vChromaShift)); | |
474 | } | |
475 | else | |
476 | { | |
477 | int extStride = cxWidth; | |
478 | int filterSize = NTAPS_CHROMA; | |
479 | int halfFilterSize = (filterSize >> 1); | |
480 | primitives.chroma[m_csp].filter_hps[partEnum](refCb, refStride, m_immedVals, extStride, xFrac << (1 - m_hChromaShift), 1); | |
481 | primitives.chroma[m_csp].filter_vss[partEnum](m_immedVals + (halfFilterSize - 1) * extStride, extStride, dstCb, dstStride, yFrac << (1 - m_vChromaShift)); | |
482 | primitives.chroma[m_csp].filter_hps[partEnum](refCr, refStride, m_immedVals, extStride, xFrac << (1 - m_hChromaShift), 1); | |
483 | primitives.chroma[m_csp].filter_vss[partEnum](m_immedVals + (halfFilterSize - 1) * extStride, extStride, dstCr, dstStride, yFrac << (1 - m_vChromaShift)); | |
484 | } | |
485 | } | |
486 | ||
487 | /* weighted averaging for bi-pred */ | |
488 | void Predict::addWeightBi(Yuv& predYuv, const ShortYuv& srcYuv0, const ShortYuv& srcYuv1, const WeightValues wp0[3], const WeightValues wp1[3], bool bLuma, bool bChroma) const | |
489 | { | |
490 | int x, y; | |
491 | ||
492 | int w0, w1, offset, shiftNum, shift, round; | |
493 | uint32_t src0Stride, src1Stride, dststride; | |
494 | ||
495 | pixel* dstY = predYuv.getLumaAddr(m_puAbsPartIdx); | |
496 | pixel* dstU = predYuv.getCbAddr(m_puAbsPartIdx); | |
497 | pixel* dstV = predYuv.getCrAddr(m_puAbsPartIdx); | |
498 | ||
499 | const int16_t* srcY0 = srcYuv0.getLumaAddr(m_puAbsPartIdx); | |
500 | const int16_t* srcU0 = srcYuv0.getCbAddr(m_puAbsPartIdx); | |
501 | const int16_t* srcV0 = srcYuv0.getCrAddr(m_puAbsPartIdx); | |
502 | ||
503 | const int16_t* srcY1 = srcYuv1.getLumaAddr(m_puAbsPartIdx); | |
504 | const int16_t* srcU1 = srcYuv1.getCbAddr(m_puAbsPartIdx); | |
505 | const int16_t* srcV1 = srcYuv1.getCrAddr(m_puAbsPartIdx); | |
506 | ||
507 | if (bLuma) | |
508 | { | |
509 | // Luma | |
510 | w0 = wp0[0].w; | |
511 | offset = wp0[0].o + wp1[0].o; | |
512 | shiftNum = IF_INTERNAL_PREC - X265_DEPTH; | |
513 | shift = wp0[0].shift + shiftNum + 1; | |
514 | round = shift ? (1 << (shift - 1)) : 0; | |
515 | w1 = wp1[0].w; | |
516 | ||
517 | src0Stride = srcYuv0.m_size; | |
518 | src1Stride = srcYuv1.m_size; | |
519 | dststride = predYuv.m_size; | |
520 | ||
521 | // TODO: can we use weight_sp here? | |
522 | for (y = m_puHeight - 1; y >= 0; y--) | |
523 | { | |
524 | for (x = m_puWidth - 1; x >= 0; ) | |
525 | { | |
526 | // note: luma min width is 4 | |
527 | dstY[x] = weightBidir(w0, srcY0[x], w1, srcY1[x], round, shift, offset); | |
528 | x--; | |
529 | dstY[x] = weightBidir(w0, srcY0[x], w1, srcY1[x], round, shift, offset); | |
530 | x--; | |
531 | dstY[x] = weightBidir(w0, srcY0[x], w1, srcY1[x], round, shift, offset); | |
532 | x--; | |
533 | dstY[x] = weightBidir(w0, srcY0[x], w1, srcY1[x], round, shift, offset); | |
534 | x--; | |
535 | } | |
536 | ||
537 | srcY0 += src0Stride; | |
538 | srcY1 += src1Stride; | |
539 | dstY += dststride; | |
540 | } | |
541 | } | |
542 | ||
543 | if (bChroma) | |
544 | { | |
545 | // Chroma U | |
546 | w0 = wp0[1].w; | |
547 | offset = wp0[1].o + wp1[1].o; | |
548 | shiftNum = IF_INTERNAL_PREC - X265_DEPTH; | |
549 | shift = wp0[1].shift + shiftNum + 1; | |
550 | round = shift ? (1 << (shift - 1)) : 0; | |
551 | w1 = wp1[1].w; | |
552 | ||
553 | src0Stride = srcYuv0.m_csize; | |
554 | src1Stride = srcYuv1.m_csize; | |
555 | dststride = predYuv.m_csize; | |
556 | ||
557 | uint32_t cwidth = m_puWidth >> srcYuv0.m_hChromaShift; | |
558 | uint32_t cheight = m_puHeight >> srcYuv0.m_vChromaShift; | |
559 | ||
560 | // TODO: can we use weight_sp here? | |
561 | for (y = cheight - 1; y >= 0; y--) | |
562 | { | |
563 | for (x = cwidth - 1; x >= 0;) | |
564 | { | |
565 | // note: chroma min width is 2 | |
566 | dstU[x] = weightBidir(w0, srcU0[x], w1, srcU1[x], round, shift, offset); | |
567 | x--; | |
568 | dstU[x] = weightBidir(w0, srcU0[x], w1, srcU1[x], round, shift, offset); | |
569 | x--; | |
570 | } | |
571 | ||
572 | srcU0 += src0Stride; | |
573 | srcU1 += src1Stride; | |
574 | dstU += dststride; | |
575 | } | |
576 | ||
577 | // Chroma V | |
578 | w0 = wp0[2].w; | |
579 | offset = wp0[2].o + wp1[2].o; | |
580 | shift = wp0[2].shift + shiftNum + 1; | |
581 | round = shift ? (1 << (shift - 1)) : 0; | |
582 | w1 = wp1[2].w; | |
583 | ||
584 | for (y = cheight - 1; y >= 0; y--) | |
585 | { | |
586 | for (x = cwidth - 1; x >= 0;) | |
587 | { | |
588 | // note: chroma min width is 2 | |
589 | dstV[x] = weightBidir(w0, srcV0[x], w1, srcV1[x], round, shift, offset); | |
590 | x--; | |
591 | dstV[x] = weightBidir(w0, srcV0[x], w1, srcV1[x], round, shift, offset); | |
592 | x--; | |
593 | } | |
594 | ||
595 | srcV0 += src0Stride; | |
596 | srcV1 += src1Stride; | |
597 | dstV += dststride; | |
598 | } | |
599 | } | |
600 | } | |
601 | ||
602 | /* weighted averaging for uni-pred */ | |
603 | void Predict::addWeightUni(Yuv& predYuv, const ShortYuv& srcYuv, const WeightValues wp[3], bool bLuma, bool bChroma) const | |
604 | { | |
605 | pixel* dstY = predYuv.getLumaAddr(m_puAbsPartIdx); | |
606 | pixel* dstU = predYuv.getCbAddr(m_puAbsPartIdx); | |
607 | pixel* dstV = predYuv.getCrAddr(m_puAbsPartIdx); | |
608 | ||
609 | const int16_t* srcY0 = srcYuv.getLumaAddr(m_puAbsPartIdx); | |
610 | const int16_t* srcU0 = srcYuv.getCbAddr(m_puAbsPartIdx); | |
611 | const int16_t* srcV0 = srcYuv.getCrAddr(m_puAbsPartIdx); | |
612 | ||
613 | int w0, offset, shiftNum, shift, round; | |
614 | uint32_t srcStride, dstStride; | |
615 | ||
616 | if (bLuma) | |
617 | { | |
618 | // Luma | |
619 | w0 = wp[0].w; | |
620 | offset = wp[0].offset; | |
621 | shiftNum = IF_INTERNAL_PREC - X265_DEPTH; | |
622 | shift = wp[0].shift + shiftNum; | |
623 | round = shift ? (1 << (shift - 1)) : 0; | |
624 | srcStride = srcYuv.m_size; | |
625 | dstStride = predYuv.m_size; | |
626 | ||
627 | primitives.weight_sp(const_cast<int16_t*>(srcY0), dstY, srcStride, dstStride, m_puWidth, m_puHeight, w0, round, shift, offset); | |
628 | } | |
629 | ||
630 | if (bChroma) | |
631 | { | |
632 | // Chroma U | |
633 | w0 = wp[1].w; | |
634 | offset = wp[1].offset; | |
635 | shiftNum = IF_INTERNAL_PREC - X265_DEPTH; | |
636 | shift = wp[1].shift + shiftNum; | |
637 | round = shift ? (1 << (shift - 1)) : 0; | |
638 | ||
639 | srcStride = srcYuv.m_csize; | |
640 | dstStride = predYuv.m_csize; | |
641 | ||
642 | uint32_t cwidth = m_puWidth >> srcYuv.m_hChromaShift; | |
643 | uint32_t cheight = m_puHeight >> srcYuv.m_vChromaShift; | |
644 | ||
645 | primitives.weight_sp(const_cast<int16_t*>(srcU0), dstU, srcStride, dstStride, cwidth, cheight, w0, round, shift, offset); | |
646 | ||
647 | // Chroma V | |
648 | w0 = wp[2].w; | |
649 | offset = wp[2].offset; | |
650 | shift = wp[2].shift + shiftNum; | |
651 | round = shift ? (1 << (shift - 1)) : 0; | |
652 | ||
653 | primitives.weight_sp(const_cast<int16_t*>(srcV0), dstV, srcStride, dstStride, cwidth, cheight, w0, round, shift, offset); | |
654 | } | |
655 | } | |
656 | ||
657 | void Predict::initAdiPattern(const CUData& cu, const CUGeom& cuGeom, uint32_t absPartIdx, uint32_t partDepth, int dirMode) | |
658 | { | |
659 | IntraNeighbors intraNeighbors; | |
660 | initIntraNeighbors(cu, absPartIdx, partDepth, true, &intraNeighbors); | |
661 | ||
662 | pixel* adiBuf = m_predBuf; | |
663 | pixel* refAbove = m_refAbove; | |
664 | pixel* refLeft = m_refLeft; | |
665 | pixel* refAboveFlt = m_refAboveFlt; | |
666 | pixel* refLeftFlt = m_refLeftFlt; | |
667 | ||
668 | int tuSize = intraNeighbors.tuSize; | |
669 | int tuSize2 = tuSize << 1; | |
670 | ||
671 | pixel* adiOrigin = cu.m_encData->m_reconPicYuv->getLumaAddr(cu.m_cuAddr, cuGeom.encodeIdx + absPartIdx); | |
672 | intptr_t picStride = cu.m_encData->m_reconPicYuv->m_stride; | |
673 | ||
674 | fillReferenceSamples(adiOrigin, picStride, adiBuf, intraNeighbors); | |
675 | ||
676 | // initialization of ADI buffers | |
677 | const int bufOffset = tuSize - 1; | |
678 | refAbove += bufOffset; | |
679 | refLeft += bufOffset; | |
680 | ||
681 | // ADI_BUF_STRIDE * (2 * tuSize + 1); | |
682 | memcpy(refAbove, adiBuf, (tuSize2 + 1) * sizeof(pixel)); | |
683 | for (int k = 0; k < tuSize2 + 1; k++) | |
684 | refLeft[k] = adiBuf[k * ADI_BUF_STRIDE]; | |
685 | ||
686 | if (dirMode == ALL_IDX ? (8 | 16 | 32) & tuSize : g_intraFilterFlags[dirMode] & tuSize) | |
687 | { | |
688 | // generate filtered intra prediction samples | |
689 | refAboveFlt += bufOffset; | |
690 | refLeftFlt += bufOffset; | |
691 | ||
692 | bool bStrongSmoothing = (tuSize == 32 && cu.m_slice->m_sps->bUseStrongIntraSmoothing); | |
693 | ||
694 | if (bStrongSmoothing) | |
695 | { | |
696 | const int trSize = 32; | |
697 | const int trSize2 = 32 * 2; | |
698 | const int threshold = 1 << (X265_DEPTH - 5); | |
699 | int refBL = refLeft[trSize2]; | |
700 | int refTL = refAbove[0]; | |
701 | int refTR = refAbove[trSize2]; | |
702 | bStrongSmoothing = (abs(refBL + refTL - 2 * refLeft[trSize]) < threshold && | |
703 | abs(refTL + refTR - 2 * refAbove[trSize]) < threshold); | |
704 | ||
705 | if (bStrongSmoothing) | |
706 | { | |
707 | // bilinear interpolation | |
708 | const int shift = 5 + 1; // intraNeighbors.log2TrSize + 1; | |
709 | int init = (refTL << shift) + tuSize; | |
710 | int delta; | |
711 | ||
712 | refLeftFlt[0] = refAboveFlt[0] = refAbove[0]; | |
713 | ||
714 | //TODO: Performance Primitive??? | |
715 | delta = refBL - refTL; | |
716 | for (int i = 1; i < trSize2; i++) | |
717 | refLeftFlt[i] = (pixel)((init + delta * i) >> shift); | |
718 | refLeftFlt[trSize2] = refLeft[trSize2]; | |
719 | ||
720 | delta = refTR - refTL; | |
721 | for (int i = 1; i < trSize2; i++) | |
722 | refAboveFlt[i] = (pixel)((init + delta * i) >> shift); | |
723 | refAboveFlt[trSize2] = refAbove[trSize2]; | |
724 | ||
725 | return; | |
726 | } | |
727 | } | |
728 | ||
729 | refLeft[-1] = refAbove[1]; | |
730 | for (int i = 0; i < tuSize2; i++) | |
731 | refLeftFlt[i] = (refLeft[i - 1] + 2 * refLeft[i] + refLeft[i + 1] + 2) >> 2; | |
732 | refLeftFlt[tuSize2] = refLeft[tuSize2]; | |
733 | ||
734 | refAboveFlt[0] = refLeftFlt[0]; | |
735 | for (int i = 1; i < tuSize2; i++) | |
736 | refAboveFlt[i] = (refAbove[i - 1] + 2 * refAbove[i] + refAbove[i + 1] + 2) >> 2; | |
737 | refAboveFlt[tuSize2] = refAbove[tuSize2]; | |
738 | } | |
739 | } | |
740 | ||
741 | void Predict::initAdiPatternChroma(const CUData& cu, const CUGeom& cuGeom, uint32_t absPartIdx, uint32_t partDepth, uint32_t chromaId) | |
742 | { | |
743 | IntraNeighbors intraNeighbors; | |
744 | initIntraNeighbors(cu, absPartIdx, partDepth, false, &intraNeighbors); | |
745 | uint32_t tuSize = intraNeighbors.tuSize; | |
746 | ||
747 | const pixel* adiOrigin = cu.m_encData->m_reconPicYuv->getChromaAddr(chromaId, cu.m_cuAddr, cuGeom.encodeIdx + absPartIdx); | |
748 | intptr_t picStride = cu.m_encData->m_reconPicYuv->m_strideC; | |
749 | pixel* adiRef = getAdiChromaBuf(chromaId, tuSize); | |
750 | ||
751 | fillReferenceSamples(adiOrigin, picStride, adiRef, intraNeighbors); | |
752 | } | |
753 | ||
754 | void Predict::initIntraNeighbors(const CUData& cu, uint32_t absPartIdx, uint32_t partDepth, bool isLuma, IntraNeighbors *intraNeighbors) | |
755 | { | |
756 | uint32_t log2TrSize = cu.m_log2CUSize[0] - partDepth; | |
757 | int log2UnitWidth = LOG2_UNIT_SIZE; | |
758 | int log2UnitHeight = LOG2_UNIT_SIZE; | |
759 | ||
760 | if (!isLuma) | |
761 | { | |
762 | log2TrSize -= cu.m_hChromaShift; | |
763 | log2UnitWidth -= cu.m_hChromaShift; | |
764 | log2UnitHeight -= cu.m_vChromaShift; | |
765 | } | |
766 | ||
767 | int numIntraNeighbor = 0; | |
768 | bool *bNeighborFlags = intraNeighbors->bNeighborFlags; | |
769 | ||
770 | uint32_t partIdxLT, partIdxRT, partIdxLB; | |
771 | ||
772 | cu.deriveLeftRightTopIdxAdi(partIdxLT, partIdxRT, absPartIdx, partDepth); | |
773 | ||
774 | uint32_t tuSize = 1 << log2TrSize; | |
775 | int tuWidthInUnits = tuSize >> log2UnitWidth; | |
776 | int tuHeightInUnits = tuSize >> log2UnitHeight; | |
777 | int aboveUnits = tuWidthInUnits << 1; | |
778 | int leftUnits = tuHeightInUnits << 1; | |
779 | int partIdxStride = cu.m_slice->m_sps->numPartInCUSize; | |
780 | partIdxLB = g_rasterToZscan[g_zscanToRaster[partIdxLT] + ((tuHeightInUnits - 1) * partIdxStride)]; | |
781 | ||
782 | bNeighborFlags[leftUnits] = isAboveLeftAvailable(cu, partIdxLT); | |
783 | numIntraNeighbor += (int)(bNeighborFlags[leftUnits]); | |
784 | numIntraNeighbor += isAboveAvailable(cu, partIdxLT, partIdxRT, (bNeighborFlags + leftUnits + 1)); | |
785 | numIntraNeighbor += isAboveRightAvailable(cu, partIdxLT, partIdxRT, (bNeighborFlags + leftUnits + 1 + tuWidthInUnits)); | |
786 | numIntraNeighbor += isLeftAvailable(cu, partIdxLT, partIdxLB, (bNeighborFlags + leftUnits - 1)); | |
787 | numIntraNeighbor += isBelowLeftAvailable(cu, partIdxLT, partIdxLB, (bNeighborFlags + leftUnits - 1 - tuHeightInUnits)); | |
788 | ||
789 | intraNeighbors->numIntraNeighbor = numIntraNeighbor; | |
790 | intraNeighbors->totalUnits = aboveUnits + leftUnits + 1; | |
791 | intraNeighbors->aboveUnits = aboveUnits; | |
792 | intraNeighbors->leftUnits = leftUnits; | |
793 | intraNeighbors->unitWidth = 1 << log2UnitWidth; | |
794 | intraNeighbors->unitHeight = 1 << log2UnitHeight; | |
795 | intraNeighbors->tuSize = tuSize; | |
796 | intraNeighbors->log2TrSize = log2TrSize; | |
797 | } | |
798 | ||
799 | void Predict::fillReferenceSamples(const pixel* adiOrigin, intptr_t picStride, pixel* adiRef, const IntraNeighbors& intraNeighbors) | |
800 | { | |
801 | const pixel dcValue = (pixel)(1 << (X265_DEPTH - 1)); | |
802 | int numIntraNeighbor = intraNeighbors.numIntraNeighbor; | |
803 | int totalUnits = intraNeighbors.totalUnits; | |
804 | uint32_t tuSize = intraNeighbors.tuSize; | |
805 | uint32_t refSize = tuSize * 2 + 1; | |
806 | ||
807 | if (numIntraNeighbor == 0) | |
808 | { | |
809 | // Fill border with DC value | |
810 | for (uint32_t i = 0; i < refSize; i++) | |
811 | adiRef[i] = dcValue; | |
812 | ||
813 | for (uint32_t i = 1; i < refSize; i++) | |
814 | adiRef[i * ADI_BUF_STRIDE] = dcValue; | |
815 | } | |
816 | else if (numIntraNeighbor == totalUnits) | |
817 | { | |
818 | // Fill top border with rec. samples | |
819 | const pixel* adiTemp = adiOrigin - picStride - 1; | |
820 | memcpy(adiRef, adiTemp, refSize * sizeof(*adiRef)); | |
821 | ||
822 | // Fill left border with rec. samples | |
823 | adiTemp = adiOrigin - 1; | |
824 | for (uint32_t i = 1; i < refSize; i++) | |
825 | { | |
826 | adiRef[i * ADI_BUF_STRIDE] = adiTemp[0]; | |
827 | adiTemp += picStride; | |
828 | } | |
829 | } | |
830 | else // reference samples are partially available | |
831 | { | |
832 | const bool *bNeighborFlags = intraNeighbors.bNeighborFlags; | |
833 | const bool *pNeighborFlags; | |
834 | int aboveUnits = intraNeighbors.aboveUnits; | |
835 | int leftUnits = intraNeighbors.leftUnits; | |
836 | int unitWidth = intraNeighbors.unitWidth; | |
837 | int unitHeight = intraNeighbors.unitHeight; | |
838 | int totalSamples = (leftUnits * unitHeight) + ((aboveUnits + 1) * unitWidth); | |
839 | pixel adiLineBuffer[5 * MAX_CU_SIZE]; | |
840 | pixel *adi; | |
841 | ||
842 | // Initialize | |
843 | for (int i = 0; i < totalSamples; i++) | |
844 | adiLineBuffer[i] = dcValue; | |
845 | ||
846 | // Fill top-left sample | |
847 | const pixel* adiTemp = adiOrigin - picStride - 1; | |
848 | adi = adiLineBuffer + (leftUnits * unitHeight); | |
849 | pNeighborFlags = bNeighborFlags + leftUnits; | |
850 | if (*pNeighborFlags) | |
851 | { | |
852 | pixel topLeftVal = adiTemp[0]; | |
853 | for (int i = 0; i < unitWidth; i++) | |
854 | adi[i] = topLeftVal; | |
855 | } | |
856 | ||
857 | // Fill left & below-left samples | |
858 | adiTemp += picStride; | |
859 | adi--; | |
860 | pNeighborFlags--; | |
861 | for (int j = 0; j < leftUnits; j++) | |
862 | { | |
863 | if (*pNeighborFlags) | |
864 | for (int i = 0; i < unitHeight; i++) | |
865 | adi[-i] = adiTemp[i * picStride]; | |
866 | ||
867 | adiTemp += unitHeight * picStride; | |
868 | adi -= unitHeight; | |
869 | pNeighborFlags--; | |
870 | } | |
871 | ||
872 | // Fill above & above-right samples | |
873 | adiTemp = adiOrigin - picStride; | |
874 | adi = adiLineBuffer + (leftUnits * unitHeight) + unitWidth; | |
875 | pNeighborFlags = bNeighborFlags + leftUnits + 1; | |
876 | for (int j = 0; j < aboveUnits; j++) | |
877 | { | |
878 | if (*pNeighborFlags) | |
879 | memcpy(adi, adiTemp, unitWidth * sizeof(*adiTemp)); | |
880 | adiTemp += unitWidth; | |
881 | adi += unitWidth; | |
882 | pNeighborFlags++; | |
883 | } | |
884 | ||
885 | // Pad reference samples when necessary | |
886 | int curr = 0; | |
887 | int next = 1; | |
888 | adi = adiLineBuffer; | |
889 | int pAdiLineTopRowOffset = leftUnits * (unitHeight - unitWidth); | |
890 | if (!bNeighborFlags[0]) | |
891 | { | |
892 | // very bottom unit of bottom-left; at least one unit will be valid. | |
893 | while (next < totalUnits && !bNeighborFlags[next]) | |
894 | next++; | |
895 | ||
896 | pixel *pAdiLineNext = adiLineBuffer + ((next < leftUnits) ? (next * unitHeight) : (pAdiLineTopRowOffset + (next * unitWidth))); | |
897 | const pixel refSample = *pAdiLineNext; | |
898 | // Pad unavailable samples with new value | |
899 | int nextOrTop = X265_MIN(next, leftUnits); | |
900 | // fill left column | |
901 | while (curr < nextOrTop) | |
902 | { | |
903 | for (int i = 0; i < unitHeight; i++) | |
904 | adi[i] = refSample; | |
905 | ||
906 | adi += unitHeight; | |
907 | curr++; | |
908 | } | |
909 | ||
910 | // fill top row | |
911 | while (curr < next) | |
912 | { | |
913 | for (int i = 0; i < unitWidth; i++) | |
914 | adi[i] = refSample; | |
915 | ||
916 | adi += unitWidth; | |
917 | curr++; | |
918 | } | |
919 | } | |
920 | ||
921 | // pad all other reference samples. | |
922 | while (curr < totalUnits) | |
923 | { | |
924 | if (!bNeighborFlags[curr]) // samples not available | |
925 | { | |
926 | int numSamplesInCurrUnit = (curr >= leftUnits) ? unitWidth : unitHeight; | |
927 | const pixel refSample = *(adi - 1); | |
928 | for (int i = 0; i < numSamplesInCurrUnit; i++) | |
929 | adi[i] = refSample; | |
930 | ||
931 | adi += numSamplesInCurrUnit; | |
932 | curr++; | |
933 | } | |
934 | else | |
935 | { | |
936 | adi += (curr >= leftUnits) ? unitWidth : unitHeight; | |
937 | curr++; | |
938 | } | |
939 | } | |
940 | ||
941 | // Copy processed samples | |
942 | adi = adiLineBuffer + refSize + unitWidth - 2; | |
943 | memcpy(adiRef, adi, refSize * sizeof(*adiRef)); | |
944 | ||
945 | adi = adiLineBuffer + refSize - 1; | |
946 | for (int i = 1; i < (int)refSize; i++) | |
947 | adiRef[i * ADI_BUF_STRIDE] = adi[-i]; | |
948 | } | |
949 | } | |
950 | ||
951 | bool Predict::isAboveLeftAvailable(const CUData& cu, uint32_t partIdxLT) | |
952 | { | |
953 | uint32_t partAboveLeft; | |
954 | const CUData* cuAboveLeft = cu.getPUAboveLeft(partAboveLeft, partIdxLT); | |
955 | ||
956 | if (!cu.m_slice->m_pps->bConstrainedIntraPred) | |
957 | return cuAboveLeft ? true : false; | |
958 | else | |
959 | return cuAboveLeft && cuAboveLeft->isIntra(partAboveLeft); | |
960 | } | |
961 | ||
962 | int Predict::isAboveAvailable(const CUData& cu, uint32_t partIdxLT, uint32_t partIdxRT, bool *bValidFlags) | |
963 | { | |
964 | const uint32_t rasterPartBegin = g_zscanToRaster[partIdxLT]; | |
965 | const uint32_t rasterPartEnd = g_zscanToRaster[partIdxRT] + 1; | |
966 | const uint32_t idxStep = 1; | |
967 | bool *validFlagPtr = bValidFlags; | |
968 | int numIntra = 0; | |
969 | ||
970 | for (uint32_t rasterPart = rasterPartBegin; rasterPart < rasterPartEnd; rasterPart += idxStep) | |
971 | { | |
972 | uint32_t partAbove; | |
973 | const CUData* cuAbove = cu.getPUAbove(partAbove, g_rasterToZscan[rasterPart]); | |
974 | if (cuAbove && (!cu.m_slice->m_pps->bConstrainedIntraPred || cuAbove->isIntra(partAbove))) | |
975 | { | |
976 | numIntra++; | |
977 | *validFlagPtr = true; | |
978 | } | |
979 | else | |
980 | *validFlagPtr = false; | |
981 | ||
982 | validFlagPtr++; | |
983 | } | |
984 | ||
985 | return numIntra; | |
986 | } | |
987 | ||
988 | int Predict::isLeftAvailable(const CUData& cu, uint32_t partIdxLT, uint32_t partIdxLB, bool *bValidFlags) | |
989 | { | |
990 | const uint32_t rasterPartBegin = g_zscanToRaster[partIdxLT]; | |
991 | const uint32_t rasterPartEnd = g_zscanToRaster[partIdxLB] + 1; | |
992 | const uint32_t idxStep = cu.m_slice->m_sps->numPartInCUSize; | |
993 | bool *validFlagPtr = bValidFlags; | |
994 | int numIntra = 0; | |
995 | ||
996 | for (uint32_t rasterPart = rasterPartBegin; rasterPart < rasterPartEnd; rasterPart += idxStep) | |
997 | { | |
998 | uint32_t partLeft; | |
999 | const CUData* cuLeft = cu.getPULeft(partLeft, g_rasterToZscan[rasterPart]); | |
1000 | if (cuLeft && (!cu.m_slice->m_pps->bConstrainedIntraPred || cuLeft->isIntra(partLeft))) | |
1001 | { | |
1002 | numIntra++; | |
1003 | *validFlagPtr = true; | |
1004 | } | |
1005 | else | |
1006 | *validFlagPtr = false; | |
1007 | ||
1008 | validFlagPtr--; // opposite direction | |
1009 | } | |
1010 | ||
1011 | return numIntra; | |
1012 | } | |
1013 | ||
1014 | int Predict::isAboveRightAvailable(const CUData& cu, uint32_t partIdxLT, uint32_t partIdxRT, bool *bValidFlags) | |
1015 | { | |
1016 | const uint32_t numUnitsInPU = g_zscanToRaster[partIdxRT] - g_zscanToRaster[partIdxLT] + 1; | |
1017 | bool *validFlagPtr = bValidFlags; | |
1018 | int numIntra = 0; | |
1019 | ||
1020 | for (uint32_t offset = 1; offset <= numUnitsInPU; offset++) | |
1021 | { | |
1022 | uint32_t partAboveRight; | |
1023 | const CUData* cuAboveRight = cu.getPUAboveRightAdi(partAboveRight, partIdxRT, offset); | |
1024 | if (cuAboveRight && (!cu.m_slice->m_pps->bConstrainedIntraPred || cuAboveRight->isIntra(partAboveRight))) | |
1025 | { | |
1026 | numIntra++; | |
1027 | *validFlagPtr = true; | |
1028 | } | |
1029 | else | |
1030 | *validFlagPtr = false; | |
1031 | ||
1032 | validFlagPtr++; | |
1033 | } | |
1034 | ||
1035 | return numIntra; | |
1036 | } | |
1037 | ||
1038 | int Predict::isBelowLeftAvailable(const CUData& cu, uint32_t partIdxLT, uint32_t partIdxLB, bool *bValidFlags) | |
1039 | { | |
1040 | const uint32_t numUnitsInPU = (g_zscanToRaster[partIdxLB] - g_zscanToRaster[partIdxLT]) / cu.m_slice->m_sps->numPartInCUSize + 1; | |
1041 | bool *validFlagPtr = bValidFlags; | |
1042 | int numIntra = 0; | |
1043 | ||
1044 | for (uint32_t offset = 1; offset <= numUnitsInPU; offset++) | |
1045 | { | |
1046 | uint32_t partBelowLeft; | |
1047 | const CUData* cuBelowLeft = cu.getPUBelowLeftAdi(partBelowLeft, partIdxLB, offset); | |
1048 | if (cuBelowLeft && (!cu.m_slice->m_pps->bConstrainedIntraPred || cuBelowLeft->isIntra(partBelowLeft))) | |
1049 | { | |
1050 | numIntra++; | |
1051 | *validFlagPtr = true; | |
1052 | } | |
1053 | else | |
1054 | *validFlagPtr = false; | |
1055 | ||
1056 | validFlagPtr--; // opposite direction | |
1057 | } | |
1058 | ||
1059 | return numIntra; | |
1060 | } |