Commit | Line | Data |
---|---|---|
72b9787e JB |
1 | /***************************************************************************** |
2 | * Copyright (C) 2013 x265 project | |
3 | * | |
4 | * Authors: Deepthi Nandakumar <deepthi@multicorewareinc.com> | |
5 | * | |
6 | * This program is free software; you can redistribute it and/or modify | |
7 | * it under the terms of the GNU General Public License as published by | |
8 | * the Free Software Foundation; either version 2 of the License, or | |
9 | * (at your option) any later version. | |
10 | * | |
11 | * This program is distributed in the hope that it will be useful, | |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | * GNU General Public License for more details. | |
15 | * | |
16 | * You should have received a copy of the GNU General Public License | |
17 | * along with this program; if not, write to the Free Software | |
18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. | |
19 | * | |
20 | * This program is also available under a commercial proprietary license. | |
21 | * For more information, contact us at license @ x265.com. | |
22 | *****************************************************************************/ | |
23 | ||
24 | #include "common.h" | |
25 | #include "slice.h" | |
26 | #include "framedata.h" | |
27 | #include "picyuv.h" | |
28 | #include "predict.h" | |
29 | #include "primitives.h" | |
30 | ||
31 | using namespace x265; | |
32 | ||
33 | namespace | |
34 | { | |
35 | inline pixel weightBidir(int w0, int16_t P0, int w1, int16_t P1, int round, int shift, int offset) | |
36 | { | |
37 | return Clip((w0 * (P0 + IF_INTERNAL_OFFS) + w1 * (P1 + IF_INTERNAL_OFFS) + round + (offset << (shift - 1))) >> shift); | |
38 | } | |
39 | } | |
40 | ||
41 | Predict::Predict() | |
42 | { | |
43 | m_predBuf = NULL; | |
44 | m_refAbove = NULL; | |
45 | m_refAboveFlt = NULL; | |
46 | m_refLeft = NULL; | |
47 | m_refLeftFlt = NULL; | |
48 | m_immedVals = NULL; | |
49 | } | |
50 | ||
51 | Predict::~Predict() | |
52 | { | |
53 | X265_FREE(m_predBuf); | |
54 | X265_FREE(m_refAbove); | |
55 | X265_FREE(m_immedVals); | |
56 | m_predShortYuv[0].destroy(); | |
57 | m_predShortYuv[1].destroy(); | |
58 | } | |
59 | ||
60 | bool Predict::allocBuffers(int csp) | |
61 | { | |
62 | m_csp = csp; | |
63 | m_hChromaShift = CHROMA_H_SHIFT(csp); | |
64 | m_vChromaShift = CHROMA_V_SHIFT(csp); | |
65 | ||
66 | int predBufHeight = ((MAX_CU_SIZE + 2) << 4); | |
67 | int predBufStride = ((MAX_CU_SIZE + 8) << 4); | |
68 | CHECKED_MALLOC(m_predBuf, pixel, predBufStride * predBufHeight); | |
69 | CHECKED_MALLOC(m_immedVals, int16_t, 64 * (64 + NTAPS_LUMA - 1)); | |
70 | CHECKED_MALLOC(m_refAbove, pixel, 12 * MAX_CU_SIZE); | |
71 | ||
72 | m_refAboveFlt = m_refAbove + 3 * MAX_CU_SIZE; | |
73 | m_refLeft = m_refAboveFlt + 3 * MAX_CU_SIZE; | |
74 | m_refLeftFlt = m_refLeft + 3 * MAX_CU_SIZE; | |
75 | ||
76 | return m_predShortYuv[0].create(MAX_CU_SIZE, csp) && m_predShortYuv[1].create(MAX_CU_SIZE, csp); | |
77 | ||
78 | fail: | |
79 | return false; | |
80 | } | |
81 | ||
82 | void Predict::predIntraLumaAng(uint32_t dirMode, pixel* dst, intptr_t stride, uint32_t log2TrSize) | |
83 | { | |
84 | int tuSize = 1 << log2TrSize; | |
85 | ||
b53f7c52 JB |
86 | pixel* refLft; |
87 | pixel* refAbv; | |
72b9787e JB |
88 | |
89 | if (!(g_intraFilterFlags[dirMode] & tuSize)) | |
90 | { | |
91 | refLft = m_refLeft + tuSize - 1; | |
92 | refAbv = m_refAbove + tuSize - 1; | |
93 | } | |
94 | else | |
95 | { | |
96 | refLft = m_refLeftFlt + tuSize - 1; | |
97 | refAbv = m_refAboveFlt + tuSize - 1; | |
98 | } | |
99 | ||
100 | bool bFilter = log2TrSize <= 4; | |
101 | int sizeIdx = log2TrSize - 2; | |
102 | X265_CHECK(sizeIdx >= 0 && sizeIdx < 4, "intra block size is out of range\n"); | |
103 | primitives.intra_pred[dirMode][sizeIdx](dst, stride, refLft, refAbv, dirMode, bFilter); | |
104 | } | |
105 | ||
106 | void Predict::predIntraChromaAng(pixel* src, uint32_t dirMode, pixel* dst, intptr_t stride, uint32_t log2TrSizeC, int chFmt) | |
107 | { | |
108 | int tuSize = 1 << log2TrSizeC; | |
109 | int tuSize2 = tuSize << 1; | |
110 | ||
111 | // Create the prediction | |
112 | const int bufOffset = tuSize - 1; | |
113 | pixel buf0[3 * MAX_CU_SIZE]; | |
114 | pixel buf1[3 * MAX_CU_SIZE]; | |
115 | pixel* above; | |
116 | pixel* left = buf0 + bufOffset; | |
117 | ||
118 | int limit = (dirMode <= 25 && dirMode >= 11) ? (tuSize + 1 + 1) : (tuSize2 + 1); | |
119 | for (int k = 0; k < limit; k++) | |
120 | left[k] = src[k * ADI_BUF_STRIDE]; | |
121 | ||
122 | if (chFmt == X265_CSP_I444 && (g_intraFilterFlags[dirMode] & tuSize)) | |
123 | { | |
124 | // generate filtered intra prediction samples | |
125 | buf0[bufOffset - 1] = src[1]; | |
126 | left = buf1 + bufOffset; | |
127 | for (int i = 0; i < tuSize2; i++) | |
128 | left[i] = (buf0[bufOffset + i - 1] + 2 * buf0[bufOffset + i] + buf0[bufOffset + i + 1] + 2) >> 2; | |
129 | left[tuSize2] = buf0[bufOffset + tuSize2]; | |
130 | ||
131 | above = buf0 + bufOffset; | |
132 | above[0] = left[0]; | |
133 | for (int i = 1; i < tuSize2; i++) | |
134 | above[i] = (src[i - 1] + 2 * src[i] + src[i + 1] + 2) >> 2; | |
135 | above[tuSize2] = src[tuSize2]; | |
136 | } | |
137 | else | |
138 | { | |
139 | above = buf1 + bufOffset; | |
140 | memcpy(above, src, (tuSize2 + 1) * sizeof(pixel)); | |
141 | } | |
142 | ||
143 | int sizeIdx = log2TrSizeC - 2; | |
144 | X265_CHECK(sizeIdx >= 0 && sizeIdx < 4, "intra block size is out of range\n"); | |
145 | primitives.intra_pred[dirMode][sizeIdx](dst, stride, left, above, dirMode, 0); | |
146 | } | |
147 | ||
148 | void Predict::initMotionCompensation(const CUData& cu, const CUGeom& cuGeom, int partIdx) | |
149 | { | |
150 | m_predSlice = cu.m_slice; | |
151 | cu.getPartIndexAndSize(partIdx, m_puAbsPartIdx, m_puWidth, m_puHeight); | |
152 | m_ctuAddr = cu.m_cuAddr; | |
153 | m_cuAbsPartIdx = cuGeom.encodeIdx; | |
154 | } | |
155 | ||
156 | void Predict::prepMotionCompensation(const CUData& cu, const CUGeom& cuGeom, int partIdx) | |
157 | { | |
158 | initMotionCompensation(cu, cuGeom, partIdx); | |
159 | ||
160 | m_refIdx0 = cu.m_refIdx[0][m_puAbsPartIdx]; | |
161 | m_clippedMv[0] = cu.m_mv[0][m_puAbsPartIdx]; | |
162 | m_refIdx1 = cu.m_refIdx[1][m_puAbsPartIdx]; | |
163 | m_clippedMv[1] = cu.m_mv[1][m_puAbsPartIdx]; | |
164 | cu.clipMv(m_clippedMv[0]); | |
165 | cu.clipMv(m_clippedMv[1]); | |
166 | } | |
167 | ||
168 | void Predict::motionCompensation(Yuv& predYuv, bool bLuma, bool bChroma) | |
169 | { | |
170 | if (m_predSlice->isInterP()) | |
171 | { | |
172 | /* P Slice */ | |
173 | WeightValues wv0[3]; | |
174 | X265_CHECK(m_refIdx0 >= 0, "invalid P refidx\n"); | |
175 | X265_CHECK(m_refIdx0 < m_predSlice->m_numRefIdx[0], "P refidx out of range\n"); | |
176 | const WeightParam *wp0 = m_predSlice->m_weightPredTable[0][m_refIdx0]; | |
177 | ||
178 | if (m_predSlice->m_pps->bUseWeightPred && wp0->bPresentFlag) | |
179 | { | |
180 | for (int plane = 0; plane < 3; plane++) | |
181 | { | |
182 | wv0[plane].w = wp0[plane].inputWeight; | |
183 | wv0[plane].offset = wp0[plane].inputOffset * (1 << (X265_DEPTH - 8)); | |
184 | wv0[plane].shift = wp0[plane].log2WeightDenom; | |
185 | wv0[plane].round = wp0[plane].log2WeightDenom >= 1 ? 1 << (wp0[plane].log2WeightDenom - 1) : 0; | |
186 | } | |
187 | ||
188 | ShortYuv& shortYuv = m_predShortYuv[0]; | |
189 | ||
190 | if (bLuma) | |
b53f7c52 | 191 | predInterLumaShort(shortYuv, *m_predSlice->m_refPicList[0][m_refIdx0]->m_reconPic, m_clippedMv[0]); |
72b9787e | 192 | if (bChroma) |
b53f7c52 | 193 | predInterChromaShort(shortYuv, *m_predSlice->m_refPicList[0][m_refIdx0]->m_reconPic, m_clippedMv[0]); |
72b9787e JB |
194 | |
195 | addWeightUni(predYuv, shortYuv, wv0, bLuma, bChroma); | |
196 | } | |
197 | else | |
198 | { | |
199 | if (bLuma) | |
b53f7c52 | 200 | predInterLumaPixel(predYuv, *m_predSlice->m_refPicList[0][m_refIdx0]->m_reconPic, m_clippedMv[0]); |
72b9787e | 201 | if (bChroma) |
b53f7c52 | 202 | predInterChromaPixel(predYuv, *m_predSlice->m_refPicList[0][m_refIdx0]->m_reconPic, m_clippedMv[0]); |
72b9787e JB |
203 | } |
204 | } | |
205 | else | |
206 | { | |
207 | /* B Slice */ | |
208 | ||
209 | WeightValues wv0[3], wv1[3]; | |
210 | const WeightParam *pwp0, *pwp1; | |
211 | ||
212 | if (m_predSlice->m_pps->bUseWeightedBiPred) | |
213 | { | |
214 | pwp0 = m_refIdx0 >= 0 ? m_predSlice->m_weightPredTable[0][m_refIdx0] : NULL; | |
215 | pwp1 = m_refIdx1 >= 0 ? m_predSlice->m_weightPredTable[1][m_refIdx1] : NULL; | |
216 | ||
217 | if (pwp0 && pwp1 && (pwp0->bPresentFlag || pwp1->bPresentFlag)) | |
218 | { | |
219 | /* biprediction weighting */ | |
220 | for (int plane = 0; plane < 3; plane++) | |
221 | { | |
222 | wv0[plane].w = pwp0[plane].inputWeight; | |
223 | wv0[plane].o = pwp0[plane].inputOffset * (1 << (X265_DEPTH - 8)); | |
224 | wv0[plane].shift = pwp0[plane].log2WeightDenom; | |
225 | wv0[plane].round = 1 << pwp0[plane].log2WeightDenom; | |
226 | ||
227 | wv1[plane].w = pwp1[plane].inputWeight; | |
228 | wv1[plane].o = pwp1[plane].inputOffset * (1 << (X265_DEPTH - 8)); | |
229 | wv1[plane].shift = wv0[plane].shift; | |
230 | wv1[plane].round = wv0[plane].round; | |
231 | } | |
232 | } | |
233 | else | |
234 | { | |
235 | /* uniprediction weighting, always outputs to wv0 */ | |
236 | const WeightParam* pwp = (m_refIdx0 >= 0) ? pwp0 : pwp1; | |
237 | for (int plane = 0; plane < 3; plane++) | |
238 | { | |
239 | wv0[plane].w = pwp[plane].inputWeight; | |
240 | wv0[plane].offset = pwp[plane].inputOffset * (1 << (X265_DEPTH - 8)); | |
241 | wv0[plane].shift = pwp[plane].log2WeightDenom; | |
242 | wv0[plane].round = pwp[plane].log2WeightDenom >= 1 ? 1 << (pwp[plane].log2WeightDenom - 1) : 0; | |
243 | } | |
244 | } | |
245 | } | |
246 | else | |
247 | pwp0 = pwp1 = NULL; | |
248 | ||
249 | if (m_refIdx0 >= 0 && m_refIdx1 >= 0) | |
250 | { | |
251 | /* Biprediction */ | |
252 | X265_CHECK(m_refIdx0 < m_predSlice->m_numRefIdx[0], "bidir refidx0 out of range\n"); | |
253 | X265_CHECK(m_refIdx1 < m_predSlice->m_numRefIdx[1], "bidir refidx1 out of range\n"); | |
254 | ||
255 | if (bLuma) | |
256 | { | |
b53f7c52 JB |
257 | predInterLumaShort(m_predShortYuv[0], *m_predSlice->m_refPicList[0][m_refIdx0]->m_reconPic, m_clippedMv[0]); |
258 | predInterLumaShort(m_predShortYuv[1], *m_predSlice->m_refPicList[1][m_refIdx1]->m_reconPic, m_clippedMv[1]); | |
72b9787e JB |
259 | } |
260 | if (bChroma) | |
261 | { | |
b53f7c52 JB |
262 | predInterChromaShort(m_predShortYuv[0], *m_predSlice->m_refPicList[0][m_refIdx0]->m_reconPic, m_clippedMv[0]); |
263 | predInterChromaShort(m_predShortYuv[1], *m_predSlice->m_refPicList[1][m_refIdx1]->m_reconPic, m_clippedMv[1]); | |
72b9787e JB |
264 | } |
265 | ||
266 | if (pwp0 && pwp1 && (pwp0->bPresentFlag || pwp1->bPresentFlag)) | |
267 | addWeightBi(predYuv, m_predShortYuv[0], m_predShortYuv[1], wv0, wv1, bLuma, bChroma); | |
268 | else | |
269 | predYuv.addAvg(m_predShortYuv[0], m_predShortYuv[1], m_puAbsPartIdx, m_puWidth, m_puHeight, bLuma, bChroma); | |
270 | } | |
271 | else if (m_refIdx0 >= 0) | |
272 | { | |
273 | /* uniprediction to L0 */ | |
274 | X265_CHECK(m_refIdx0 < m_predSlice->m_numRefIdx[0], "unidir refidx0 out of range\n"); | |
275 | ||
276 | if (pwp0 && pwp0->bPresentFlag) | |
277 | { | |
278 | ShortYuv& shortYuv = m_predShortYuv[0]; | |
279 | ||
280 | if (bLuma) | |
b53f7c52 | 281 | predInterLumaShort(shortYuv, *m_predSlice->m_refPicList[0][m_refIdx0]->m_reconPic, m_clippedMv[0]); |
72b9787e | 282 | if (bChroma) |
b53f7c52 | 283 | predInterChromaShort(shortYuv, *m_predSlice->m_refPicList[0][m_refIdx0]->m_reconPic, m_clippedMv[0]); |
72b9787e JB |
284 | |
285 | addWeightUni(predYuv, shortYuv, wv0, bLuma, bChroma); | |
286 | } | |
287 | else | |
288 | { | |
289 | if (bLuma) | |
b53f7c52 | 290 | predInterLumaPixel(predYuv, *m_predSlice->m_refPicList[0][m_refIdx0]->m_reconPic, m_clippedMv[0]); |
72b9787e | 291 | if (bChroma) |
b53f7c52 | 292 | predInterChromaPixel(predYuv, *m_predSlice->m_refPicList[0][m_refIdx0]->m_reconPic, m_clippedMv[0]); |
72b9787e JB |
293 | } |
294 | } | |
295 | else | |
296 | { | |
297 | /* uniprediction to L1 */ | |
298 | X265_CHECK(m_refIdx1 >= 0, "refidx1 was not positive\n"); | |
299 | X265_CHECK(m_refIdx1 < m_predSlice->m_numRefIdx[1], "unidir refidx1 out of range\n"); | |
300 | ||
301 | if (pwp1 && pwp1->bPresentFlag) | |
302 | { | |
303 | ShortYuv& shortYuv = m_predShortYuv[0]; | |
304 | ||
305 | if (bLuma) | |
b53f7c52 | 306 | predInterLumaShort(shortYuv, *m_predSlice->m_refPicList[1][m_refIdx1]->m_reconPic, m_clippedMv[1]); |
72b9787e | 307 | if (bChroma) |
b53f7c52 | 308 | predInterChromaShort(shortYuv, *m_predSlice->m_refPicList[1][m_refIdx1]->m_reconPic, m_clippedMv[1]); |
72b9787e JB |
309 | |
310 | addWeightUni(predYuv, shortYuv, wv0, bLuma, bChroma); | |
311 | } | |
312 | else | |
313 | { | |
314 | if (bLuma) | |
b53f7c52 | 315 | predInterLumaPixel(predYuv, *m_predSlice->m_refPicList[1][m_refIdx1]->m_reconPic, m_clippedMv[1]); |
72b9787e | 316 | if (bChroma) |
b53f7c52 | 317 | predInterChromaPixel(predYuv, *m_predSlice->m_refPicList[1][m_refIdx1]->m_reconPic, m_clippedMv[1]); |
72b9787e JB |
318 | } |
319 | } | |
320 | } | |
321 | } | |
322 | ||
323 | void Predict::predInterLumaPixel(Yuv& dstYuv, const PicYuv& refPic, const MV& mv) const | |
324 | { | |
b53f7c52 | 325 | pixel* dst = dstYuv.getLumaAddr(m_puAbsPartIdx); |
72b9787e JB |
326 | intptr_t dstStride = dstYuv.m_size; |
327 | ||
328 | intptr_t srcStride = refPic.m_stride; | |
329 | intptr_t srcOffset = (mv.x >> 2) + (mv.y >> 2) * srcStride; | |
330 | int partEnum = partitionFromSizes(m_puWidth, m_puHeight); | |
b53f7c52 | 331 | const pixel* src = refPic.getLumaAddr(m_ctuAddr, m_cuAbsPartIdx + m_puAbsPartIdx) + srcOffset; |
72b9787e JB |
332 | |
333 | int xFrac = mv.x & 0x3; | |
334 | int yFrac = mv.y & 0x3; | |
335 | ||
336 | if (!(yFrac | xFrac)) | |
337 | primitives.luma_copy_pp[partEnum](dst, dstStride, src, srcStride); | |
338 | else if (!yFrac) | |
339 | primitives.luma_hpp[partEnum](src, srcStride, dst, dstStride, xFrac); | |
340 | else if (!xFrac) | |
341 | primitives.luma_vpp[partEnum](src, srcStride, dst, dstStride, yFrac); | |
342 | else | |
343 | { | |
344 | int tmpStride = m_puWidth; | |
345 | int filterSize = NTAPS_LUMA; | |
346 | int halfFilterSize = (filterSize >> 1); | |
347 | primitives.luma_hps[partEnum](src, srcStride, m_immedVals, tmpStride, xFrac, 1); | |
348 | primitives.luma_vsp[partEnum](m_immedVals + (halfFilterSize - 1) * tmpStride, tmpStride, dst, dstStride, yFrac); | |
349 | } | |
350 | } | |
351 | ||
352 | void Predict::predInterLumaShort(ShortYuv& dstSYuv, const PicYuv& refPic, const MV& mv) const | |
353 | { | |
b53f7c52 | 354 | int16_t* dst = dstSYuv.getLumaAddr(m_puAbsPartIdx); |
72b9787e JB |
355 | int dstStride = dstSYuv.m_size; |
356 | ||
357 | intptr_t srcStride = refPic.m_stride; | |
358 | intptr_t srcOffset = (mv.x >> 2) + (mv.y >> 2) * srcStride; | |
b53f7c52 | 359 | const pixel* src = refPic.getLumaAddr(m_ctuAddr, m_cuAbsPartIdx + m_puAbsPartIdx) + srcOffset; |
72b9787e JB |
360 | |
361 | int xFrac = mv.x & 0x3; | |
362 | int yFrac = mv.y & 0x3; | |
363 | ||
364 | int partEnum = partitionFromSizes(m_puWidth, m_puHeight); | |
365 | ||
366 | X265_CHECK((m_puWidth % 4) + (m_puHeight % 4) == 0, "width or height not divisible by 4\n"); | |
367 | X265_CHECK(dstStride == MAX_CU_SIZE, "stride expected to be max cu size\n"); | |
368 | ||
369 | if (!(yFrac | xFrac)) | |
370 | primitives.luma_p2s(src, srcStride, dst, m_puWidth, m_puHeight); | |
371 | else if (!yFrac) | |
372 | primitives.luma_hps[partEnum](src, srcStride, dst, dstStride, xFrac, 0); | |
373 | else if (!xFrac) | |
374 | primitives.luma_vps[partEnum](src, srcStride, dst, dstStride, yFrac); | |
375 | else | |
376 | { | |
377 | int tmpStride = m_puWidth; | |
378 | int filterSize = NTAPS_LUMA; | |
379 | int halfFilterSize = (filterSize >> 1); | |
380 | primitives.luma_hps[partEnum](src, srcStride, m_immedVals, tmpStride, xFrac, 1); | |
381 | primitives.luma_vss[partEnum](m_immedVals + (halfFilterSize - 1) * tmpStride, tmpStride, dst, dstStride, yFrac); | |
382 | } | |
383 | } | |
384 | ||
385 | void Predict::predInterChromaPixel(Yuv& dstYuv, const PicYuv& refPic, const MV& mv) const | |
386 | { | |
387 | intptr_t dstStride = dstYuv.m_csize; | |
388 | intptr_t refStride = refPic.m_strideC; | |
389 | ||
390 | int shiftHor = (2 + m_hChromaShift); | |
391 | int shiftVer = (2 + m_vChromaShift); | |
392 | ||
393 | intptr_t refOffset = (mv.x >> shiftHor) + (mv.y >> shiftVer) * refStride; | |
394 | ||
b53f7c52 JB |
395 | const pixel* refCb = refPic.getCbAddr(m_ctuAddr, m_cuAbsPartIdx + m_puAbsPartIdx) + refOffset; |
396 | const pixel* refCr = refPic.getCrAddr(m_ctuAddr, m_cuAbsPartIdx + m_puAbsPartIdx) + refOffset; | |
72b9787e JB |
397 | |
398 | pixel* dstCb = dstYuv.getCbAddr(m_puAbsPartIdx); | |
399 | pixel* dstCr = dstYuv.getCrAddr(m_puAbsPartIdx); | |
400 | ||
401 | int xFrac = mv.x & ((1 << shiftHor) - 1); | |
402 | int yFrac = mv.y & ((1 << shiftVer) - 1); | |
403 | ||
404 | int partEnum = partitionFromSizes(m_puWidth, m_puHeight); | |
405 | ||
406 | if (!(yFrac | xFrac)) | |
407 | { | |
408 | primitives.chroma[m_csp].copy_pp[partEnum](dstCb, dstStride, refCb, refStride); | |
409 | primitives.chroma[m_csp].copy_pp[partEnum](dstCr, dstStride, refCr, refStride); | |
410 | } | |
411 | else if (!yFrac) | |
412 | { | |
413 | primitives.chroma[m_csp].filter_hpp[partEnum](refCb, refStride, dstCb, dstStride, xFrac << (1 - m_hChromaShift)); | |
414 | primitives.chroma[m_csp].filter_hpp[partEnum](refCr, refStride, dstCr, dstStride, xFrac << (1 - m_hChromaShift)); | |
415 | } | |
416 | else if (!xFrac) | |
417 | { | |
418 | primitives.chroma[m_csp].filter_vpp[partEnum](refCb, refStride, dstCb, dstStride, yFrac << (1 - m_vChromaShift)); | |
419 | primitives.chroma[m_csp].filter_vpp[partEnum](refCr, refStride, dstCr, dstStride, yFrac << (1 - m_vChromaShift)); | |
420 | } | |
421 | else | |
422 | { | |
423 | int extStride = m_puWidth >> m_hChromaShift; | |
424 | int filterSize = NTAPS_CHROMA; | |
425 | int halfFilterSize = (filterSize >> 1); | |
426 | ||
427 | primitives.chroma[m_csp].filter_hps[partEnum](refCb, refStride, m_immedVals, extStride, xFrac << (1 - m_hChromaShift), 1); | |
428 | primitives.chroma[m_csp].filter_vsp[partEnum](m_immedVals + (halfFilterSize - 1) * extStride, extStride, dstCb, dstStride, yFrac << (1 - m_vChromaShift)); | |
429 | ||
430 | primitives.chroma[m_csp].filter_hps[partEnum](refCr, refStride, m_immedVals, extStride, xFrac << (1 - m_hChromaShift), 1); | |
431 | primitives.chroma[m_csp].filter_vsp[partEnum](m_immedVals + (halfFilterSize - 1) * extStride, extStride, dstCr, dstStride, yFrac << (1 - m_vChromaShift)); | |
432 | } | |
433 | } | |
434 | ||
435 | void Predict::predInterChromaShort(ShortYuv& dstSYuv, const PicYuv& refPic, const MV& mv) const | |
436 | { | |
437 | intptr_t refStride = refPic.m_strideC; | |
438 | intptr_t dstStride = dstSYuv.m_csize; | |
439 | ||
440 | int shiftHor = (2 + m_hChromaShift); | |
441 | int shiftVer = (2 + m_vChromaShift); | |
442 | ||
443 | intptr_t refOffset = (mv.x >> shiftHor) + (mv.y >> shiftVer) * refStride; | |
444 | ||
b53f7c52 JB |
445 | const pixel* refCb = refPic.getCbAddr(m_ctuAddr, m_cuAbsPartIdx + m_puAbsPartIdx) + refOffset; |
446 | const pixel* refCr = refPic.getCrAddr(m_ctuAddr, m_cuAbsPartIdx + m_puAbsPartIdx) + refOffset; | |
72b9787e JB |
447 | |
448 | int16_t* dstCb = dstSYuv.getCbAddr(m_puAbsPartIdx); | |
449 | int16_t* dstCr = dstSYuv.getCrAddr(m_puAbsPartIdx); | |
450 | ||
451 | int xFrac = mv.x & ((1 << shiftHor) - 1); | |
452 | int yFrac = mv.y & ((1 << shiftVer) - 1); | |
453 | ||
454 | int partEnum = partitionFromSizes(m_puWidth, m_puHeight); | |
455 | ||
456 | uint32_t cxWidth = m_puWidth >> m_hChromaShift; | |
457 | uint32_t cxHeight = m_puHeight >> m_vChromaShift; | |
458 | ||
459 | X265_CHECK(((cxWidth | cxHeight) % 2) == 0, "chroma block size expected to be multiple of 2\n"); | |
460 | ||
461 | if (!(yFrac | xFrac)) | |
462 | { | |
b53f7c52 JB |
463 | primitives.chroma[m_csp].p2s(refCb, refStride, dstCb, cxWidth, cxHeight); |
464 | primitives.chroma[m_csp].p2s(refCr, refStride, dstCr, cxWidth, cxHeight); | |
72b9787e JB |
465 | } |
466 | else if (!yFrac) | |
467 | { | |
468 | primitives.chroma[m_csp].filter_hps[partEnum](refCb, refStride, dstCb, dstStride, xFrac << (1 - m_hChromaShift), 0); | |
469 | primitives.chroma[m_csp].filter_hps[partEnum](refCr, refStride, dstCr, dstStride, xFrac << (1 - m_hChromaShift), 0); | |
470 | } | |
471 | else if (!xFrac) | |
472 | { | |
473 | primitives.chroma[m_csp].filter_vps[partEnum](refCb, refStride, dstCb, dstStride, yFrac << (1 - m_vChromaShift)); | |
474 | primitives.chroma[m_csp].filter_vps[partEnum](refCr, refStride, dstCr, dstStride, yFrac << (1 - m_vChromaShift)); | |
475 | } | |
476 | else | |
477 | { | |
478 | int extStride = cxWidth; | |
479 | int filterSize = NTAPS_CHROMA; | |
480 | int halfFilterSize = (filterSize >> 1); | |
481 | primitives.chroma[m_csp].filter_hps[partEnum](refCb, refStride, m_immedVals, extStride, xFrac << (1 - m_hChromaShift), 1); | |
482 | primitives.chroma[m_csp].filter_vss[partEnum](m_immedVals + (halfFilterSize - 1) * extStride, extStride, dstCb, dstStride, yFrac << (1 - m_vChromaShift)); | |
483 | primitives.chroma[m_csp].filter_hps[partEnum](refCr, refStride, m_immedVals, extStride, xFrac << (1 - m_hChromaShift), 1); | |
484 | primitives.chroma[m_csp].filter_vss[partEnum](m_immedVals + (halfFilterSize - 1) * extStride, extStride, dstCr, dstStride, yFrac << (1 - m_vChromaShift)); | |
485 | } | |
486 | } | |
487 | ||
488 | /* weighted averaging for bi-pred */ | |
489 | void Predict::addWeightBi(Yuv& predYuv, const ShortYuv& srcYuv0, const ShortYuv& srcYuv1, const WeightValues wp0[3], const WeightValues wp1[3], bool bLuma, bool bChroma) const | |
490 | { | |
491 | int x, y; | |
492 | ||
493 | int w0, w1, offset, shiftNum, shift, round; | |
494 | uint32_t src0Stride, src1Stride, dststride; | |
495 | ||
72b9787e JB |
496 | if (bLuma) |
497 | { | |
b53f7c52 JB |
498 | pixel* dstY = predYuv.getLumaAddr(m_puAbsPartIdx); |
499 | const int16_t* srcY0 = srcYuv0.getLumaAddr(m_puAbsPartIdx); | |
500 | const int16_t* srcY1 = srcYuv1.getLumaAddr(m_puAbsPartIdx); | |
501 | ||
72b9787e JB |
502 | // Luma |
503 | w0 = wp0[0].w; | |
504 | offset = wp0[0].o + wp1[0].o; | |
505 | shiftNum = IF_INTERNAL_PREC - X265_DEPTH; | |
506 | shift = wp0[0].shift + shiftNum + 1; | |
507 | round = shift ? (1 << (shift - 1)) : 0; | |
508 | w1 = wp1[0].w; | |
509 | ||
510 | src0Stride = srcYuv0.m_size; | |
511 | src1Stride = srcYuv1.m_size; | |
512 | dststride = predYuv.m_size; | |
513 | ||
514 | // TODO: can we use weight_sp here? | |
515 | for (y = m_puHeight - 1; y >= 0; y--) | |
516 | { | |
517 | for (x = m_puWidth - 1; x >= 0; ) | |
518 | { | |
519 | // note: luma min width is 4 | |
520 | dstY[x] = weightBidir(w0, srcY0[x], w1, srcY1[x], round, shift, offset); | |
521 | x--; | |
522 | dstY[x] = weightBidir(w0, srcY0[x], w1, srcY1[x], round, shift, offset); | |
523 | x--; | |
524 | dstY[x] = weightBidir(w0, srcY0[x], w1, srcY1[x], round, shift, offset); | |
525 | x--; | |
526 | dstY[x] = weightBidir(w0, srcY0[x], w1, srcY1[x], round, shift, offset); | |
527 | x--; | |
528 | } | |
529 | ||
530 | srcY0 += src0Stride; | |
531 | srcY1 += src1Stride; | |
532 | dstY += dststride; | |
533 | } | |
534 | } | |
535 | ||
536 | if (bChroma) | |
537 | { | |
b53f7c52 JB |
538 | pixel* dstU = predYuv.getCbAddr(m_puAbsPartIdx); |
539 | pixel* dstV = predYuv.getCrAddr(m_puAbsPartIdx); | |
540 | const int16_t* srcU0 = srcYuv0.getCbAddr(m_puAbsPartIdx); | |
541 | const int16_t* srcV0 = srcYuv0.getCrAddr(m_puAbsPartIdx); | |
542 | const int16_t* srcU1 = srcYuv1.getCbAddr(m_puAbsPartIdx); | |
543 | const int16_t* srcV1 = srcYuv1.getCrAddr(m_puAbsPartIdx); | |
544 | ||
72b9787e JB |
545 | // Chroma U |
546 | w0 = wp0[1].w; | |
547 | offset = wp0[1].o + wp1[1].o; | |
548 | shiftNum = IF_INTERNAL_PREC - X265_DEPTH; | |
549 | shift = wp0[1].shift + shiftNum + 1; | |
550 | round = shift ? (1 << (shift - 1)) : 0; | |
551 | w1 = wp1[1].w; | |
552 | ||
553 | src0Stride = srcYuv0.m_csize; | |
554 | src1Stride = srcYuv1.m_csize; | |
555 | dststride = predYuv.m_csize; | |
556 | ||
557 | uint32_t cwidth = m_puWidth >> srcYuv0.m_hChromaShift; | |
558 | uint32_t cheight = m_puHeight >> srcYuv0.m_vChromaShift; | |
559 | ||
560 | // TODO: can we use weight_sp here? | |
561 | for (y = cheight - 1; y >= 0; y--) | |
562 | { | |
563 | for (x = cwidth - 1; x >= 0;) | |
564 | { | |
565 | // note: chroma min width is 2 | |
566 | dstU[x] = weightBidir(w0, srcU0[x], w1, srcU1[x], round, shift, offset); | |
567 | x--; | |
568 | dstU[x] = weightBidir(w0, srcU0[x], w1, srcU1[x], round, shift, offset); | |
569 | x--; | |
570 | } | |
571 | ||
572 | srcU0 += src0Stride; | |
573 | srcU1 += src1Stride; | |
574 | dstU += dststride; | |
575 | } | |
576 | ||
577 | // Chroma V | |
578 | w0 = wp0[2].w; | |
579 | offset = wp0[2].o + wp1[2].o; | |
580 | shift = wp0[2].shift + shiftNum + 1; | |
581 | round = shift ? (1 << (shift - 1)) : 0; | |
582 | w1 = wp1[2].w; | |
583 | ||
584 | for (y = cheight - 1; y >= 0; y--) | |
585 | { | |
586 | for (x = cwidth - 1; x >= 0;) | |
587 | { | |
588 | // note: chroma min width is 2 | |
589 | dstV[x] = weightBidir(w0, srcV0[x], w1, srcV1[x], round, shift, offset); | |
590 | x--; | |
591 | dstV[x] = weightBidir(w0, srcV0[x], w1, srcV1[x], round, shift, offset); | |
592 | x--; | |
593 | } | |
594 | ||
595 | srcV0 += src0Stride; | |
596 | srcV1 += src1Stride; | |
597 | dstV += dststride; | |
598 | } | |
599 | } | |
600 | } | |
601 | ||
602 | /* weighted averaging for uni-pred */ | |
603 | void Predict::addWeightUni(Yuv& predYuv, const ShortYuv& srcYuv, const WeightValues wp[3], bool bLuma, bool bChroma) const | |
604 | { | |
72b9787e JB |
605 | int w0, offset, shiftNum, shift, round; |
606 | uint32_t srcStride, dstStride; | |
607 | ||
608 | if (bLuma) | |
609 | { | |
b53f7c52 JB |
610 | pixel* dstY = predYuv.getLumaAddr(m_puAbsPartIdx); |
611 | const int16_t* srcY0 = srcYuv.getLumaAddr(m_puAbsPartIdx); | |
612 | ||
72b9787e JB |
613 | // Luma |
614 | w0 = wp[0].w; | |
615 | offset = wp[0].offset; | |
616 | shiftNum = IF_INTERNAL_PREC - X265_DEPTH; | |
617 | shift = wp[0].shift + shiftNum; | |
618 | round = shift ? (1 << (shift - 1)) : 0; | |
619 | srcStride = srcYuv.m_size; | |
620 | dstStride = predYuv.m_size; | |
621 | ||
b53f7c52 | 622 | primitives.weight_sp(srcY0, dstY, srcStride, dstStride, m_puWidth, m_puHeight, w0, round, shift, offset); |
72b9787e JB |
623 | } |
624 | ||
625 | if (bChroma) | |
626 | { | |
b53f7c52 JB |
627 | pixel* dstU = predYuv.getCbAddr(m_puAbsPartIdx); |
628 | pixel* dstV = predYuv.getCrAddr(m_puAbsPartIdx); | |
629 | const int16_t* srcU0 = srcYuv.getCbAddr(m_puAbsPartIdx); | |
630 | const int16_t* srcV0 = srcYuv.getCrAddr(m_puAbsPartIdx); | |
631 | ||
72b9787e JB |
632 | // Chroma U |
633 | w0 = wp[1].w; | |
634 | offset = wp[1].offset; | |
635 | shiftNum = IF_INTERNAL_PREC - X265_DEPTH; | |
636 | shift = wp[1].shift + shiftNum; | |
637 | round = shift ? (1 << (shift - 1)) : 0; | |
638 | ||
639 | srcStride = srcYuv.m_csize; | |
640 | dstStride = predYuv.m_csize; | |
641 | ||
642 | uint32_t cwidth = m_puWidth >> srcYuv.m_hChromaShift; | |
643 | uint32_t cheight = m_puHeight >> srcYuv.m_vChromaShift; | |
644 | ||
b53f7c52 | 645 | primitives.weight_sp(srcU0, dstU, srcStride, dstStride, cwidth, cheight, w0, round, shift, offset); |
72b9787e JB |
646 | |
647 | // Chroma V | |
648 | w0 = wp[2].w; | |
649 | offset = wp[2].offset; | |
650 | shift = wp[2].shift + shiftNum; | |
651 | round = shift ? (1 << (shift - 1)) : 0; | |
652 | ||
b53f7c52 | 653 | primitives.weight_sp(srcV0, dstV, srcStride, dstStride, cwidth, cheight, w0, round, shift, offset); |
72b9787e JB |
654 | } |
655 | } | |
656 | ||
657 | void Predict::initAdiPattern(const CUData& cu, const CUGeom& cuGeom, uint32_t absPartIdx, uint32_t partDepth, int dirMode) | |
658 | { | |
659 | IntraNeighbors intraNeighbors; | |
660 | initIntraNeighbors(cu, absPartIdx, partDepth, true, &intraNeighbors); | |
661 | ||
662 | pixel* adiBuf = m_predBuf; | |
663 | pixel* refAbove = m_refAbove; | |
664 | pixel* refLeft = m_refLeft; | |
665 | pixel* refAboveFlt = m_refAboveFlt; | |
666 | pixel* refLeftFlt = m_refLeftFlt; | |
667 | ||
668 | int tuSize = intraNeighbors.tuSize; | |
669 | int tuSize2 = tuSize << 1; | |
670 | ||
b53f7c52 JB |
671 | pixel* adiOrigin = cu.m_encData->m_reconPic->getLumaAddr(cu.m_cuAddr, cuGeom.encodeIdx + absPartIdx); |
672 | intptr_t picStride = cu.m_encData->m_reconPic->m_stride; | |
72b9787e JB |
673 | |
674 | fillReferenceSamples(adiOrigin, picStride, adiBuf, intraNeighbors); | |
675 | ||
676 | // initialization of ADI buffers | |
677 | const int bufOffset = tuSize - 1; | |
678 | refAbove += bufOffset; | |
679 | refLeft += bufOffset; | |
680 | ||
681 | // ADI_BUF_STRIDE * (2 * tuSize + 1); | |
682 | memcpy(refAbove, adiBuf, (tuSize2 + 1) * sizeof(pixel)); | |
683 | for (int k = 0; k < tuSize2 + 1; k++) | |
684 | refLeft[k] = adiBuf[k * ADI_BUF_STRIDE]; | |
685 | ||
686 | if (dirMode == ALL_IDX ? (8 | 16 | 32) & tuSize : g_intraFilterFlags[dirMode] & tuSize) | |
687 | { | |
688 | // generate filtered intra prediction samples | |
689 | refAboveFlt += bufOffset; | |
690 | refLeftFlt += bufOffset; | |
691 | ||
692 | bool bStrongSmoothing = (tuSize == 32 && cu.m_slice->m_sps->bUseStrongIntraSmoothing); | |
693 | ||
694 | if (bStrongSmoothing) | |
695 | { | |
696 | const int trSize = 32; | |
697 | const int trSize2 = 32 * 2; | |
698 | const int threshold = 1 << (X265_DEPTH - 5); | |
699 | int refBL = refLeft[trSize2]; | |
700 | int refTL = refAbove[0]; | |
701 | int refTR = refAbove[trSize2]; | |
702 | bStrongSmoothing = (abs(refBL + refTL - 2 * refLeft[trSize]) < threshold && | |
703 | abs(refTL + refTR - 2 * refAbove[trSize]) < threshold); | |
704 | ||
705 | if (bStrongSmoothing) | |
706 | { | |
707 | // bilinear interpolation | |
708 | const int shift = 5 + 1; // intraNeighbors.log2TrSize + 1; | |
709 | int init = (refTL << shift) + tuSize; | |
710 | int delta; | |
711 | ||
712 | refLeftFlt[0] = refAboveFlt[0] = refAbove[0]; | |
713 | ||
714 | //TODO: Performance Primitive??? | |
715 | delta = refBL - refTL; | |
716 | for (int i = 1; i < trSize2; i++) | |
717 | refLeftFlt[i] = (pixel)((init + delta * i) >> shift); | |
718 | refLeftFlt[trSize2] = refLeft[trSize2]; | |
719 | ||
720 | delta = refTR - refTL; | |
721 | for (int i = 1; i < trSize2; i++) | |
722 | refAboveFlt[i] = (pixel)((init + delta * i) >> shift); | |
723 | refAboveFlt[trSize2] = refAbove[trSize2]; | |
724 | ||
725 | return; | |
726 | } | |
727 | } | |
728 | ||
729 | refLeft[-1] = refAbove[1]; | |
730 | for (int i = 0; i < tuSize2; i++) | |
731 | refLeftFlt[i] = (refLeft[i - 1] + 2 * refLeft[i] + refLeft[i + 1] + 2) >> 2; | |
732 | refLeftFlt[tuSize2] = refLeft[tuSize2]; | |
733 | ||
734 | refAboveFlt[0] = refLeftFlt[0]; | |
735 | for (int i = 1; i < tuSize2; i++) | |
736 | refAboveFlt[i] = (refAbove[i - 1] + 2 * refAbove[i] + refAbove[i + 1] + 2) >> 2; | |
737 | refAboveFlt[tuSize2] = refAbove[tuSize2]; | |
738 | } | |
739 | } | |
740 | ||
741 | void Predict::initAdiPatternChroma(const CUData& cu, const CUGeom& cuGeom, uint32_t absPartIdx, uint32_t partDepth, uint32_t chromaId) | |
742 | { | |
743 | IntraNeighbors intraNeighbors; | |
744 | initIntraNeighbors(cu, absPartIdx, partDepth, false, &intraNeighbors); | |
745 | uint32_t tuSize = intraNeighbors.tuSize; | |
746 | ||
b53f7c52 JB |
747 | const pixel* adiOrigin = cu.m_encData->m_reconPic->getChromaAddr(chromaId, cu.m_cuAddr, cuGeom.encodeIdx + absPartIdx); |
748 | intptr_t picStride = cu.m_encData->m_reconPic->m_strideC; | |
72b9787e JB |
749 | pixel* adiRef = getAdiChromaBuf(chromaId, tuSize); |
750 | ||
751 | fillReferenceSamples(adiOrigin, picStride, adiRef, intraNeighbors); | |
752 | } | |
753 | ||
754 | void Predict::initIntraNeighbors(const CUData& cu, uint32_t absPartIdx, uint32_t partDepth, bool isLuma, IntraNeighbors *intraNeighbors) | |
755 | { | |
756 | uint32_t log2TrSize = cu.m_log2CUSize[0] - partDepth; | |
757 | int log2UnitWidth = LOG2_UNIT_SIZE; | |
758 | int log2UnitHeight = LOG2_UNIT_SIZE; | |
759 | ||
760 | if (!isLuma) | |
761 | { | |
762 | log2TrSize -= cu.m_hChromaShift; | |
763 | log2UnitWidth -= cu.m_hChromaShift; | |
764 | log2UnitHeight -= cu.m_vChromaShift; | |
765 | } | |
766 | ||
767 | int numIntraNeighbor = 0; | |
b53f7c52 | 768 | bool* bNeighborFlags = intraNeighbors->bNeighborFlags; |
72b9787e JB |
769 | |
770 | uint32_t partIdxLT, partIdxRT, partIdxLB; | |
771 | ||
772 | cu.deriveLeftRightTopIdxAdi(partIdxLT, partIdxRT, absPartIdx, partDepth); | |
773 | ||
774 | uint32_t tuSize = 1 << log2TrSize; | |
775 | int tuWidthInUnits = tuSize >> log2UnitWidth; | |
776 | int tuHeightInUnits = tuSize >> log2UnitHeight; | |
777 | int aboveUnits = tuWidthInUnits << 1; | |
778 | int leftUnits = tuHeightInUnits << 1; | |
779 | int partIdxStride = cu.m_slice->m_sps->numPartInCUSize; | |
780 | partIdxLB = g_rasterToZscan[g_zscanToRaster[partIdxLT] + ((tuHeightInUnits - 1) * partIdxStride)]; | |
781 | ||
782 | bNeighborFlags[leftUnits] = isAboveLeftAvailable(cu, partIdxLT); | |
783 | numIntraNeighbor += (int)(bNeighborFlags[leftUnits]); | |
784 | numIntraNeighbor += isAboveAvailable(cu, partIdxLT, partIdxRT, (bNeighborFlags + leftUnits + 1)); | |
785 | numIntraNeighbor += isAboveRightAvailable(cu, partIdxLT, partIdxRT, (bNeighborFlags + leftUnits + 1 + tuWidthInUnits)); | |
786 | numIntraNeighbor += isLeftAvailable(cu, partIdxLT, partIdxLB, (bNeighborFlags + leftUnits - 1)); | |
787 | numIntraNeighbor += isBelowLeftAvailable(cu, partIdxLT, partIdxLB, (bNeighborFlags + leftUnits - 1 - tuHeightInUnits)); | |
788 | ||
789 | intraNeighbors->numIntraNeighbor = numIntraNeighbor; | |
790 | intraNeighbors->totalUnits = aboveUnits + leftUnits + 1; | |
791 | intraNeighbors->aboveUnits = aboveUnits; | |
792 | intraNeighbors->leftUnits = leftUnits; | |
793 | intraNeighbors->unitWidth = 1 << log2UnitWidth; | |
794 | intraNeighbors->unitHeight = 1 << log2UnitHeight; | |
795 | intraNeighbors->tuSize = tuSize; | |
796 | intraNeighbors->log2TrSize = log2TrSize; | |
797 | } | |
798 | ||
799 | void Predict::fillReferenceSamples(const pixel* adiOrigin, intptr_t picStride, pixel* adiRef, const IntraNeighbors& intraNeighbors) | |
800 | { | |
801 | const pixel dcValue = (pixel)(1 << (X265_DEPTH - 1)); | |
802 | int numIntraNeighbor = intraNeighbors.numIntraNeighbor; | |
803 | int totalUnits = intraNeighbors.totalUnits; | |
804 | uint32_t tuSize = intraNeighbors.tuSize; | |
805 | uint32_t refSize = tuSize * 2 + 1; | |
806 | ||
807 | if (numIntraNeighbor == 0) | |
808 | { | |
809 | // Fill border with DC value | |
810 | for (uint32_t i = 0; i < refSize; i++) | |
811 | adiRef[i] = dcValue; | |
812 | ||
813 | for (uint32_t i = 1; i < refSize; i++) | |
814 | adiRef[i * ADI_BUF_STRIDE] = dcValue; | |
815 | } | |
816 | else if (numIntraNeighbor == totalUnits) | |
817 | { | |
818 | // Fill top border with rec. samples | |
819 | const pixel* adiTemp = adiOrigin - picStride - 1; | |
820 | memcpy(adiRef, adiTemp, refSize * sizeof(*adiRef)); | |
821 | ||
822 | // Fill left border with rec. samples | |
823 | adiTemp = adiOrigin - 1; | |
824 | for (uint32_t i = 1; i < refSize; i++) | |
825 | { | |
826 | adiRef[i * ADI_BUF_STRIDE] = adiTemp[0]; | |
827 | adiTemp += picStride; | |
828 | } | |
829 | } | |
830 | else // reference samples are partially available | |
831 | { | |
b53f7c52 JB |
832 | const bool* bNeighborFlags = intraNeighbors.bNeighborFlags; |
833 | const bool* pNeighborFlags; | |
72b9787e JB |
834 | int aboveUnits = intraNeighbors.aboveUnits; |
835 | int leftUnits = intraNeighbors.leftUnits; | |
836 | int unitWidth = intraNeighbors.unitWidth; | |
837 | int unitHeight = intraNeighbors.unitHeight; | |
838 | int totalSamples = (leftUnits * unitHeight) + ((aboveUnits + 1) * unitWidth); | |
839 | pixel adiLineBuffer[5 * MAX_CU_SIZE]; | |
b53f7c52 | 840 | pixel* adi; |
72b9787e JB |
841 | |
842 | // Initialize | |
843 | for (int i = 0; i < totalSamples; i++) | |
844 | adiLineBuffer[i] = dcValue; | |
845 | ||
846 | // Fill top-left sample | |
847 | const pixel* adiTemp = adiOrigin - picStride - 1; | |
848 | adi = adiLineBuffer + (leftUnits * unitHeight); | |
849 | pNeighborFlags = bNeighborFlags + leftUnits; | |
850 | if (*pNeighborFlags) | |
851 | { | |
852 | pixel topLeftVal = adiTemp[0]; | |
853 | for (int i = 0; i < unitWidth; i++) | |
854 | adi[i] = topLeftVal; | |
855 | } | |
856 | ||
857 | // Fill left & below-left samples | |
858 | adiTemp += picStride; | |
859 | adi--; | |
860 | pNeighborFlags--; | |
861 | for (int j = 0; j < leftUnits; j++) | |
862 | { | |
863 | if (*pNeighborFlags) | |
864 | for (int i = 0; i < unitHeight; i++) | |
865 | adi[-i] = adiTemp[i * picStride]; | |
866 | ||
867 | adiTemp += unitHeight * picStride; | |
868 | adi -= unitHeight; | |
869 | pNeighborFlags--; | |
870 | } | |
871 | ||
872 | // Fill above & above-right samples | |
873 | adiTemp = adiOrigin - picStride; | |
874 | adi = adiLineBuffer + (leftUnits * unitHeight) + unitWidth; | |
875 | pNeighborFlags = bNeighborFlags + leftUnits + 1; | |
876 | for (int j = 0; j < aboveUnits; j++) | |
877 | { | |
878 | if (*pNeighborFlags) | |
879 | memcpy(adi, adiTemp, unitWidth * sizeof(*adiTemp)); | |
880 | adiTemp += unitWidth; | |
881 | adi += unitWidth; | |
882 | pNeighborFlags++; | |
883 | } | |
884 | ||
885 | // Pad reference samples when necessary | |
886 | int curr = 0; | |
887 | int next = 1; | |
888 | adi = adiLineBuffer; | |
889 | int pAdiLineTopRowOffset = leftUnits * (unitHeight - unitWidth); | |
890 | if (!bNeighborFlags[0]) | |
891 | { | |
892 | // very bottom unit of bottom-left; at least one unit will be valid. | |
893 | while (next < totalUnits && !bNeighborFlags[next]) | |
894 | next++; | |
895 | ||
b53f7c52 | 896 | pixel* pAdiLineNext = adiLineBuffer + ((next < leftUnits) ? (next * unitHeight) : (pAdiLineTopRowOffset + (next * unitWidth))); |
72b9787e JB |
897 | const pixel refSample = *pAdiLineNext; |
898 | // Pad unavailable samples with new value | |
899 | int nextOrTop = X265_MIN(next, leftUnits); | |
900 | // fill left column | |
901 | while (curr < nextOrTop) | |
902 | { | |
903 | for (int i = 0; i < unitHeight; i++) | |
904 | adi[i] = refSample; | |
905 | ||
906 | adi += unitHeight; | |
907 | curr++; | |
908 | } | |
909 | ||
910 | // fill top row | |
911 | while (curr < next) | |
912 | { | |
913 | for (int i = 0; i < unitWidth; i++) | |
914 | adi[i] = refSample; | |
915 | ||
916 | adi += unitWidth; | |
917 | curr++; | |
918 | } | |
919 | } | |
920 | ||
921 | // pad all other reference samples. | |
922 | while (curr < totalUnits) | |
923 | { | |
924 | if (!bNeighborFlags[curr]) // samples not available | |
925 | { | |
926 | int numSamplesInCurrUnit = (curr >= leftUnits) ? unitWidth : unitHeight; | |
927 | const pixel refSample = *(adi - 1); | |
928 | for (int i = 0; i < numSamplesInCurrUnit; i++) | |
929 | adi[i] = refSample; | |
930 | ||
931 | adi += numSamplesInCurrUnit; | |
932 | curr++; | |
933 | } | |
934 | else | |
935 | { | |
936 | adi += (curr >= leftUnits) ? unitWidth : unitHeight; | |
937 | curr++; | |
938 | } | |
939 | } | |
940 | ||
941 | // Copy processed samples | |
942 | adi = adiLineBuffer + refSize + unitWidth - 2; | |
943 | memcpy(adiRef, adi, refSize * sizeof(*adiRef)); | |
944 | ||
945 | adi = adiLineBuffer + refSize - 1; | |
946 | for (int i = 1; i < (int)refSize; i++) | |
947 | adiRef[i * ADI_BUF_STRIDE] = adi[-i]; | |
948 | } | |
949 | } | |
950 | ||
951 | bool Predict::isAboveLeftAvailable(const CUData& cu, uint32_t partIdxLT) | |
952 | { | |
953 | uint32_t partAboveLeft; | |
954 | const CUData* cuAboveLeft = cu.getPUAboveLeft(partAboveLeft, partIdxLT); | |
955 | ||
956 | if (!cu.m_slice->m_pps->bConstrainedIntraPred) | |
957 | return cuAboveLeft ? true : false; | |
958 | else | |
959 | return cuAboveLeft && cuAboveLeft->isIntra(partAboveLeft); | |
960 | } | |
961 | ||
b53f7c52 | 962 | int Predict::isAboveAvailable(const CUData& cu, uint32_t partIdxLT, uint32_t partIdxRT, bool* bValidFlags) |
72b9787e JB |
963 | { |
964 | const uint32_t rasterPartBegin = g_zscanToRaster[partIdxLT]; | |
965 | const uint32_t rasterPartEnd = g_zscanToRaster[partIdxRT] + 1; | |
966 | const uint32_t idxStep = 1; | |
b53f7c52 | 967 | bool* validFlagPtr = bValidFlags; |
72b9787e JB |
968 | int numIntra = 0; |
969 | ||
970 | for (uint32_t rasterPart = rasterPartBegin; rasterPart < rasterPartEnd; rasterPart += idxStep) | |
971 | { | |
972 | uint32_t partAbove; | |
973 | const CUData* cuAbove = cu.getPUAbove(partAbove, g_rasterToZscan[rasterPart]); | |
974 | if (cuAbove && (!cu.m_slice->m_pps->bConstrainedIntraPred || cuAbove->isIntra(partAbove))) | |
975 | { | |
976 | numIntra++; | |
977 | *validFlagPtr = true; | |
978 | } | |
979 | else | |
980 | *validFlagPtr = false; | |
981 | ||
982 | validFlagPtr++; | |
983 | } | |
984 | ||
985 | return numIntra; | |
986 | } | |
987 | ||
b53f7c52 | 988 | int Predict::isLeftAvailable(const CUData& cu, uint32_t partIdxLT, uint32_t partIdxLB, bool* bValidFlags) |
72b9787e JB |
989 | { |
990 | const uint32_t rasterPartBegin = g_zscanToRaster[partIdxLT]; | |
991 | const uint32_t rasterPartEnd = g_zscanToRaster[partIdxLB] + 1; | |
992 | const uint32_t idxStep = cu.m_slice->m_sps->numPartInCUSize; | |
b53f7c52 | 993 | bool* validFlagPtr = bValidFlags; |
72b9787e JB |
994 | int numIntra = 0; |
995 | ||
996 | for (uint32_t rasterPart = rasterPartBegin; rasterPart < rasterPartEnd; rasterPart += idxStep) | |
997 | { | |
998 | uint32_t partLeft; | |
999 | const CUData* cuLeft = cu.getPULeft(partLeft, g_rasterToZscan[rasterPart]); | |
1000 | if (cuLeft && (!cu.m_slice->m_pps->bConstrainedIntraPred || cuLeft->isIntra(partLeft))) | |
1001 | { | |
1002 | numIntra++; | |
1003 | *validFlagPtr = true; | |
1004 | } | |
1005 | else | |
1006 | *validFlagPtr = false; | |
1007 | ||
1008 | validFlagPtr--; // opposite direction | |
1009 | } | |
1010 | ||
1011 | return numIntra; | |
1012 | } | |
1013 | ||
b53f7c52 | 1014 | int Predict::isAboveRightAvailable(const CUData& cu, uint32_t partIdxLT, uint32_t partIdxRT, bool* bValidFlags) |
72b9787e JB |
1015 | { |
1016 | const uint32_t numUnitsInPU = g_zscanToRaster[partIdxRT] - g_zscanToRaster[partIdxLT] + 1; | |
b53f7c52 | 1017 | bool* validFlagPtr = bValidFlags; |
72b9787e JB |
1018 | int numIntra = 0; |
1019 | ||
1020 | for (uint32_t offset = 1; offset <= numUnitsInPU; offset++) | |
1021 | { | |
1022 | uint32_t partAboveRight; | |
1023 | const CUData* cuAboveRight = cu.getPUAboveRightAdi(partAboveRight, partIdxRT, offset); | |
1024 | if (cuAboveRight && (!cu.m_slice->m_pps->bConstrainedIntraPred || cuAboveRight->isIntra(partAboveRight))) | |
1025 | { | |
1026 | numIntra++; | |
1027 | *validFlagPtr = true; | |
1028 | } | |
1029 | else | |
1030 | *validFlagPtr = false; | |
1031 | ||
1032 | validFlagPtr++; | |
1033 | } | |
1034 | ||
1035 | return numIntra; | |
1036 | } | |
1037 | ||
b53f7c52 | 1038 | int Predict::isBelowLeftAvailable(const CUData& cu, uint32_t partIdxLT, uint32_t partIdxLB, bool* bValidFlags) |
72b9787e JB |
1039 | { |
1040 | const uint32_t numUnitsInPU = (g_zscanToRaster[partIdxLB] - g_zscanToRaster[partIdxLT]) / cu.m_slice->m_sps->numPartInCUSize + 1; | |
b53f7c52 | 1041 | bool* validFlagPtr = bValidFlags; |
72b9787e JB |
1042 | int numIntra = 0; |
1043 | ||
1044 | for (uint32_t offset = 1; offset <= numUnitsInPU; offset++) | |
1045 | { | |
1046 | uint32_t partBelowLeft; | |
1047 | const CUData* cuBelowLeft = cu.getPUBelowLeftAdi(partBelowLeft, partIdxLB, offset); | |
1048 | if (cuBelowLeft && (!cu.m_slice->m_pps->bConstrainedIntraPred || cuBelowLeft->isIntra(partBelowLeft))) | |
1049 | { | |
1050 | numIntra++; | |
1051 | *validFlagPtr = true; | |
1052 | } | |
1053 | else | |
1054 | *validFlagPtr = false; | |
1055 | ||
1056 | validFlagPtr--; // opposite direction | |
1057 | } | |
1058 | ||
1059 | return numIntra; | |
1060 | } |