1 /*****************************************************************************
2 * Copyright (C) 2013 x265 project
4 * Authors: Deepthi Devaki <deepthidevaki@multicorewareinc.com>,
5 * Rajesh Paulraj <rajesh@multicorewareinc.com>
6 * Praveen Kumar Tiwari <praveen@multicorewareinc.com>
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
22 * This program is also available under a commercial proprietary license.
23 * For more information, contact us at license @ x265.com.
24 *****************************************************************************/
27 #include "primitives.h"
33 #pragma warning(disable: 4127) // conditional expression is constant, typical for templated functions
// Converts a block of pixels into 16-bit intermediate samples: every source
// sample is shifted left by (IF_INTERNAL_PREC - X265_DEPTH) and then has
// IF_INTERNAL_OFFS subtracted before being written to dst.
//   src, srcStride : source pixels and source row pitch
//   dst            : destination 16-bit samples
//   width, height  : block dimensions iterated by the two loops below
// NOTE(review): the declarations of row/col and the per-row advance of
// src/dst are not visible in this listing; the template parameter dstStride
// is presumably the destination row pitch - confirm against the full source.
37 template<int dstStride
>
38 void filterConvertPelToShort_c(const pixel
* src
, intptr_t srcStride
, int16_t* dst
, int width
, int height
)
// Left shift that scales a pixel up to the internal precision.
40 int shift
= IF_INTERNAL_PREC
- X265_DEPTH
;
// Outer loop over rows, inner loop over the columns of one row.
43 for (row
= 0; row
< height
; row
++)
45 for (col
= 0; col
< width
; col
++)
// Scale the sample, then remove the internal offset.
47 int16_t val
= src
[col
] << shift
;
48 dst
[col
] = val
- (int16_t)IF_INTERNAL_OFFS
;
// Pads rows horizontally by replicating edge samples: marginX samples to the
// left of txt[0] receive the value txt[0], and marginX samples starting at
// txt[width] receive the value txt[width - 1]. The y loop runs height times.
// NOTE(review): two equivalent fill forms appear below (an element-wise loop
// and a memset pair); presumably a preprocessor conditional that is not
// visible in this listing selects one of them. The advance of txt by stride
// between rows is also not visible - confirm against the full source.
56 void extendCURowColBorder(pixel
* txt
, intptr_t stride
, int width
, int height
, int marginX
)
58 for (int y
= 0; y
< height
; y
++)
// Element-wise form: fill the left and right margins one sample at a time.
61 for (int x
= 0; x
< marginX
; x
++)
63 txt
[-marginX
+ x
] = txt
[0];
64 txt
[width
+ x
] = txt
[width
- 1];
// memset form: marginX is passed as a byte count, so this only covers marginX
// samples when a pixel is one byte wide.
68 ::memset(txt
- marginX
, txt
[0], marginX
);
69 ::memset(txt
+ width
, txt
[width
- 1], marginX
);
// Horizontal interpolation filter, pixel input to pixel output.
//   N              : tap count selector; N == 4 picks g_chromaFilter,
//                    any other value picks g_lumaFilter
//   width, height  : block size (compile-time loop bounds)
//   src, srcStride : source pixels and row pitch
//   dst, dstStride : destination pixels and row pitch
//   coeffIdx       : row of the coefficient table to use
// Each output is the rounded, right-shifted tap sum clamped to [0, maxVal].
// NOTE(review): the declarations of cStride, row, col and sum, and the
// per-row advance of src/dst, are not visible in this listing - confirm.
76 template<int N
, int width
, int height
>
77 void interp_horiz_pp_c(const pixel
* src
, intptr_t srcStride
, pixel
* dst
, intptr_t dstStride
, int coeffIdx
)
79 const int16_t* coeff
= (N
== 4) ? g_chromaFilter
[coeffIdx
] : g_lumaFilter
[coeffIdx
];
// headRoom is the normalisation shift; offset is half of 1 << headRoom so the
// shift rounds to nearest.
80 int headRoom
= IF_FILTER_PREC
;
81 int offset
= (1 << (headRoom
- 1));
// Largest representable pixel value at the configured bit depth.
82 uint16_t maxVal
= (1 << X265_DEPTH
) - 1;
// Step back so the taps are positioned around the output sample.
85 src
-= (N
/ 2 - 1) * cStride
;
88 for (row
= 0; row
< height
; row
++)
90 for (col
= 0; col
< width
; col
++)
// Taps 0..3.
94 sum
= src
[col
+ 0 * cStride
] * coeff
[0];
95 sum
+= src
[col
+ 1 * cStride
] * coeff
[1];
96 sum
+= src
[col
+ 2 * cStride
] * coeff
[2];
97 sum
+= src
[col
+ 3 * cStride
] * coeff
[3];
// Taps 4..7. NOTE(review): presumably applied only for the 8-tap case; the
// guard is not visible in this listing - confirm.
100 sum
+= src
[col
+ 4 * cStride
] * coeff
[4];
101 sum
+= src
[col
+ 5 * cStride
] * coeff
[5];
102 sum
+= src
[col
+ 6 * cStride
] * coeff
[6];
103 sum
+= src
[col
+ 7 * cStride
] * coeff
[7];
// Round and normalise, then clamp into the valid pixel range before storing.
105 int16_t val
= (int16_t)((sum
+ offset
) >> headRoom
);
107 if (val
< 0) val
= 0;
108 if (val
> maxVal
) val
= maxVal
;
109 dst
[col
] = (pixel
)val
;
// Horizontal interpolation filter, pixel input to 16-bit intermediate output.
//   N              : tap count selector; N == 4 picks g_chromaFilter,
//                    any other value picks g_lumaFilter
//   width, height  : block size (compile-time)
//   src, srcStride : source pixels and row pitch
//   dst, dstStride : destination 16-bit samples and row pitch
//   coeffIdx       : row of the coefficient table to use
//   isRowExt       : NOTE(review): not referenced in the visible lines;
//                    presumably it gates the vertical rewind of src and an
//                    enlargement of blkheight - confirm against the full source
// NOTE(review): the declarations of row/col/sum, the store of val into dst
// and the per-row pointer advance are not visible in this listing.
117 template<int N
, int width
, int height
>
118 void interp_horiz_ps_c(const pixel
* src
, intptr_t srcStride
, int16_t* dst
, intptr_t dstStride
, int coeffIdx
, int isRowExt
)
120 const int16_t* coeff
= (N
== 4) ? g_chromaFilter
[coeffIdx
] : g_lumaFilter
[coeffIdx
];
// shift is the filter precision reduced by the head room kept for the
// intermediate format; offset pre-scales -IF_INTERNAL_OFFS by that shift.
121 int headRoom
= IF_INTERNAL_PREC
- X265_DEPTH
;
122 int shift
= IF_FILTER_PREC
- headRoom
;
123 int offset
= -IF_INTERNAL_OFFS
<< shift
;
// Number of rows to produce; starts at the block height.
124 int blkheight
= height
;
// Move the source up by (N / 2 - 1) rows.
130 src
-= (N
/ 2 - 1) * srcStride
;
135 for (row
= 0; row
< blkheight
; row
++)
137 for (col
= 0; col
< width
; col
++)
// Taps 0..3 on horizontally adjacent samples.
141 sum
= src
[col
+ 0] * coeff
[0];
142 sum
+= src
[col
+ 1] * coeff
[1];
143 sum
+= src
[col
+ 2] * coeff
[2];
144 sum
+= src
[col
+ 3] * coeff
[3];
// Taps 4..7. NOTE(review): presumably applied only for the 8-tap case; the
// guard is not visible in this listing - confirm.
147 sum
+= src
[col
+ 4] * coeff
[4];
148 sum
+= src
[col
+ 5] * coeff
[5];
149 sum
+= src
[col
+ 6] * coeff
[6];
150 sum
+= src
[col
+ 7] * coeff
[7];
// Apply the offset and normalise to the intermediate precision (no clamping).
153 int16_t val
= (int16_t)((sum
+ offset
) >> shift
);
// Vertical interpolation filter, pixel input to pixel output.
//   N              : tap count selector; N == 4 picks g_chromaFilter,
//                    any other value picks g_lumaFilter
//   width, height  : block size (compile-time loop bounds)
//   src, srcStride : source pixels and row pitch (taps are srcStride apart)
//   dst, dstStride : destination pixels and row pitch
//   coeffIdx       : row of the coefficient table to use
// Each output is the rounded, right-shifted tap sum clamped to [0, maxVal].
// NOTE(review): the declarations of row/col/sum and the per-row advance of
// src/dst are not visible in this listing - confirm.
162 template<int N
, int width
, int height
>
163 void interp_vert_pp_c(const pixel
* src
, intptr_t srcStride
, pixel
* dst
, intptr_t dstStride
, int coeffIdx
)
165 const int16_t* c
= (N
== 4) ? g_chromaFilter
[coeffIdx
] : g_lumaFilter
[coeffIdx
];
// Normalisation shift and its round-to-nearest offset.
166 int shift
= IF_FILTER_PREC
;
167 int offset
= 1 << (shift
- 1);
// Largest representable pixel value at the configured bit depth.
168 uint16_t maxVal
= (1 << X265_DEPTH
) - 1;
// Move up (N / 2 - 1) rows so the taps are positioned around the output row.
170 src
-= (N
/ 2 - 1) * srcStride
;
173 for (row
= 0; row
< height
; row
++)
175 for (col
= 0; col
< width
; col
++)
// Taps 0..3 on vertically adjacent samples.
179 sum
= src
[col
+ 0 * srcStride
] * c
[0];
180 sum
+= src
[col
+ 1 * srcStride
] * c
[1];
181 sum
+= src
[col
+ 2 * srcStride
] * c
[2];
182 sum
+= src
[col
+ 3 * srcStride
] * c
[3];
// Taps 4..7. NOTE(review): presumably applied only for the 8-tap case; the
// guard is not visible in this listing - confirm.
185 sum
+= src
[col
+ 4 * srcStride
] * c
[4];
186 sum
+= src
[col
+ 5 * srcStride
] * c
[5];
187 sum
+= src
[col
+ 6 * srcStride
] * c
[6];
188 sum
+= src
[col
+ 7 * srcStride
] * c
[7];
// Round and normalise, then clamp into the valid pixel range before storing.
191 int16_t val
= (int16_t)((sum
+ offset
) >> shift
);
192 val
= (val
< 0) ? 0 : val
;
193 val
= (val
> maxVal
) ? maxVal
: val
;
195 dst
[col
] = (pixel
)val
;
// Vertical interpolation filter, pixel input to 16-bit intermediate output.
//   N              : tap count selector; N == 4 picks g_chromaFilter,
//                    any other value picks g_lumaFilter
//   width, height  : block size (compile-time loop bounds)
//   src, srcStride : source pixels and row pitch (taps are srcStride apart)
//   dst, dstStride : destination 16-bit samples and row pitch
//   coeffIdx       : row of the coefficient table to use
// NOTE(review): the declarations of row/col/sum, the store of val into dst
// and the per-row pointer advance are not visible in this listing - confirm.
203 template<int N
, int width
, int height
>
204 void interp_vert_ps_c(const pixel
* src
, intptr_t srcStride
, int16_t* dst
, intptr_t dstStride
, int coeffIdx
)
206 const int16_t* c
= (N
== 4) ? g_chromaFilter
[coeffIdx
] : g_lumaFilter
[coeffIdx
];
// shift is the filter precision reduced by the head room kept for the
// intermediate format; offset pre-scales -IF_INTERNAL_OFFS by that shift.
207 int headRoom
= IF_INTERNAL_PREC
- X265_DEPTH
;
208 int shift
= IF_FILTER_PREC
- headRoom
;
209 int offset
= -IF_INTERNAL_OFFS
<< shift
;
// Move up (N / 2 - 1) rows so the taps are positioned around the output row.
211 src
-= (N
/ 2 - 1) * srcStride
;
214 for (row
= 0; row
< height
; row
++)
216 for (col
= 0; col
< width
; col
++)
// Taps 0..3 on vertically adjacent samples.
220 sum
= src
[col
+ 0 * srcStride
] * c
[0];
221 sum
+= src
[col
+ 1 * srcStride
] * c
[1];
222 sum
+= src
[col
+ 2 * srcStride
] * c
[2];
223 sum
+= src
[col
+ 3 * srcStride
] * c
[3];
// Taps 4..7. NOTE(review): presumably applied only for the 8-tap case; the
// guard is not visible in this listing - confirm.
226 sum
+= src
[col
+ 4 * srcStride
] * c
[4];
227 sum
+= src
[col
+ 5 * srcStride
] * c
[5];
228 sum
+= src
[col
+ 6 * srcStride
] * c
[6];
229 sum
+= src
[col
+ 7 * srcStride
] * c
[7];
// Apply the offset and normalise to the intermediate precision (no clamping).
232 int16_t val
= (int16_t)((sum
+ offset
) >> shift
);
// Vertical interpolation filter, 16-bit intermediate input to pixel output.
//   N              : tap count selector; N == 8 picks g_lumaFilter,
//                    any other value picks g_chromaFilter
//   width, height  : block size (compile-time loop bounds)
//   src, srcStride : source 16-bit samples and row pitch
//   dst, dstStride : destination pixels and row pitch
//   coeffIdx       : row of the coefficient table to use
// Each output is the offset, right-shifted tap sum clamped to [0, maxVal].
// NOTE(review): the declarations of row/col/sum and the per-row advance of
// src/dst are not visible in this listing - confirm.
241 template<int N
, int width
, int height
>
242 void interp_vert_sp_c(const int16_t* src
, intptr_t srcStride
, pixel
* dst
, intptr_t dstStride
, int coeffIdx
)
// The shift removes both the filter gain and the intermediate head room.
244 int headRoom
= IF_INTERNAL_PREC
- X265_DEPTH
;
245 int shift
= IF_FILTER_PREC
+ headRoom
;
// Rounding term plus IF_INTERNAL_OFFS scaled by the filter precision.
246 int offset
= (1 << (shift
- 1)) + (IF_INTERNAL_OFFS
<< IF_FILTER_PREC
);
// Largest representable pixel value at the configured bit depth.
247 uint16_t maxVal
= (1 << X265_DEPTH
) - 1;
248 const int16_t* coeff
= (N
== 8 ? g_lumaFilter
[coeffIdx
] : g_chromaFilter
[coeffIdx
]);
// Move up (N / 2 - 1) rows so the taps are positioned around the output row.
250 src
-= (N
/ 2 - 1) * srcStride
;
253 for (row
= 0; row
< height
; row
++)
255 for (col
= 0; col
< width
; col
++)
// Taps 0..3 on vertically adjacent samples.
259 sum
= src
[col
+ 0 * srcStride
] * coeff
[0];
260 sum
+= src
[col
+ 1 * srcStride
] * coeff
[1];
261 sum
+= src
[col
+ 2 * srcStride
] * coeff
[2];
262 sum
+= src
[col
+ 3 * srcStride
] * coeff
[3];
// Taps 4..7. NOTE(review): presumably applied only for the 8-tap case; the
// guard is not visible in this listing - confirm.
265 sum
+= src
[col
+ 4 * srcStride
] * coeff
[4];
266 sum
+= src
[col
+ 5 * srcStride
] * coeff
[5];
267 sum
+= src
[col
+ 6 * srcStride
] * coeff
[6];
268 sum
+= src
[col
+ 7 * srcStride
] * coeff
[7];
// Normalise, then clamp into the valid pixel range before storing.
271 int16_t val
= (int16_t)((sum
+ offset
) >> shift
);
273 val
= (val
< 0) ? 0 : val
;
274 val
= (val
> maxVal
) ? maxVal
: val
;
276 dst
[col
] = (pixel
)val
;
// Vertical interpolation filter, 16-bit intermediate input to 16-bit
// intermediate output.
//   N              : tap count selector; N == 8 picks g_lumaFilter,
//                    any other value picks g_chromaFilter
//   width, height  : block size (compile-time loop bounds)
//   src, srcStride : source 16-bit samples and row pitch
//   dst, dstStride : destination 16-bit samples and row pitch
//   coeffIdx       : row of the coefficient table to use
// The tap sum is right-shifted by IF_FILTER_PREC with no rounding offset and
// no clamping.
// NOTE(review): the declarations of row/col/sum, the store of val into dst
// and the per-row pointer advance are not visible in this listing - confirm.
284 template<int N
, int width
, int height
>
285 void interp_vert_ss_c(const int16_t* src
, intptr_t srcStride
, int16_t* dst
, intptr_t dstStride
, int coeffIdx
)
287 const int16_t* c
= (N
== 8 ? g_lumaFilter
[coeffIdx
] : g_chromaFilter
[coeffIdx
]);
288 int shift
= IF_FILTER_PREC
;
// Move up (N / 2 - 1) rows so the taps are positioned around the output row.
291 src
-= (N
/ 2 - 1) * srcStride
;
292 for (row
= 0; row
< height
; row
++)
294 for (col
= 0; col
< width
; col
++)
// Taps 0..3 on vertically adjacent samples.
298 sum
= src
[col
+ 0 * srcStride
] * c
[0];
299 sum
+= src
[col
+ 1 * srcStride
] * c
[1];
300 sum
+= src
[col
+ 2 * srcStride
] * c
[2];
301 sum
+= src
[col
+ 3 * srcStride
] * c
[3];
// Taps 4..7. NOTE(review): presumably applied only for the 8-tap case; the
// guard is not visible in this listing - confirm.
304 sum
+= src
[col
+ 4 * srcStride
] * c
[4];
305 sum
+= src
[col
+ 5 * srcStride
] * c
[5];
306 sum
+= src
[col
+ 6 * srcStride
] * c
[6];
307 sum
+= src
[col
+ 7 * srcStride
] * c
[7];
// Plain truncating shift back to the intermediate precision.
310 int16_t val
= (int16_t)((sum
) >> shift
);
// Vertical interpolation filter, 16-bit intermediate input to pixel output,
// with the block size passed at run time.
//   src, srcStride : source 16-bit samples and row pitch
//   dst, dstStride : destination pixels and row pitch
//   width, height  : block size iterated by the two loops below
//   coeffIdx       : row of the coefficient table to use
// N == 8 picks g_lumaFilter, any other value picks g_chromaFilter.
// NOTE(review): N is used below but no template header is visible in this
// listing; the call site instantiates this as filterVertical_sp_c<N>, so a
// template<int N> line presumably precedes the signature - confirm. The
// declarations of row/col/sum and the per-row advance of src/dst are also
// not visible.
320 void filterVertical_sp_c(const int16_t* src
, intptr_t srcStride
, pixel
* dst
, intptr_t dstStride
, int width
, int height
, int coeffIdx
)
// The shift removes both the filter gain and the intermediate head room.
322 int headRoom
= IF_INTERNAL_PREC
- X265_DEPTH
;
323 int shift
= IF_FILTER_PREC
+ headRoom
;
// Rounding term plus IF_INTERNAL_OFFS scaled by the filter precision.
324 int offset
= (1 << (shift
- 1)) + (IF_INTERNAL_OFFS
<< IF_FILTER_PREC
);
// Largest representable pixel value at the configured bit depth.
325 uint16_t maxVal
= (1 << X265_DEPTH
) - 1;
326 const int16_t* coeff
= (N
== 8 ? g_lumaFilter
[coeffIdx
] : g_chromaFilter
[coeffIdx
]);
// Move up (N / 2 - 1) rows so the taps are positioned around the output row.
328 src
-= (N
/ 2 - 1) * srcStride
;
331 for (row
= 0; row
< height
; row
++)
333 for (col
= 0; col
< width
; col
++)
// Taps 0..3 on vertically adjacent samples.
337 sum
= src
[col
+ 0 * srcStride
] * coeff
[0];
338 sum
+= src
[col
+ 1 * srcStride
] * coeff
[1];
339 sum
+= src
[col
+ 2 * srcStride
] * coeff
[2];
340 sum
+= src
[col
+ 3 * srcStride
] * coeff
[3];
// Taps 4..7. NOTE(review): presumably applied only for the 8-tap case; the
// guard is not visible in this listing - confirm.
343 sum
+= src
[col
+ 4 * srcStride
] * coeff
[4];
344 sum
+= src
[col
+ 5 * srcStride
] * coeff
[5];
345 sum
+= src
[col
+ 6 * srcStride
] * coeff
[6];
346 sum
+= src
[col
+ 7 * srcStride
] * coeff
[7];
// Normalise, then clamp into the valid pixel range before storing.
349 int16_t val
= (int16_t)((sum
+ offset
) >> shift
);
351 val
= (val
< 0) ? 0 : val
;
352 val
= (val
> maxVal
) ? maxVal
: val
;
354 dst
[col
] = (pixel
)val
;
362 template<int N
, int width
, int height
>
363 void interp_hv_pp_c(const pixel
* src
, intptr_t srcStride
, pixel
* dst
, intptr_t dstStride
, int idxX
, int idxY
)
365 short immedVals
[(64 + 8) * (64 + 8)];
367 interp_horiz_ps_c
<N
, width
, height
>(src
, srcStride
, immedVals
, width
, idxX
, 1);
368 filterVertical_sp_c
<N
>(immedVals
+ 3 * width
, width
, dst
, dstStride
, width
, height
, idxY
);
373 // x265 private namespace
// CHROMA_420(W, H): stores the 4-tap W x H instantiations of the six filter
// templates (hpp, hps, vpp, vps, vsp, vss) into p.chroma[X265_CSP_I420],
// indexed by CHROMA_WxH. Requires a primitive table named `p` to be in scope
// where the macro is expanded.
375 #define CHROMA_420(W, H) \
376 p.chroma[X265_CSP_I420].filter_hpp[CHROMA_ ## W ## x ## H] = interp_horiz_pp_c<4, W, H>; \
377 p.chroma[X265_CSP_I420].filter_hps[CHROMA_ ## W ## x ## H] = interp_horiz_ps_c<4, W, H>; \
378 p.chroma[X265_CSP_I420].filter_vpp[CHROMA_ ## W ## x ## H] = interp_vert_pp_c<4, W, H>; \
379 p.chroma[X265_CSP_I420].filter_vps[CHROMA_ ## W ## x ## H] = interp_vert_ps_c<4, W, H>; \
380 p.chroma[X265_CSP_I420].filter_vsp[CHROMA_ ## W ## x ## H] = interp_vert_sp_c<4, W, H>; \
381 p.chroma[X265_CSP_I420].filter_vss[CHROMA_ ## W ## x ## H] = interp_vert_ss_c<4, W, H>;
// CHROMA_422(W, H): stores the 4-tap W x H instantiations of the six filter
// templates (hpp, hps, vpp, vps, vsp, vss) into p.chroma[X265_CSP_I422],
// indexed by CHROMA422_WxH. Requires a primitive table named `p` to be in
// scope where the macro is expanded.
383 #define CHROMA_422(W, H) \
384 p.chroma[X265_CSP_I422].filter_hpp[CHROMA422_ ## W ## x ## H] = interp_horiz_pp_c<4, W, H>; \
385 p.chroma[X265_CSP_I422].filter_hps[CHROMA422_ ## W ## x ## H] = interp_horiz_ps_c<4, W, H>; \
386 p.chroma[X265_CSP_I422].filter_vpp[CHROMA422_ ## W ## x ## H] = interp_vert_pp_c<4, W, H>; \
387 p.chroma[X265_CSP_I422].filter_vps[CHROMA422_ ## W ## x ## H] = interp_vert_ps_c<4, W, H>; \
388 p.chroma[X265_CSP_I422].filter_vsp[CHROMA422_ ## W ## x ## H] = interp_vert_sp_c<4, W, H>; \
389 p.chroma[X265_CSP_I422].filter_vss[CHROMA422_ ## W ## x ## H] = interp_vert_ss_c<4, W, H>;
// CHROMA_444(W, H): stores the 4-tap W x H instantiations of the six filter
// templates (hpp, hps, vpp, vps, vsp, vss) into p.chroma[X265_CSP_I444].
// Unlike the 4:2:0 and 4:2:2 macros, the table index here is LUMA_WxH.
// Requires a primitive table named `p` to be in scope where the macro is
// expanded.
391 #define CHROMA_444(W, H) \
392 p.chroma[X265_CSP_I444].filter_hpp[LUMA_ ## W ## x ## H] = interp_horiz_pp_c<4, W, H>; \
393 p.chroma[X265_CSP_I444].filter_hps[LUMA_ ## W ## x ## H] = interp_horiz_ps_c<4, W, H>; \
394 p.chroma[X265_CSP_I444].filter_vpp[LUMA_ ## W ## x ## H] = interp_vert_pp_c<4, W, H>; \
395 p.chroma[X265_CSP_I444].filter_vps[LUMA_ ## W ## x ## H] = interp_vert_ps_c<4, W, H>; \
396 p.chroma[X265_CSP_I444].filter_vsp[LUMA_ ## W ## x ## H] = interp_vert_sp_c<4, W, H>; \
397 p.chroma[X265_CSP_I444].filter_vss[LUMA_ ## W ## x ## H] = interp_vert_ss_c<4, W, H>;
// Luma registration: stores the 8-tap W x H instantiations of the horizontal,
// vertical and combined (hv) filter templates into the p.luma_* tables,
// indexed by LUMA_WxH.
// NOTE(review): these are macro continuation lines (W, H and ## pasting), but
// the #define line that introduces them is not visible in this listing -
// confirm against the full source.
400 p.luma_hpp[LUMA_ ## W ## x ## H] = interp_horiz_pp_c<8, W, H>; \
401 p.luma_hps[LUMA_ ## W ## x ## H] = interp_horiz_ps_c<8, W, H>; \
402 p.luma_vpp[LUMA_ ## W ## x ## H] = interp_vert_pp_c<8, W, H>; \
403 p.luma_vps[LUMA_ ## W ## x ## H] = interp_vert_ps_c<8, W, H>; \
404 p.luma_vsp[LUMA_ ## W ## x ## H] = interp_vert_sp_c<8, W, H>; \
405 p.luma_vss[LUMA_ ## W ## x ## H] = interp_vert_ss_c<8, W, H>; \
406 p.luma_hvpp[LUMA_ ## W ## x ## H] = interp_hv_pp_c<8, W, H>;
// Installs the C implementations from this file into the primitive table p.
// The assignments visible here cover the pixel-to-short converters and the
// row-border extension routine.
// NOTE(review): the lines between the signature and the first assignment
// below are not visible in this listing; presumably they expand the
// CHROMA_* / luma registration macros per block size - confirm against the
// full source.
408 void Setup_C_IPFilterPrimitives(EncoderPrimitives
& p
)
// Luma and 4:4:4 chroma use the converter instantiated with MAX_CU_SIZE.
510 p
.luma_p2s
= filterConvertPelToShort_c
<MAX_CU_SIZE
>;
512 p
.chroma
[X265_CSP_I444
].p2s
= filterConvertPelToShort_c
<MAX_CU_SIZE
>;
// 4:2:0 and 4:2:2 chroma use the converter instantiated with MAX_CU_SIZE / 2.
513 p
.chroma
[X265_CSP_I420
].p2s
= filterConvertPelToShort_c
<MAX_CU_SIZE
/ 2>;
514 p
.chroma
[X265_CSP_I422
].p2s
= filterConvertPelToShort_c
<MAX_CU_SIZE
/ 2>;
// Horizontal border padding routine.
516 p
.extendRowBorder
= extendCURowColBorder
;