X-Git-Url: https://git.piment-noir.org/?p=deb_x265.git;a=blobdiff_plain;f=source%2Fcommon%2Fx86%2Fasm-primitives.cpp;h=b81115b304e3b1565ac9e0cf4fb2a77d69716a0b;hp=ec1607dfa6f2fa7e69f22261f78646d775d5359f;hb=b53f7c52d8280ab63876efd6eb292c21430ac607;hpb=5c9b45285dd64723ad1dac380b98a7b1f3095674 diff --git a/source/common/x86/asm-primitives.cpp b/source/common/x86/asm-primitives.cpp index ec1607d..b81115b 100644 --- a/source/common/x86/asm-primitives.cpp +++ b/source/common/x86/asm-primitives.cpp @@ -1336,11 +1336,22 @@ void Setup_Assembly_Primitives(EncoderPrimitives &p, int cpuMask) p.sad_x4[LUMA_16x4] = x265_pixel_sad_x4_16x4_sse2; p.sad_x4[LUMA_12x16] = x265_pixel_sad_x4_12x16_mmx2; - p.cvt32to16_shr = x265_cvt32to16_shr_sse2; - p.cvt32to16_shl[BLOCK_4x4] = x265_cvt32to16_shl_4_sse2; - p.cvt32to16_shl[BLOCK_8x8] = x265_cvt32to16_shl_8_sse2; - p.cvt32to16_shl[BLOCK_16x16] = x265_cvt32to16_shl_16_sse2; - p.cvt32to16_shl[BLOCK_32x32] = x265_cvt32to16_shl_32_sse2; + p.cpy2Dto1D_shl[BLOCK_4x4] = x265_cpy2Dto1D_shl_4_sse2; + p.cpy2Dto1D_shl[BLOCK_8x8] = x265_cpy2Dto1D_shl_8_sse2; + p.cpy2Dto1D_shl[BLOCK_16x16] = x265_cpy2Dto1D_shl_16_sse2; + p.cpy2Dto1D_shl[BLOCK_32x32] = x265_cpy2Dto1D_shl_32_sse2; + p.cpy2Dto1D_shr[BLOCK_4x4] = x265_cpy2Dto1D_shr_4_sse2; + p.cpy2Dto1D_shr[BLOCK_8x8] = x265_cpy2Dto1D_shr_8_sse2; + p.cpy2Dto1D_shr[BLOCK_16x16] = x265_cpy2Dto1D_shr_16_sse2; + p.cpy2Dto1D_shr[BLOCK_32x32] = x265_cpy2Dto1D_shr_32_sse2; + p.cpy1Dto2D_shl[BLOCK_4x4] = x265_cpy1Dto2D_shl_4_sse2; + p.cpy1Dto2D_shl[BLOCK_8x8] = x265_cpy1Dto2D_shl_8_sse2; + p.cpy1Dto2D_shl[BLOCK_16x16] = x265_cpy1Dto2D_shl_16_sse2; + p.cpy1Dto2D_shl[BLOCK_32x32] = x265_cpy1Dto2D_shl_32_sse2; + p.cpy1Dto2D_shr[BLOCK_4x4] = x265_cpy1Dto2D_shr_4_sse2; + p.cpy1Dto2D_shr[BLOCK_8x8] = x265_cpy1Dto2D_shr_8_sse2; + p.cpy1Dto2D_shr[BLOCK_16x16] = x265_cpy1Dto2D_shr_16_sse2; + p.cpy1Dto2D_shr[BLOCK_32x32] = x265_cpy1Dto2D_shr_32_sse2; CHROMA_PIXELSUB_PS(_sse2); CHROMA_PIXELSUB_PS_422(_sse2); @@ -1354,9 +1365,9 @@ void Setup_Assembly_Primitives(EncoderPrimitives &p, int cpuMask) CHROMA_VERT_FILTERS_422(_sse2); CHROMA_VERT_FILTERS_444(_sse2); p.luma_p2s = x265_luma_p2s_sse2; - p.chroma_p2s[X265_CSP_I420] = x265_chroma_p2s_sse2; - p.chroma_p2s[X265_CSP_I422] = x265_chroma_p2s_sse2; - p.chroma_p2s[X265_CSP_I444] = x265_luma_p2s_sse2; // for i444 , chroma_p2s can be replaced by luma_p2s + p.chroma[X265_CSP_I420].p2s = x265_chroma_p2s_sse2; + p.chroma[X265_CSP_I422].p2s = x265_chroma_p2s_sse2; + p.chroma[X265_CSP_I444].p2s = x265_luma_p2s_sse2; // for i444 , chroma_p2s can be replaced by luma_p2s p.blockfill_s[BLOCK_4x4] = x265_blockfill_s_4x4_sse2; p.blockfill_s[BLOCK_8x8] = x265_blockfill_s_8x8_sse2; @@ -1376,6 +1387,9 @@ void Setup_Assembly_Primitives(EncoderPrimitives &p, int cpuMask) p.dct[DCT_4x4] = x265_dct4_sse2; p.idct[IDCT_4x4] = x265_idct4_sse2; +#if X86_64 + p.idct[IDCT_8x8] = x265_idct8_sse2; +#endif p.idct[IDST_4x4] = x265_idst4_sse2; LUMA_SS_FILTERS(_sse2); @@ -1407,11 +1421,6 @@ void Setup_Assembly_Primitives(EncoderPrimitives &p, int cpuMask) p.quant = x265_quant_sse4; p.nquant = x265_nquant_sse4; p.dequant_normal = x265_dequant_normal_sse4; - p.cvt16to32_shl = x265_cvt16to32_shl_sse4; - p.cvt16to32_shr[BLOCK_4x4] = x265_cvt16to32_shr_4_sse4; - p.cvt16to32_shr[BLOCK_8x8] = x265_cvt16to32_shr_8_sse4; - p.cvt16to32_shr[BLOCK_16x16] = x265_cvt16to32_shr_16_sse4; - p.cvt16to32_shr[BLOCK_32x32] = x265_cvt16to32_shr_32_sse4; p.intra_pred[0][BLOCK_4x4] = x265_intra_pred_planar4_sse4; p.intra_pred[0][BLOCK_8x8] = x265_intra_pred_planar8_sse4; p.intra_pred[0][BLOCK_16x16] = x265_intra_pred_planar16_sse4; @@ -1428,7 +1437,7 @@ void Setup_Assembly_Primitives(EncoderPrimitives &p, int cpuMask) } if (cpuMask & X265_CPU_XOP) { - p.frame_init_lowres_core = x265_frame_init_lowres_core_xop; + p.frameInitLowres = x265_frame_init_lowres_core_xop; SA8D_INTER_FROM_BLOCK(xop); INIT7(satd, _xop); HEVC_SATD(xop); @@ -1440,6 +1449,14 @@ void Setup_Assembly_Primitives(EncoderPrimitives &p, int cpuMask) p.nquant = x265_nquant_avx2; p.dequant_normal = x265_dequant_normal_avx2; p.scale1D_128to64 = x265_scale1D_128to64_avx2; + p.cpy1Dto2D_shl[BLOCK_4x4] = x265_cpy1Dto2D_shl_4_avx2; + p.cpy1Dto2D_shl[BLOCK_8x8] = x265_cpy1Dto2D_shl_8_avx2; + p.cpy1Dto2D_shl[BLOCK_16x16] = x265_cpy1Dto2D_shl_16_avx2; + p.cpy1Dto2D_shl[BLOCK_32x32] = x265_cpy1Dto2D_shl_32_avx2; + p.cpy1Dto2D_shr[BLOCK_4x4] = x265_cpy1Dto2D_shr_4_avx2; + p.cpy1Dto2D_shr[BLOCK_8x8] = x265_cpy1Dto2D_shr_8_avx2; + p.cpy1Dto2D_shr[BLOCK_16x16] = x265_cpy1Dto2D_shr_16_avx2; + p.cpy1Dto2D_shr[BLOCK_32x32] = x265_cpy1Dto2D_shr_32_avx2; #if X86_64 p.dct[DCT_8x8] = x265_dct8_avx2; p.dct[DCT_16x16] = x265_dct16_avx2; @@ -1448,7 +1465,6 @@ void Setup_Assembly_Primitives(EncoderPrimitives &p, int cpuMask) p.idct[IDCT_8x8] = x265_idct8_avx2; p.idct[IDCT_16x16] = x265_idct16_avx2; p.idct[IDCT_32x32] = x265_idct32_avx2; - p.transpose[BLOCK_8x8] = x265_transpose8_avx2; p.transpose[BLOCK_16x16] = x265_transpose16_avx2; p.transpose[BLOCK_32x32] = x265_transpose32_avx2; @@ -1500,7 +1516,7 @@ void Setup_Assembly_Primitives(EncoderPrimitives &p, int cpuMask) INIT8(sad_x4, _mmx2); p.satd[LUMA_4x4] = x265_pixel_satd_4x4_mmx2; p.sa8d_inter[LUMA_4x4] = x265_pixel_satd_4x4_mmx2; - p.frame_init_lowres_core = x265_frame_init_lowres_core_mmx2; + p.frameInitLowres = x265_frame_init_lowres_core_mmx2; PIXEL_AVG(sse2); PIXEL_AVG_W4(mmx2); @@ -1548,14 +1564,26 @@ void Setup_Assembly_Primitives(EncoderPrimitives &p, int cpuMask) p.ssd_s[BLOCK_16x16] = x265_pixel_ssd_s_16_sse2; p.ssd_s[BLOCK_32x32] = x265_pixel_ssd_s_32_sse2; - p.frame_init_lowres_core = x265_frame_init_lowres_core_sse2; + p.frameInitLowres = x265_frame_init_lowres_core_sse2; SA8D_INTER_FROM_BLOCK(sse2); - p.cvt32to16_shr = x265_cvt32to16_shr_sse2; - p.cvt32to16_shl[BLOCK_4x4] = x265_cvt32to16_shl_4_sse2; - p.cvt32to16_shl[BLOCK_8x8] = x265_cvt32to16_shl_8_sse2; - p.cvt32to16_shl[BLOCK_16x16] = x265_cvt32to16_shl_16_sse2; - p.cvt32to16_shl[BLOCK_32x32] = x265_cvt32to16_shl_32_sse2; + p.cpy2Dto1D_shl[BLOCK_4x4] = x265_cpy2Dto1D_shl_4_sse2; + p.cpy2Dto1D_shl[BLOCK_8x8] = x265_cpy2Dto1D_shl_8_sse2; + p.cpy2Dto1D_shl[BLOCK_16x16] = x265_cpy2Dto1D_shl_16_sse2; + p.cpy2Dto1D_shl[BLOCK_32x32] = x265_cpy2Dto1D_shl_32_sse2; + p.cpy2Dto1D_shr[BLOCK_4x4] = x265_cpy2Dto1D_shr_4_sse2; + p.cpy2Dto1D_shr[BLOCK_8x8] = x265_cpy2Dto1D_shr_8_sse2; + p.cpy2Dto1D_shr[BLOCK_16x16] = x265_cpy2Dto1D_shr_16_sse2; + p.cpy2Dto1D_shr[BLOCK_32x32] = x265_cpy2Dto1D_shr_32_sse2; + p.cpy1Dto2D_shl[BLOCK_4x4] = x265_cpy1Dto2D_shl_4_sse2; + p.cpy1Dto2D_shl[BLOCK_8x8] = x265_cpy1Dto2D_shl_8_sse2; + p.cpy1Dto2D_shl[BLOCK_16x16] = x265_cpy1Dto2D_shl_16_sse2; + p.cpy1Dto2D_shl[BLOCK_32x32] = x265_cpy1Dto2D_shl_32_sse2; + p.cpy1Dto2D_shr[BLOCK_4x4] = x265_cpy1Dto2D_shr_4_sse2; + p.cpy1Dto2D_shr[BLOCK_8x8] = x265_cpy1Dto2D_shr_8_sse2; + p.cpy1Dto2D_shr[BLOCK_16x16] = x265_cpy1Dto2D_shr_16_sse2; + p.cpy1Dto2D_shr[BLOCK_32x32] = x265_cpy1Dto2D_shr_32_sse2; + p.calcresidual[BLOCK_4x4] = x265_getResidual4_sse2; p.calcresidual[BLOCK_8x8] = x265_getResidual8_sse2; p.transpose[BLOCK_4x4] = x265_transpose4_sse2; @@ -1565,18 +1593,19 @@ void Setup_Assembly_Primitives(EncoderPrimitives &p, int cpuMask) p.transpose[BLOCK_64x64] = x265_transpose64_sse2; p.ssim_4x4x2_core = x265_pixel_ssim_4x4x2_core_sse2; p.ssim_end_4 = x265_pixel_ssim_end4_sse2; + p.dct[DCT_4x4] = x265_dct4_sse2; p.idct[IDCT_4x4] = x265_idct4_sse2; +#if X86_64 + p.idct[IDCT_8x8] = x265_idct8_sse2; +#endif p.idct[IDST_4x4] = x265_idst4_sse2; + p.planecopy_sp = x265_downShift_16_sse2; - p.copy_shl[BLOCK_4x4] = x265_copy_shl_4_sse2; - p.copy_shl[BLOCK_8x8] = x265_copy_shl_8_sse2; - p.copy_shl[BLOCK_16x16] = x265_copy_shl_16_sse2; - p.copy_shl[BLOCK_32x32] = x265_copy_shl_32_sse2; } if (cpuMask & X265_CPU_SSSE3) { - p.frame_init_lowres_core = x265_frame_init_lowres_core_ssse3; + p.frameInitLowres = x265_frame_init_lowres_core_ssse3; SA8D_INTER_FROM_BLOCK(ssse3); p.sse_pp[LUMA_4x4] = x265_pixel_ssd_4x4_ssse3; ASSGN_SSE(ssse3); @@ -1601,9 +1630,9 @@ void Setup_Assembly_Primitives(EncoderPrimitives &p, int cpuMask) p.luma_hvpp[LUMA_8x8] = x265_interp_8tap_hv_pp_8x8_ssse3; p.luma_p2s = x265_luma_p2s_ssse3; - p.chroma_p2s[X265_CSP_I420] = x265_chroma_p2s_ssse3; - p.chroma_p2s[X265_CSP_I422] = x265_chroma_p2s_ssse3; - p.chroma_p2s[X265_CSP_I444] = x265_luma_p2s_ssse3; // for i444 , chroma_p2s can be replaced by luma_p2s + p.chroma[X265_CSP_I420].p2s = x265_chroma_p2s_ssse3; + p.chroma[X265_CSP_I422].p2s = x265_chroma_p2s_ssse3; + p.chroma[X265_CSP_I444].p2s = x265_luma_p2s_ssse3; // for i444, chroma_p2s can use luma_p2s p.dct[DST_4x4] = x265_dst4_ssse3; p.idct[IDCT_8x8] = x265_idct8_ssse3; @@ -1616,11 +1645,6 @@ void Setup_Assembly_Primitives(EncoderPrimitives &p, int cpuMask) LUMA_ADDAVG(_sse4); CHROMA_ADDAVG(_sse4); CHROMA_ADDAVG_422(_sse4); - p.cvt16to32_shl = x265_cvt16to32_shl_sse4; - p.cvt16to32_shr[BLOCK_4x4] = x265_cvt16to32_shr_4_sse4; - p.cvt16to32_shr[BLOCK_8x8] = x265_cvt16to32_shr_8_sse4; - p.cvt16to32_shr[BLOCK_16x16] = x265_cvt16to32_shr_16_sse4; - p.cvt16to32_shr[BLOCK_32x32] = x265_cvt16to32_shr_32_sse4; // TODO: check POPCNT flag! p.copy_cnt[BLOCK_4x4] = x265_copy_cnt_4_sse4; @@ -1690,12 +1714,11 @@ void Setup_Assembly_Primitives(EncoderPrimitives &p, int cpuMask) INTRA_ANG_SSE4(sse4); p.dct[DCT_8x8] = x265_dct8_sse4; - p.copy_shr = x265_copy_shr_sse4; - p.denoiseDct = x265_denoise_dct_sse4; +// p.denoiseDct = x265_denoise_dct_sse4; } if (cpuMask & X265_CPU_AVX) { - p.frame_init_lowres_core = x265_frame_init_lowres_core_avx; + p.frameInitLowres = x265_frame_init_lowres_core_avx; HEVC_SATD(avx); SA8D_INTER_FROM_BLOCK(avx); ASSGN_SSE(avx); @@ -1736,7 +1759,7 @@ void Setup_Assembly_Primitives(EncoderPrimitives &p, int cpuMask) } if (cpuMask & X265_CPU_XOP) { - p.frame_init_lowres_core = x265_frame_init_lowres_core_xop; + p.frameInitLowres = x265_frame_init_lowres_core_xop; SA8D_INTER_FROM_BLOCK(xop); INIT7(satd, _xop); INIT5_NAME(sse_pp, ssd, _xop); @@ -1761,15 +1784,21 @@ void Setup_Assembly_Primitives(EncoderPrimitives &p, int cpuMask) p.blockfill_s[BLOCK_16x16] = x265_blockfill_s_16x16_avx2; p.blockfill_s[BLOCK_32x32] = x265_blockfill_s_32x32_avx2; - p.cvt32to16_shl[BLOCK_4x4] = x265_cvt32to16_shl_4_avx2; - p.cvt32to16_shl[BLOCK_8x8] = x265_cvt32to16_shl_8_avx2; - p.cvt32to16_shl[BLOCK_16x16] = x265_cvt32to16_shl_16_avx2; - p.cvt32to16_shl[BLOCK_32x32] = x265_cvt32to16_shl_32_avx2; - p.denoiseDct = x265_denoise_dct_avx2; + p.cpy1Dto2D_shl[BLOCK_4x4] = x265_cpy1Dto2D_shl_4_avx2; + p.cpy1Dto2D_shl[BLOCK_8x8] = x265_cpy1Dto2D_shl_8_avx2; + p.cpy1Dto2D_shl[BLOCK_16x16] = x265_cpy1Dto2D_shl_16_avx2; + p.cpy1Dto2D_shl[BLOCK_32x32] = x265_cpy1Dto2D_shl_32_avx2; + p.cpy1Dto2D_shr[BLOCK_4x4] = x265_cpy1Dto2D_shr_4_avx2; + p.cpy1Dto2D_shr[BLOCK_8x8] = x265_cpy1Dto2D_shr_8_avx2; + p.cpy1Dto2D_shr[BLOCK_16x16] = x265_cpy1Dto2D_shr_16_avx2; + p.cpy1Dto2D_shr[BLOCK_32x32] = x265_cpy1Dto2D_shr_32_avx2; + +// p.denoiseDct = x265_denoise_dct_avx2; p.dct[DCT_4x4] = x265_dct4_avx2; p.quant = x265_quant_avx2; p.nquant = x265_nquant_avx2; p.dequant_normal = x265_dequant_normal_avx2; + p.chroma[X265_CSP_I420].copy_ss[CHROMA_16x4] = x265_blockcopy_ss_16x4_avx; p.chroma[X265_CSP_I420].copy_ss[CHROMA_16x12] = x265_blockcopy_ss_16x12_avx; p.chroma[X265_CSP_I420].copy_ss[CHROMA_16x8] = x265_blockcopy_ss_16x8_avx; @@ -1785,6 +1814,7 @@ void Setup_Assembly_Primitives(EncoderPrimitives &p, int cpuMask) p.weight_pp = x265_weight_pp_avx2; #if X86_64 + p.dct[DCT_8x8] = x265_dct8_avx2; p.dct[DCT_16x16] = x265_dct16_avx2; p.dct[DCT_32x32] = x265_dct32_avx2; @@ -1797,8 +1827,83 @@ void Setup_Assembly_Primitives(EncoderPrimitives &p, int cpuMask) p.transpose[BLOCK_16x16] = x265_transpose16_avx2; p.transpose[BLOCK_32x32] = x265_transpose32_avx2; p.transpose[BLOCK_64x64] = x265_transpose64_avx2; + + p.luma_vpp[LUMA_12x16] = x265_interp_8tap_vert_pp_12x16_avx2; + + p.luma_vpp[LUMA_16x4] = x265_interp_8tap_vert_pp_16x4_avx2; + p.luma_vpp[LUMA_16x8] = x265_interp_8tap_vert_pp_16x8_avx2; + p.luma_vpp[LUMA_16x12] = x265_interp_8tap_vert_pp_16x12_avx2; + p.luma_vpp[LUMA_16x16] = x265_interp_8tap_vert_pp_16x16_avx2; + p.luma_vpp[LUMA_16x32] = x265_interp_8tap_vert_pp_16x32_avx2; + p.luma_vpp[LUMA_16x64] = x265_interp_8tap_vert_pp_16x64_avx2; + + p.luma_vpp[LUMA_24x32] = x265_interp_8tap_vert_pp_24x32_avx2; + + p.luma_vpp[LUMA_32x8] = x265_interp_8tap_vert_pp_32x8_avx2; + p.luma_vpp[LUMA_32x16] = x265_interp_8tap_vert_pp_32x16_avx2; + p.luma_vpp[LUMA_32x24] = x265_interp_8tap_vert_pp_32x24_avx2; + p.luma_vpp[LUMA_32x32] = x265_interp_8tap_vert_pp_32x32_avx2; + p.luma_vpp[LUMA_32x64] = x265_interp_8tap_vert_pp_32x64_avx2; + + p.luma_vpp[LUMA_48x64] = x265_interp_8tap_vert_pp_48x64_avx2; + + p.luma_vpp[LUMA_64x16] = x265_interp_8tap_vert_pp_64x16_avx2; + p.luma_vpp[LUMA_64x32] = x265_interp_8tap_vert_pp_64x32_avx2; + p.luma_vpp[LUMA_64x48] = x265_interp_8tap_vert_pp_64x48_avx2; + p.luma_vpp[LUMA_64x64] = x265_interp_8tap_vert_pp_64x64_avx2; #endif p.luma_hpp[LUMA_4x4] = x265_interp_8tap_horiz_pp_4x4_avx2; + + p.luma_hpp[LUMA_8x4] = x265_interp_8tap_horiz_pp_8x4_avx2; + p.luma_hpp[LUMA_8x8] = x265_interp_8tap_horiz_pp_8x8_avx2; + p.luma_hpp[LUMA_8x16] = x265_interp_8tap_horiz_pp_8x16_avx2; + p.luma_hpp[LUMA_8x32] = x265_interp_8tap_horiz_pp_8x32_avx2; + + p.luma_hpp[LUMA_16x4] = x265_interp_8tap_horiz_pp_16x4_avx2; + p.luma_hpp[LUMA_16x8] = x265_interp_8tap_horiz_pp_16x8_avx2; + p.luma_hpp[LUMA_16x12] = x265_interp_8tap_horiz_pp_16x12_avx2; + p.luma_hpp[LUMA_16x16] = x265_interp_8tap_horiz_pp_16x16_avx2; + p.luma_hpp[LUMA_16x32] = x265_interp_8tap_horiz_pp_16x32_avx2; + p.luma_hpp[LUMA_16x64] = x265_interp_8tap_horiz_pp_16x64_avx2; + + p.luma_hpp[LUMA_32x8] = x265_interp_8tap_horiz_pp_32x8_avx2; + p.luma_hpp[LUMA_32x16] = x265_interp_8tap_horiz_pp_32x16_avx2; + p.luma_hpp[LUMA_32x24] = x265_interp_8tap_horiz_pp_32x24_avx2; + p.luma_hpp[LUMA_32x32] = x265_interp_8tap_horiz_pp_32x32_avx2; + p.luma_hpp[LUMA_32x64] = x265_interp_8tap_horiz_pp_32x64_avx2; + + p.luma_hpp[LUMA_64x64] = x265_interp_8tap_horiz_pp_64x64_avx2; + p.luma_hpp[LUMA_64x48] = x265_interp_8tap_horiz_pp_64x48_avx2; + p.luma_hpp[LUMA_64x32] = x265_interp_8tap_horiz_pp_64x32_avx2; + p.luma_hpp[LUMA_64x16] = x265_interp_8tap_horiz_pp_64x16_avx2; + + p.luma_hpp[LUMA_48x64] = x265_interp_8tap_horiz_pp_48x64_avx2; + + p.chroma[X265_CSP_I420].filter_hpp[CHROMA_8x8] = x265_interp_4tap_horiz_pp_8x8_avx2; + p.chroma[X265_CSP_I420].filter_hpp[CHROMA_4x4] = x265_interp_4tap_horiz_pp_4x4_avx2; + p.chroma[X265_CSP_I420].filter_hpp[CHROMA_32x32] = x265_interp_4tap_horiz_pp_32x32_avx2; + p.chroma[X265_CSP_I420].filter_hpp[CHROMA_16x16] = x265_interp_4tap_horiz_pp_16x16_avx2; + + p.luma_vpp[LUMA_4x4] = x265_interp_8tap_vert_pp_4x4_avx2; + + p.luma_vpp[LUMA_8x4] = x265_interp_8tap_vert_pp_8x4_avx2; + p.luma_vpp[LUMA_8x8] = x265_interp_8tap_vert_pp_8x8_avx2; + p.luma_vpp[LUMA_8x16] = x265_interp_8tap_vert_pp_8x16_avx2; + p.luma_vpp[LUMA_8x32] = x265_interp_8tap_vert_pp_8x32_avx2; + + // color space i420 + p.chroma[X265_CSP_I420].filter_vpp[CHROMA_4x4] = x265_interp_4tap_vert_pp_4x4_avx2; + p.chroma[X265_CSP_I420].filter_vpp[CHROMA_8x8] = x265_interp_4tap_vert_pp_8x8_avx2; + + // color space i422 + p.chroma[X265_CSP_I422].filter_vpp[CHROMA422_4x4] = x265_interp_4tap_vert_pp_4x4_avx2; + + p.luma_vps[LUMA_4x4] = x265_interp_8tap_vert_ps_4x4_avx2; + +#if X86_64 + p.chroma[X265_CSP_I420].filter_vpp[CHROMA_16x16] = x265_interp_4tap_vert_pp_16x16_avx2; + p.chroma[X265_CSP_I420].filter_vpp[CHROMA_32x32] = x265_interp_4tap_vert_pp_32x32_avx2; +#endif } #endif // if HIGH_BIT_DEPTH }