Imported Upstream version 1.4+222+hg5f9f7194267b
[deb_x265.git] / source / common / x86 / asm-primitives.cpp
index ec1607dfa6f2fa7e69f22261f78646d775d5359f..b81115b304e3b1565ac9e0cf4fb2a77d69716a0b 100644 (file)
@@ -1336,11 +1336,22 @@ void Setup_Assembly_Primitives(EncoderPrimitives &p, int cpuMask)
         p.sad_x4[LUMA_16x4] = x265_pixel_sad_x4_16x4_sse2;
         p.sad_x4[LUMA_12x16] = x265_pixel_sad_x4_12x16_mmx2;
 
-        p.cvt32to16_shr = x265_cvt32to16_shr_sse2;
-        p.cvt32to16_shl[BLOCK_4x4] = x265_cvt32to16_shl_4_sse2;
-        p.cvt32to16_shl[BLOCK_8x8] = x265_cvt32to16_shl_8_sse2;
-        p.cvt32to16_shl[BLOCK_16x16] = x265_cvt32to16_shl_16_sse2;
-        p.cvt32to16_shl[BLOCK_32x32] = x265_cvt32to16_shl_32_sse2;
+        p.cpy2Dto1D_shl[BLOCK_4x4] = x265_cpy2Dto1D_shl_4_sse2;
+        p.cpy2Dto1D_shl[BLOCK_8x8] = x265_cpy2Dto1D_shl_8_sse2;
+        p.cpy2Dto1D_shl[BLOCK_16x16] = x265_cpy2Dto1D_shl_16_sse2;
+        p.cpy2Dto1D_shl[BLOCK_32x32] = x265_cpy2Dto1D_shl_32_sse2;
+        p.cpy2Dto1D_shr[BLOCK_4x4] = x265_cpy2Dto1D_shr_4_sse2;
+        p.cpy2Dto1D_shr[BLOCK_8x8] = x265_cpy2Dto1D_shr_8_sse2;
+        p.cpy2Dto1D_shr[BLOCK_16x16] = x265_cpy2Dto1D_shr_16_sse2;
+        p.cpy2Dto1D_shr[BLOCK_32x32] = x265_cpy2Dto1D_shr_32_sse2;
+        p.cpy1Dto2D_shl[BLOCK_4x4] = x265_cpy1Dto2D_shl_4_sse2;
+        p.cpy1Dto2D_shl[BLOCK_8x8] = x265_cpy1Dto2D_shl_8_sse2;
+        p.cpy1Dto2D_shl[BLOCK_16x16] = x265_cpy1Dto2D_shl_16_sse2;
+        p.cpy1Dto2D_shl[BLOCK_32x32] = x265_cpy1Dto2D_shl_32_sse2;
+        p.cpy1Dto2D_shr[BLOCK_4x4] = x265_cpy1Dto2D_shr_4_sse2;
+        p.cpy1Dto2D_shr[BLOCK_8x8] = x265_cpy1Dto2D_shr_8_sse2;
+        p.cpy1Dto2D_shr[BLOCK_16x16] = x265_cpy1Dto2D_shr_16_sse2;
+        p.cpy1Dto2D_shr[BLOCK_32x32] = x265_cpy1Dto2D_shr_32_sse2;
 
         CHROMA_PIXELSUB_PS(_sse2);
         CHROMA_PIXELSUB_PS_422(_sse2);
@@ -1354,9 +1365,9 @@ void Setup_Assembly_Primitives(EncoderPrimitives &p, int cpuMask)
         CHROMA_VERT_FILTERS_422(_sse2);
         CHROMA_VERT_FILTERS_444(_sse2);
         p.luma_p2s = x265_luma_p2s_sse2;
-        p.chroma_p2s[X265_CSP_I420] = x265_chroma_p2s_sse2;
-        p.chroma_p2s[X265_CSP_I422] = x265_chroma_p2s_sse2;
-        p.chroma_p2s[X265_CSP_I444] = x265_luma_p2s_sse2; // for i444 , chroma_p2s can be replaced by luma_p2s
+        p.chroma[X265_CSP_I420].p2s = x265_chroma_p2s_sse2;
+        p.chroma[X265_CSP_I422].p2s = x265_chroma_p2s_sse2;
+        p.chroma[X265_CSP_I444].p2s = x265_luma_p2s_sse2; // for i444, chroma_p2s can use luma_p2s
 
         p.blockfill_s[BLOCK_4x4] = x265_blockfill_s_4x4_sse2;
         p.blockfill_s[BLOCK_8x8] = x265_blockfill_s_8x8_sse2;
@@ -1376,6 +1387,9 @@ void Setup_Assembly_Primitives(EncoderPrimitives &p, int cpuMask)
 
         p.dct[DCT_4x4] = x265_dct4_sse2;
         p.idct[IDCT_4x4] = x265_idct4_sse2;
+#if X86_64
+        p.idct[IDCT_8x8] = x265_idct8_sse2;
+#endif
         p.idct[IDST_4x4] = x265_idst4_sse2;
 
         LUMA_SS_FILTERS(_sse2);
@@ -1407,11 +1421,6 @@ void Setup_Assembly_Primitives(EncoderPrimitives &p, int cpuMask)
         p.quant = x265_quant_sse4;
         p.nquant = x265_nquant_sse4;
         p.dequant_normal = x265_dequant_normal_sse4;
-        p.cvt16to32_shl = x265_cvt16to32_shl_sse4;
-        p.cvt16to32_shr[BLOCK_4x4] = x265_cvt16to32_shr_4_sse4;
-        p.cvt16to32_shr[BLOCK_8x8] = x265_cvt16to32_shr_8_sse4;
-        p.cvt16to32_shr[BLOCK_16x16] = x265_cvt16to32_shr_16_sse4;
-        p.cvt16to32_shr[BLOCK_32x32] = x265_cvt16to32_shr_32_sse4;
         p.intra_pred[0][BLOCK_4x4] = x265_intra_pred_planar4_sse4;
         p.intra_pred[0][BLOCK_8x8] = x265_intra_pred_planar8_sse4;
         p.intra_pred[0][BLOCK_16x16] = x265_intra_pred_planar16_sse4;
@@ -1428,7 +1437,7 @@ void Setup_Assembly_Primitives(EncoderPrimitives &p, int cpuMask)
     }
     if (cpuMask & X265_CPU_XOP)
     {
-        p.frame_init_lowres_core = x265_frame_init_lowres_core_xop;
+        p.frameInitLowres = x265_frame_init_lowres_core_xop;
         SA8D_INTER_FROM_BLOCK(xop);
         INIT7(satd, _xop);
         HEVC_SATD(xop);
@@ -1440,6 +1449,14 @@ void Setup_Assembly_Primitives(EncoderPrimitives &p, int cpuMask)
         p.nquant = x265_nquant_avx2;
         p.dequant_normal = x265_dequant_normal_avx2;
         p.scale1D_128to64 = x265_scale1D_128to64_avx2;
+        p.cpy1Dto2D_shl[BLOCK_4x4] = x265_cpy1Dto2D_shl_4_avx2;
+        p.cpy1Dto2D_shl[BLOCK_8x8] = x265_cpy1Dto2D_shl_8_avx2;
+        p.cpy1Dto2D_shl[BLOCK_16x16] = x265_cpy1Dto2D_shl_16_avx2;
+        p.cpy1Dto2D_shl[BLOCK_32x32] = x265_cpy1Dto2D_shl_32_avx2;
+        p.cpy1Dto2D_shr[BLOCK_4x4] = x265_cpy1Dto2D_shr_4_avx2;
+        p.cpy1Dto2D_shr[BLOCK_8x8] = x265_cpy1Dto2D_shr_8_avx2;
+        p.cpy1Dto2D_shr[BLOCK_16x16] = x265_cpy1Dto2D_shr_16_avx2;
+        p.cpy1Dto2D_shr[BLOCK_32x32] = x265_cpy1Dto2D_shr_32_avx2;
 #if X86_64
         p.dct[DCT_8x8] = x265_dct8_avx2;
         p.dct[DCT_16x16] = x265_dct16_avx2;
@@ -1448,7 +1465,6 @@ void Setup_Assembly_Primitives(EncoderPrimitives &p, int cpuMask)
         p.idct[IDCT_8x8] = x265_idct8_avx2;
         p.idct[IDCT_16x16] = x265_idct16_avx2;
         p.idct[IDCT_32x32] = x265_idct32_avx2;
-
         p.transpose[BLOCK_8x8] = x265_transpose8_avx2;
         p.transpose[BLOCK_16x16] = x265_transpose16_avx2;
         p.transpose[BLOCK_32x32] = x265_transpose32_avx2;
@@ -1500,7 +1516,7 @@ void Setup_Assembly_Primitives(EncoderPrimitives &p, int cpuMask)
         INIT8(sad_x4, _mmx2);
         p.satd[LUMA_4x4] = x265_pixel_satd_4x4_mmx2;
         p.sa8d_inter[LUMA_4x4]  = x265_pixel_satd_4x4_mmx2;
-        p.frame_init_lowres_core = x265_frame_init_lowres_core_mmx2;
+        p.frameInitLowres = x265_frame_init_lowres_core_mmx2;
 
         PIXEL_AVG(sse2);
         PIXEL_AVG_W4(mmx2);
@@ -1548,14 +1564,26 @@ void Setup_Assembly_Primitives(EncoderPrimitives &p, int cpuMask)
         p.ssd_s[BLOCK_16x16] = x265_pixel_ssd_s_16_sse2;
         p.ssd_s[BLOCK_32x32] = x265_pixel_ssd_s_32_sse2;
 
-        p.frame_init_lowres_core = x265_frame_init_lowres_core_sse2;
+        p.frameInitLowres = x265_frame_init_lowres_core_sse2;
         SA8D_INTER_FROM_BLOCK(sse2);
 
-        p.cvt32to16_shr = x265_cvt32to16_shr_sse2;
-        p.cvt32to16_shl[BLOCK_4x4] = x265_cvt32to16_shl_4_sse2;
-        p.cvt32to16_shl[BLOCK_8x8] = x265_cvt32to16_shl_8_sse2;
-        p.cvt32to16_shl[BLOCK_16x16] = x265_cvt32to16_shl_16_sse2;
-        p.cvt32to16_shl[BLOCK_32x32] = x265_cvt32to16_shl_32_sse2;
+        p.cpy2Dto1D_shl[BLOCK_4x4] = x265_cpy2Dto1D_shl_4_sse2;
+        p.cpy2Dto1D_shl[BLOCK_8x8] = x265_cpy2Dto1D_shl_8_sse2;
+        p.cpy2Dto1D_shl[BLOCK_16x16] = x265_cpy2Dto1D_shl_16_sse2;
+        p.cpy2Dto1D_shl[BLOCK_32x32] = x265_cpy2Dto1D_shl_32_sse2;
+        p.cpy2Dto1D_shr[BLOCK_4x4] = x265_cpy2Dto1D_shr_4_sse2;
+        p.cpy2Dto1D_shr[BLOCK_8x8] = x265_cpy2Dto1D_shr_8_sse2;
+        p.cpy2Dto1D_shr[BLOCK_16x16] = x265_cpy2Dto1D_shr_16_sse2;
+        p.cpy2Dto1D_shr[BLOCK_32x32] = x265_cpy2Dto1D_shr_32_sse2;
+        p.cpy1Dto2D_shl[BLOCK_4x4] = x265_cpy1Dto2D_shl_4_sse2;
+        p.cpy1Dto2D_shl[BLOCK_8x8] = x265_cpy1Dto2D_shl_8_sse2;
+        p.cpy1Dto2D_shl[BLOCK_16x16] = x265_cpy1Dto2D_shl_16_sse2;
+        p.cpy1Dto2D_shl[BLOCK_32x32] = x265_cpy1Dto2D_shl_32_sse2;
+        p.cpy1Dto2D_shr[BLOCK_4x4] = x265_cpy1Dto2D_shr_4_sse2;
+        p.cpy1Dto2D_shr[BLOCK_8x8] = x265_cpy1Dto2D_shr_8_sse2;
+        p.cpy1Dto2D_shr[BLOCK_16x16] = x265_cpy1Dto2D_shr_16_sse2;
+        p.cpy1Dto2D_shr[BLOCK_32x32] = x265_cpy1Dto2D_shr_32_sse2;
+
         p.calcresidual[BLOCK_4x4] = x265_getResidual4_sse2;
         p.calcresidual[BLOCK_8x8] = x265_getResidual8_sse2;
         p.transpose[BLOCK_4x4] = x265_transpose4_sse2;
@@ -1565,18 +1593,19 @@ void Setup_Assembly_Primitives(EncoderPrimitives &p, int cpuMask)
         p.transpose[BLOCK_64x64] = x265_transpose64_sse2;
         p.ssim_4x4x2_core = x265_pixel_ssim_4x4x2_core_sse2;
         p.ssim_end_4 = x265_pixel_ssim_end4_sse2;
+
         p.dct[DCT_4x4] = x265_dct4_sse2;
         p.idct[IDCT_4x4] = x265_idct4_sse2;
+#if X86_64
+        p.idct[IDCT_8x8] = x265_idct8_sse2;
+#endif
         p.idct[IDST_4x4] = x265_idst4_sse2;
+
         p.planecopy_sp = x265_downShift_16_sse2;
-        p.copy_shl[BLOCK_4x4] = x265_copy_shl_4_sse2;
-        p.copy_shl[BLOCK_8x8] = x265_copy_shl_8_sse2;
-        p.copy_shl[BLOCK_16x16] = x265_copy_shl_16_sse2;
-        p.copy_shl[BLOCK_32x32] = x265_copy_shl_32_sse2;
     }
     if (cpuMask & X265_CPU_SSSE3)
     {
-        p.frame_init_lowres_core = x265_frame_init_lowres_core_ssse3;
+        p.frameInitLowres = x265_frame_init_lowres_core_ssse3;
         SA8D_INTER_FROM_BLOCK(ssse3);
         p.sse_pp[LUMA_4x4] = x265_pixel_ssd_4x4_ssse3;
         ASSGN_SSE(ssse3);
@@ -1601,9 +1630,9 @@ void Setup_Assembly_Primitives(EncoderPrimitives &p, int cpuMask)
 
         p.luma_hvpp[LUMA_8x8] = x265_interp_8tap_hv_pp_8x8_ssse3;
         p.luma_p2s = x265_luma_p2s_ssse3;
-        p.chroma_p2s[X265_CSP_I420] = x265_chroma_p2s_ssse3;
-        p.chroma_p2s[X265_CSP_I422] = x265_chroma_p2s_ssse3;
-        p.chroma_p2s[X265_CSP_I444] = x265_luma_p2s_ssse3; // for i444 , chroma_p2s can be replaced by luma_p2s
+        p.chroma[X265_CSP_I420].p2s = x265_chroma_p2s_ssse3;
+        p.chroma[X265_CSP_I422].p2s = x265_chroma_p2s_ssse3;
+        p.chroma[X265_CSP_I444].p2s = x265_luma_p2s_ssse3; // for i444, chroma_p2s can use luma_p2s
 
         p.dct[DST_4x4] = x265_dst4_ssse3;
         p.idct[IDCT_8x8] = x265_idct8_ssse3;
@@ -1616,11 +1645,6 @@ void Setup_Assembly_Primitives(EncoderPrimitives &p, int cpuMask)
         LUMA_ADDAVG(_sse4);
         CHROMA_ADDAVG(_sse4);
         CHROMA_ADDAVG_422(_sse4);
-        p.cvt16to32_shl = x265_cvt16to32_shl_sse4;
-        p.cvt16to32_shr[BLOCK_4x4] = x265_cvt16to32_shr_4_sse4;
-        p.cvt16to32_shr[BLOCK_8x8] = x265_cvt16to32_shr_8_sse4;
-        p.cvt16to32_shr[BLOCK_16x16] = x265_cvt16to32_shr_16_sse4;
-        p.cvt16to32_shr[BLOCK_32x32] = x265_cvt16to32_shr_32_sse4;
 
         // TODO: check POPCNT flag!
         p.copy_cnt[BLOCK_4x4] = x265_copy_cnt_4_sse4;
@@ -1690,12 +1714,11 @@ void Setup_Assembly_Primitives(EncoderPrimitives &p, int cpuMask)
         INTRA_ANG_SSE4(sse4);
 
         p.dct[DCT_8x8] = x265_dct8_sse4;
-        p.copy_shr = x265_copy_shr_sse4;
-        p.denoiseDct = x265_denoise_dct_sse4;
+//        p.denoiseDct = x265_denoise_dct_sse4;
     }
     if (cpuMask & X265_CPU_AVX)
     {
-        p.frame_init_lowres_core = x265_frame_init_lowres_core_avx;
+        p.frameInitLowres = x265_frame_init_lowres_core_avx;
         HEVC_SATD(avx);
         SA8D_INTER_FROM_BLOCK(avx);
         ASSGN_SSE(avx);
@@ -1736,7 +1759,7 @@ void Setup_Assembly_Primitives(EncoderPrimitives &p, int cpuMask)
     }
     if (cpuMask & X265_CPU_XOP)
     {
-        p.frame_init_lowres_core = x265_frame_init_lowres_core_xop;
+        p.frameInitLowres = x265_frame_init_lowres_core_xop;
         SA8D_INTER_FROM_BLOCK(xop);
         INIT7(satd, _xop);
         INIT5_NAME(sse_pp, ssd, _xop);
@@ -1761,15 +1784,21 @@ void Setup_Assembly_Primitives(EncoderPrimitives &p, int cpuMask)
         p.blockfill_s[BLOCK_16x16] = x265_blockfill_s_16x16_avx2;
         p.blockfill_s[BLOCK_32x32] = x265_blockfill_s_32x32_avx2;
 
-        p.cvt32to16_shl[BLOCK_4x4] = x265_cvt32to16_shl_4_avx2;
-        p.cvt32to16_shl[BLOCK_8x8] = x265_cvt32to16_shl_8_avx2;
-        p.cvt32to16_shl[BLOCK_16x16] = x265_cvt32to16_shl_16_avx2;
-        p.cvt32to16_shl[BLOCK_32x32] = x265_cvt32to16_shl_32_avx2;
-        p.denoiseDct = x265_denoise_dct_avx2;
+        p.cpy1Dto2D_shl[BLOCK_4x4] = x265_cpy1Dto2D_shl_4_avx2;
+        p.cpy1Dto2D_shl[BLOCK_8x8] = x265_cpy1Dto2D_shl_8_avx2;
+        p.cpy1Dto2D_shl[BLOCK_16x16] = x265_cpy1Dto2D_shl_16_avx2;
+        p.cpy1Dto2D_shl[BLOCK_32x32] = x265_cpy1Dto2D_shl_32_avx2;
+        p.cpy1Dto2D_shr[BLOCK_4x4] = x265_cpy1Dto2D_shr_4_avx2;
+        p.cpy1Dto2D_shr[BLOCK_8x8] = x265_cpy1Dto2D_shr_8_avx2;
+        p.cpy1Dto2D_shr[BLOCK_16x16] = x265_cpy1Dto2D_shr_16_avx2;
+        p.cpy1Dto2D_shr[BLOCK_32x32] = x265_cpy1Dto2D_shr_32_avx2;
+
+//        p.denoiseDct = x265_denoise_dct_avx2;
         p.dct[DCT_4x4] = x265_dct4_avx2;
         p.quant = x265_quant_avx2;
         p.nquant = x265_nquant_avx2;
         p.dequant_normal = x265_dequant_normal_avx2;
+
         p.chroma[X265_CSP_I420].copy_ss[CHROMA_16x4] = x265_blockcopy_ss_16x4_avx;
         p.chroma[X265_CSP_I420].copy_ss[CHROMA_16x12] = x265_blockcopy_ss_16x12_avx;
         p.chroma[X265_CSP_I420].copy_ss[CHROMA_16x8] = x265_blockcopy_ss_16x8_avx;
@@ -1785,6 +1814,7 @@ void Setup_Assembly_Primitives(EncoderPrimitives &p, int cpuMask)
         p.weight_pp = x265_weight_pp_avx2;
 
 #if X86_64
+
         p.dct[DCT_8x8] = x265_dct8_avx2;
         p.dct[DCT_16x16] = x265_dct16_avx2;
         p.dct[DCT_32x32] = x265_dct32_avx2;
@@ -1797,8 +1827,83 @@ void Setup_Assembly_Primitives(EncoderPrimitives &p, int cpuMask)
         p.transpose[BLOCK_16x16] = x265_transpose16_avx2;
         p.transpose[BLOCK_32x32] = x265_transpose32_avx2;
         p.transpose[BLOCK_64x64] = x265_transpose64_avx2;
+
+        p.luma_vpp[LUMA_12x16] = x265_interp_8tap_vert_pp_12x16_avx2;
+
+        p.luma_vpp[LUMA_16x4] = x265_interp_8tap_vert_pp_16x4_avx2;
+        p.luma_vpp[LUMA_16x8] = x265_interp_8tap_vert_pp_16x8_avx2;
+        p.luma_vpp[LUMA_16x12] = x265_interp_8tap_vert_pp_16x12_avx2;
+        p.luma_vpp[LUMA_16x16] = x265_interp_8tap_vert_pp_16x16_avx2;
+        p.luma_vpp[LUMA_16x32] = x265_interp_8tap_vert_pp_16x32_avx2;
+        p.luma_vpp[LUMA_16x64] = x265_interp_8tap_vert_pp_16x64_avx2;
+
+        p.luma_vpp[LUMA_24x32] = x265_interp_8tap_vert_pp_24x32_avx2;
+
+        p.luma_vpp[LUMA_32x8] = x265_interp_8tap_vert_pp_32x8_avx2;
+        p.luma_vpp[LUMA_32x16] = x265_interp_8tap_vert_pp_32x16_avx2;
+        p.luma_vpp[LUMA_32x24] = x265_interp_8tap_vert_pp_32x24_avx2;
+        p.luma_vpp[LUMA_32x32] = x265_interp_8tap_vert_pp_32x32_avx2;
+        p.luma_vpp[LUMA_32x64] = x265_interp_8tap_vert_pp_32x64_avx2;
+
+        p.luma_vpp[LUMA_48x64] = x265_interp_8tap_vert_pp_48x64_avx2;
+
+        p.luma_vpp[LUMA_64x16] = x265_interp_8tap_vert_pp_64x16_avx2;
+        p.luma_vpp[LUMA_64x32] = x265_interp_8tap_vert_pp_64x32_avx2;
+        p.luma_vpp[LUMA_64x48] = x265_interp_8tap_vert_pp_64x48_avx2;
+        p.luma_vpp[LUMA_64x64] = x265_interp_8tap_vert_pp_64x64_avx2;
 #endif
         p.luma_hpp[LUMA_4x4] = x265_interp_8tap_horiz_pp_4x4_avx2;
+
+        p.luma_hpp[LUMA_8x4] = x265_interp_8tap_horiz_pp_8x4_avx2;
+        p.luma_hpp[LUMA_8x8] = x265_interp_8tap_horiz_pp_8x8_avx2;
+        p.luma_hpp[LUMA_8x16] = x265_interp_8tap_horiz_pp_8x16_avx2;
+        p.luma_hpp[LUMA_8x32] = x265_interp_8tap_horiz_pp_8x32_avx2;
+
+        p.luma_hpp[LUMA_16x4] = x265_interp_8tap_horiz_pp_16x4_avx2;
+        p.luma_hpp[LUMA_16x8] = x265_interp_8tap_horiz_pp_16x8_avx2;
+        p.luma_hpp[LUMA_16x12] = x265_interp_8tap_horiz_pp_16x12_avx2;
+        p.luma_hpp[LUMA_16x16] = x265_interp_8tap_horiz_pp_16x16_avx2;
+        p.luma_hpp[LUMA_16x32] = x265_interp_8tap_horiz_pp_16x32_avx2;
+        p.luma_hpp[LUMA_16x64] = x265_interp_8tap_horiz_pp_16x64_avx2;
+
+        p.luma_hpp[LUMA_32x8] = x265_interp_8tap_horiz_pp_32x8_avx2;
+        p.luma_hpp[LUMA_32x16] = x265_interp_8tap_horiz_pp_32x16_avx2;
+        p.luma_hpp[LUMA_32x24] = x265_interp_8tap_horiz_pp_32x24_avx2;
+        p.luma_hpp[LUMA_32x32] = x265_interp_8tap_horiz_pp_32x32_avx2;
+        p.luma_hpp[LUMA_32x64] = x265_interp_8tap_horiz_pp_32x64_avx2;
+
+        p.luma_hpp[LUMA_64x64] = x265_interp_8tap_horiz_pp_64x64_avx2;
+        p.luma_hpp[LUMA_64x48] = x265_interp_8tap_horiz_pp_64x48_avx2;
+        p.luma_hpp[LUMA_64x32] = x265_interp_8tap_horiz_pp_64x32_avx2;
+        p.luma_hpp[LUMA_64x16] = x265_interp_8tap_horiz_pp_64x16_avx2;
+
+        p.luma_hpp[LUMA_48x64] = x265_interp_8tap_horiz_pp_48x64_avx2;
+
+        p.chroma[X265_CSP_I420].filter_hpp[CHROMA_8x8] = x265_interp_4tap_horiz_pp_8x8_avx2;
+        p.chroma[X265_CSP_I420].filter_hpp[CHROMA_4x4] = x265_interp_4tap_horiz_pp_4x4_avx2;
+        p.chroma[X265_CSP_I420].filter_hpp[CHROMA_32x32] = x265_interp_4tap_horiz_pp_32x32_avx2;
+        p.chroma[X265_CSP_I420].filter_hpp[CHROMA_16x16] = x265_interp_4tap_horiz_pp_16x16_avx2;
+
+        p.luma_vpp[LUMA_4x4] = x265_interp_8tap_vert_pp_4x4_avx2;
+
+        p.luma_vpp[LUMA_8x4] = x265_interp_8tap_vert_pp_8x4_avx2;
+        p.luma_vpp[LUMA_8x8] = x265_interp_8tap_vert_pp_8x8_avx2;
+        p.luma_vpp[LUMA_8x16] = x265_interp_8tap_vert_pp_8x16_avx2;
+        p.luma_vpp[LUMA_8x32] = x265_interp_8tap_vert_pp_8x32_avx2;
+
+        // color space i420
+        p.chroma[X265_CSP_I420].filter_vpp[CHROMA_4x4] = x265_interp_4tap_vert_pp_4x4_avx2;
+        p.chroma[X265_CSP_I420].filter_vpp[CHROMA_8x8] = x265_interp_4tap_vert_pp_8x8_avx2;
+
+        // color space i422
+        p.chroma[X265_CSP_I422].filter_vpp[CHROMA422_4x4] = x265_interp_4tap_vert_pp_4x4_avx2;
+
+        p.luma_vps[LUMA_4x4] = x265_interp_8tap_vert_ps_4x4_avx2;
+
+#if X86_64
+        p.chroma[X265_CSP_I420].filter_vpp[CHROMA_16x16] = x265_interp_4tap_vert_pp_16x16_avx2;
+        p.chroma[X265_CSP_I420].filter_vpp[CHROMA_32x32] = x265_interp_4tap_vert_pp_32x32_avx2;
+#endif
     }
 #endif // if HIGH_BIT_DEPTH
 }