+#if HAVE_BIGENDIAN
+// The 3 above is 2 (filterSize == 4) + 1 (sizeof(short) == 2).
+
+// The neat trick: We only care for half the elements,
+// high or low depending on (i<<3)%16 (it's 0 or 8 here),
+// and we're going to use vec_mule, so we choose
+// carefully how to "unpack" the elements into the even slots.
+#define GET_VF4(a, vf, f) {\
+ vf = vec_ld(a<< 3, f);\
+ if ((a << 3) % 16)\
+ vf = vec_mergel(vf, (vector signed short)vzero);\
+ else\
+ vf = vec_mergeh(vf, (vector signed short)vzero);\
+}
+#define FIRST_LOAD(sv, pos, s, per) {\
+ sv = vec_ld(pos, s);\
+ per = vec_lvsl(pos, s);\
+}
+#define UPDATE_PTR(s0, d0, s1, d1) {\
+ d0 = s0;\
+ d1 = s1;\
+}
+#define LOAD_SRCV(pos, a, s, per, v0, v1, vf) {\
+ v1 = vec_ld(pos + a + 16, s);\
+ vf = vec_perm(v0, v1, per);\
+}
+#define LOAD_SRCV8(pos, a, s, per, v0, v1, vf) {\
+ if ((((uintptr_t)s + pos) % 16) > 8) {\
+ v1 = vec_ld(pos + a + 16, s);\
+ }\
+ vf = vec_perm(v0, src_v1, per);\
+}
+#define GET_VFD(a, b, f, vf0, vf1, per, vf, off) {\
+ vf1 = vec_ld((a * 2 * filterSize) + (b * 2) + 16 + off, f);\
+ vf = vec_perm(vf0, vf1, per);\
+}
+#else /* else of #if HAVE_BIGENDIAN */
+#define GET_VF4(a, vf, f) {\
+ vf = (vector signed short)vec_vsx_ld(a << 3, f);\
+ vf = vec_mergeh(vf, (vector signed short)vzero);\
+}
+#define FIRST_LOAD(sv, pos, s, per) {}
+#define UPDATE_PTR(s0, d0, s1, d1) {}
+#define LOAD_SRCV(pos, a, s, per, v0, v1, vf) {\
+ vf = vec_vsx_ld(pos + a, s);\
+}
+#define LOAD_SRCV8(pos, a, s, per, v0, v1, vf) LOAD_SRCV(pos, a, s, per, v0, v1, vf)
+#define GET_VFD(a, b, f, vf0, vf1, per, vf, off) {\
+ vf = vec_vsx_ld((a * 2 * filterSize) + (b * 2) + off, f);\
+}
+#endif /* end of #if HAVE_BIGENDIAN */
+