"power_mean",
"weighted_sum",
"kmeans",
+ "kmeans2",
"knn_d1",
"knn_d2_mean",
"knn_d2_median",
) - sp.stats.pmean(normalized_matrix, p=p, weights=np_weights, axis=1)
elif metric == "weighted_sum":
return np.sum(np_weights * (ideal_point - normalized_matrix), axis=1)
- elif metric == "kmeans":
+ elif metric in {"kmeans", "kmeans2"}:
+ if n_samples < 2:
+ return np.array([])
+ n_clusters = min(max(2, int(np.sqrt(n_samples / 2))), 10, n_samples)
+ if metric == "kmeans":
+ kmeans = sklearn.cluster.KMeans(
+ n_clusters=n_clusters, random_state=42, n_init=10
+ )
+ cluster_labels = kmeans.fit_predict(normalized_matrix)
+ cluster_centers = kmeans.cluster_centers_
+ elif metric == "kmeans2":
+ cluster_centers, cluster_labels = sp.cluster.vq.kmeans2(
+ normalized_matrix, n_clusters, rng=42, minit="++"
+ )
label_kmeans_metric = self.ft_params.get(
"label_kmeans_metric", "euclidean"
)
label_p_order, float
):
cdist_kwargs["p"] = label_p_order
- if n_samples == 0:
- return np.array([])
- if n_samples == 1:
- return sp.spatial.distance.cdist(
- normalized_matrix,
- ideal_point_2d,
- metric=label_kmeans_metric,
- **cdist_kwargs,
- ).flatten()
- n_clusters = min(max(2, int(np.sqrt(n_samples / 2))), 10, n_samples)
- kmeans = sklearn.cluster.KMeans(
- n_clusters=n_clusters, random_state=42, n_init=10
- )
- cluster_labels = kmeans.fit_predict(normalized_matrix)
- cluster_centers = kmeans.cluster_centers_
cluster_distances_to_ideal = sp.spatial.distance.cdist(
cluster_centers,
ideal_point_2d,