From: Jérôme Benoit Date: Wed, 18 Jun 2025 20:54:53 +0000 (+0200) Subject: feat(qav3): add scipy kmeans2 support to MO trial selection X-Git-Url: https://git.piment-noir.org/?a=commitdiff_plain;h=6a4238d3c7474b27a58fdcf9162c93142b92a5d5;p=freqai-strategies.git feat(qav3): add scipy kmeans2 support to MO trial selection Signed-off-by: Jérôme Benoit --- diff --git a/quickadapter/user_data/freqaimodels/QuickAdapterRegressorV3.py b/quickadapter/user_data/freqaimodels/QuickAdapterRegressorV3.py index 8d9f367..157d616 100644 --- a/quickadapter/user_data/freqaimodels/QuickAdapterRegressorV3.py +++ b/quickadapter/user_data/freqaimodels/QuickAdapterRegressorV3.py @@ -551,6 +551,7 @@ class QuickAdapterRegressorV3(BaseRegressionModel): "power_mean", "weighted_sum", "kmeans", + "kmeans2", "knn_d1", "knn_d2_mean", "knn_d2_median", @@ -663,7 +664,20 @@ class QuickAdapterRegressorV3(BaseRegressionModel): ) - sp.stats.pmean(normalized_matrix, p=p, weights=np_weights, axis=1) elif metric == "weighted_sum": return np.sum(np_weights * (ideal_point - normalized_matrix), axis=1) - elif metric == "kmeans": + elif metric in {"kmeans", "kmeans2"}: + if n_samples < 2: + return np.ndarray([]) + n_clusters = min(max(2, int(np.sqrt(n_samples / 2))), 10, n_samples) + if metric == "kmeans": + kmeans = sklearn.cluster.KMeans( + n_clusters=n_clusters, random_state=42, n_init=10 + ) + cluster_labels = kmeans.fit_predict(normalized_matrix) + cluster_centers = kmeans.cluster_centers_ + elif metric == "kmeans2": + cluster_centers, cluster_labels = sp.cluster.vq.kmeans2( + normalized_matrix, n_clusters, rng=42, minit="++" + ) label_kmeans_metric = self.ft_params.get( "label_kmeans_metric", "euclidean" ) @@ -672,21 +686,6 @@ class QuickAdapterRegressorV3(BaseRegressionModel): label_p_order, float ): cdist_kwargs["p"] = label_p_order - if n_samples == 0: - return np.array([]) - if n_samples == 1: - return sp.spatial.distance.cdist( - normalized_matrix, - ideal_point_2d, - metric=label_kmeans_metric, - **cdist_kwargs, - ).flatten() - n_clusters = min(max(2, int(np.sqrt(n_samples / 2))), 10, n_samples) - kmeans = sklearn.cluster.KMeans( - n_clusters=n_clusters, random_state=42, n_init=10 - ) - cluster_labels = kmeans.fit_predict(normalized_matrix) - cluster_centers = kmeans.cluster_centers_ cluster_distances_to_ideal = sp.spatial.distance.cdist( cluster_centers, ideal_point_2d,