import numpy as np
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
4
# Segmentation pipeline: standardize -> PCA -> KMeans.
# Expects `X`, a 2-D feature matrix (rows = samples, columns = features),
# to be defined before this point. The elbow scan below fits up to 10
# clusters, so X needs at least 10 rows.

# Step 1: Standardize each feature to zero mean / unit variance. PCA and
# KMeans are both variance/distance based, so unscaled features with large
# ranges would otherwise dominate the result.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Step 2: PCA, keeping the smallest number of components whose cumulative
# explained variance reaches 95%.
pca = PCA(n_components=0.95)
X_pca = pca.fit_transform(X_scaled)
print(f"Features: {X.shape[1]} → {X_pca.shape[1]}")

# Step 3: Elbow method to choose K. Every candidate model uses the same seed
# and an explicit n_init so the inertia curve is reproducible and does not
# depend on the scikit-learn version's default for n_init.
k_values = range(1, 11)
inertias = [
    KMeans(n_clusters=k, n_init=10, random_state=42).fit(X_pca).inertia_
    for k in k_values
]

# Plot `inertias` against `k_values` (this script does not draw the plot
# itself) and pick the K where the curve bends. K=4 is the value chosen for
# the data this was written against; re-check it if X changes.
km = KMeans(n_clusters=4, n_init=10, random_state=42)
labels = km.fit_predict(X_pca)

# Report how many samples landed in each cluster. Convert NumPy scalars to
# plain ints so the dict prints as {0: 120, 1: 85, ...} on every NumPy version.
segment_ids, segment_sizes = np.unique(labels, return_counts=True)
segments = {
    int(segment_id): int(size)
    for segment_id, size in zip(segment_ids, segment_sizes)
}
print(f"Segments: {segments}")