From 00e75e1de6af736cb5b5a9e875269087d9180fc0 Mon Sep 17 00:00:00 2001 From: Moritz Schaefer Date: Fri, 21 Feb 2020 13:33:59 +0100 Subject: [PATCH 1/6] Preserve order of samples/classes/labels for PCA plot visualization (plot_pca_2d_projection) --- scikitplot/decomposition.py | 5 ++++- scikitplot/tests/test_decomposition.py | 27 ++++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/scikitplot/decomposition.py b/scikitplot/decomposition.py index d3b28e3..f88204f 100644 --- a/scikitplot/decomposition.py +++ b/scikitplot/decomposition.py @@ -163,7 +163,10 @@ def plot_pca_2d_projection(clf, X, y, title='PCA 2-D Projection', fig, ax = plt.subplots(1, 1, figsize=figsize) ax.set_title(title, fontsize=title_fontsize) - classes = np.unique(np.array(y)) + + # Get unique classes from y, preserving order of class occurrence in y + _, class_indexes = np.unique(np.array(y), return_index=True) + classes = np.array(y)[np.sort(class_indexes)] colors = plt.cm.get_cmap(cmap)(np.linspace(0, 1, len(classes))) diff --git a/scikitplot/tests/test_decomposition.py b/scikitplot/tests/test_decomposition.py index f7e555b..3c3e7af 100644 --- a/scikitplot/tests/test_decomposition.py +++ b/scikitplot/tests/test_decomposition.py @@ -9,6 +9,7 @@ from scikitplot.decomposition import plot_pca_component_variance from scikitplot.decomposition import plot_pca_2d_projection +import scikitplot class TestPlotPCAComponentVariance(unittest.TestCase): @@ -81,3 +82,29 @@ def test_biplot(self): clf.fit(self.X) ax = plot_pca_2d_projection(clf, self.X, self.y, biplot=True, feature_labels=load_data().feature_names) + + def test_label_order(self): + ''' + Plot labels should be in the same order as the classes in the provided y-array + ''' + np.random.seed(0) + clf = PCA() + clf.fit(self.X) + + # define y such that the first entry is 1 + y = np.copy(self.y) + y[0] = 1 # load_iris is by default ordered (i.e.: 0 0 0 ... 1 1 1 ...
2 2 2) + + # test with len(y) == X.shape[0] with multiple rows belonging to the same class + ax = plot_pca_2d_projection(clf, self.X, y, cmap='Spectral') + legend_labels = ax.get_legend_handles_labels()[1] + self.assertListEqual(['1', '0', '2'], legend_labels) + + # test with len(y) == #classes with each row belonging to an individual class + y = list(range(len(y))) + np.random.shuffle(y) + ax = plot_pca_2d_projection(clf, self.X, y, cmap='Spectral') + legend_labels = ax.get_legend_handles_labels()[1] + self.assertListEqual([str(v) for v in y], legend_labels) + + From da4029703ab8bf45f9e417854d75727471ff8596 Mon Sep 17 00:00:00 2001 From: Moritz Schaefer Date: Fri, 21 Feb 2020 15:37:16 +0100 Subject: [PATCH 2/6] Allow optional plotting of labels onto the dots --- scikitplot/decomposition.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/scikitplot/decomposition.py b/scikitplot/decomposition.py index f88204f..39be0a0 100644 --- a/scikitplot/decomposition.py +++ b/scikitplot/decomposition.py @@ -97,7 +97,8 @@ def plot_pca_component_variance(clf, title='PCA Component Explained Variances', def plot_pca_2d_projection(clf, X, y, title='PCA 2-D Projection', biplot=False, feature_labels=None, ax=None, figsize=None, cmap='Spectral', - title_fontsize="large", text_fontsize="medium"): + title_fontsize="large", text_fontsize="medium", + label_dots=False): """Plots the 2-dimensional projection of PCA on a given dataset. 
Args: @@ -174,6 +175,10 @@ def plot_pca_2d_projection(clf, X, y, title='PCA 2-D Projection', ax.scatter(transformed_X[y == label, 0], transformed_X[y == label, 1], alpha=0.8, lw=2, label=label, color=color) + if label_dots: + for dot in transformed_X[y == label, 0:2]: + ax.text(*dot, label) + if biplot: xs = transformed_X[:, 0] ys = transformed_X[:, 1] From 54494e4c8cf5e0573e5bdf13d0836975a28aec98 Mon Sep 17 00:00:00 2001 From: Moritz Schaefer Date: Wed, 11 Aug 2021 23:44:50 +0200 Subject: [PATCH 3/6] variably choose which dimensions to plot --- scikitplot/decomposition.py | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/scikitplot/decomposition.py b/scikitplot/decomposition.py index 39be0a0..cc90141 100644 --- a/scikitplot/decomposition.py +++ b/scikitplot/decomposition.py @@ -5,8 +5,8 @@ properties shared by scikit-learn estimators. The specific requirements are documented per function. """ -from __future__ import absolute_import, division, print_function, \ - unicode_literals +from __future__ import (absolute_import, division, print_function, + unicode_literals) import matplotlib.pyplot as plt import numpy as np @@ -95,6 +95,7 @@ def plot_pca_component_variance(clf, title='PCA Component Explained Variances', def plot_pca_2d_projection(clf, X, y, title='PCA 2-D Projection', + dimensions=[0, 1], biplot=False, feature_labels=None, ax=None, figsize=None, cmap='Spectral', title_fontsize="large", text_fontsize="medium", @@ -172,31 +173,31 @@ def plot_pca_2d_projection(clf, X, y, title='PCA 2-D Projection', colors = plt.cm.get_cmap(cmap)(np.linspace(0, 1, len(classes))) for label, color in zip(classes, colors): - ax.scatter(transformed_X[y == label, 0], transformed_X[y == label, 1], + ax.scatter(transformed_X[y == label, dimensions[0]], transformed_X[y == label, dimensions[1]], alpha=0.8, lw=2, label=label, color=color) if label_dots: - for dot in transformed_X[y == label, 0:2]: + for dot in transformed_X[y == label, 
dimensions]: ax.text(*dot, label) if biplot: - xs = transformed_X[:, 0] - ys = transformed_X[:, 1] - vectors = np.transpose(clf.components_[:2, :]) + xs = transformed_X[:, dimensions[0]] + ys = transformed_X[:, dimensions[1]] + vectors = np.transpose(clf.components_[dimensions, :]) vectors_scaled = vectors * [xs.max(), ys.max()] for i in range(vectors.shape[0]): - ax.annotate("", xy=(vectors_scaled[i, 0], vectors_scaled[i, 1]), + ax.annotate("", xy=(vectors_scaled[i, dimensions[0]], vectors_scaled[i, dimensions[1]]), xycoords='data', xytext=(0, 0), textcoords='data', arrowprops={'arrowstyle': '-|>', 'ec': 'r'}) - ax.text(vectors_scaled[i, 0] * 1.05, vectors_scaled[i, 1] * 1.05, + ax.text(vectors_scaled[i, dimensions[0]] * 1.05, vectors_scaled[i, dimensions[1]] * 1.05, feature_labels[i] if feature_labels else "Variable" + str(i), color='b', fontsize=text_fontsize) ax.legend(loc='best', shadow=False, scatterpoints=1, fontsize=text_fontsize) - ax.set_xlabel('First Principal Component', fontsize=text_fontsize) - ax.set_ylabel('Second Principal Component', fontsize=text_fontsize) + ax.set_xlabel(f'Principal Component {dimensions[0]+1}', fontsize=text_fontsize) + ax.set_ylabel(f'Principal Component {dimension[1]+1}', fontsize=text_fontsize) ax.tick_params(labelsize=text_fontsize) return ax From 0d0546d2d615cddd561617f3b76f1d967ba66b79 Mon Sep 17 00:00:00 2001 From: Moritz Schaefer Date: Thu, 12 Aug 2021 00:45:04 +0200 Subject: [PATCH 4/6] fix typo --- scikitplot/decomposition.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scikitplot/decomposition.py b/scikitplot/decomposition.py index cc90141..b174af9 100644 --- a/scikitplot/decomposition.py +++ b/scikitplot/decomposition.py @@ -197,7 +197,7 @@ def plot_pca_2d_projection(clf, X, y, title='PCA 2-D Projection', ax.legend(loc='best', shadow=False, scatterpoints=1, fontsize=text_fontsize) ax.set_xlabel(f'Principal Component {dimensions[0]+1}', fontsize=text_fontsize) - ax.set_ylabel(f'Principal 
Component {dimension[1]+1}', fontsize=text_fontsize) + ax.set_ylabel(f'Principal Component {dimensions[1]+1}', fontsize=text_fontsize) ax.tick_params(labelsize=text_fontsize) return ax From 2279aab64fd0d611c1a9ef1acd6d4d259f1065ff Mon Sep 17 00:00:00 2001 From: Moritz Schaefer Date: Thu, 12 Aug 2021 01:11:52 +0200 Subject: [PATCH 5/6] fix missing newaxis --- scikitplot/decomposition.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scikitplot/decomposition.py b/scikitplot/decomposition.py index b174af9..96e7d5f 100644 --- a/scikitplot/decomposition.py +++ b/scikitplot/decomposition.py @@ -177,7 +177,7 @@ def plot_pca_2d_projection(clf, X, y, title='PCA 2-D Projection', alpha=0.8, lw=2, label=label, color=color) if label_dots: - for dot in transformed_X[y == label, dimensions]: + for dot in transformed_X[y == label, np.array(dimensions)[:, np.newaxis]]: ax.text(*dot, label) if biplot: From 70ea50616366c87ef730f53efb192217b725a9f0 Mon Sep 17 00:00:00 2001 From: Moritz Schaefer Date: Thu, 12 Aug 2021 01:39:49 +0200 Subject: [PATCH 6/6] fix indexing --- scikitplot/decomposition.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scikitplot/decomposition.py b/scikitplot/decomposition.py index 96e7d5f..d7892cb 100644 --- a/scikitplot/decomposition.py +++ b/scikitplot/decomposition.py @@ -177,7 +177,7 @@ def plot_pca_2d_projection(clf, X, y, title='PCA 2-D Projection', alpha=0.8, lw=2, label=label, color=color) if label_dots: - for dot in transformed_X[y == label, np.array(dimensions)[:, np.newaxis]]: + for dot in transformed_X[y == label][:, dimensions]: ax.text(*dot, label) if biplot: