# Source code for usualsuspects.pca

# The PCA plot tool
# Author: Paul Scherer 2020

import numpy as np
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt


class Quick2DPCA(object):
    """A quick tool for creating 2D PCA plots of an input X, optionally
    coloured by a true classification y.

    From SciKit Docs: Linear dimensionality reduction using SVD of the
    data to project it to a lower dimensional space. The input data is
    centered but not scaled for each feature before applying the SVD.

    This tool is a shortcut to producing PCA plots.

    Parameters
    ----------
    X : array-like
        Data handed unchanged to ``PCA(n_components=2).fit_transform``
        (scikit-learn expects shape ``(n_samples, n_features)``).
    y : sequence, optional
        One value per sample, used to colour the scatter points. When
        omitted, or when its length does not match the number of
        embedded samples, the plot is drawn without colouring.

    Attributes
    ----------
    X, y
        The constructor arguments, stored as given.
    num_components : int
        Number of principal components kept (always 2).
    embeddings : numpy.ndarray
        Result of the PCA projection of ``X``, computed eagerly in the
        constructor.
    """

    def __init__(self, X, y=None):
        super(Quick2DPCA, self).__init__()
        self.X = X
        self.y = y
        self.num_components = 2
        pca = PCA(n_components=self.num_components)
        self.embeddings = pca.fit_transform(X)

    def plot_embedding(self, title=None, save_to="pca_plot.png"):
        """Creates and saves a matplotlib pyplot of the first 2 eigenvectors
        of the PCA operation on X

        Each component is min-max rescaled to the range [0, 1] before
        plotting. The scatter is drawn through the ``pyplot`` state
        machine, i.e. onto the current figure, which is neither cleared
        beforehand nor closed afterwards.

        Parameters
        ----------
        title : str
            Optional title for the plot
        save_to : str
            path and file_name where the image will be saved. Extension
            is contextual but expected behaviour is to use .png

        Returns
        -------
        None
            Saves a PNG image of the plot to the path in `save_to`
        """
        x_min = np.min(self.embeddings, 0)
        x_max = np.max(self.embeddings, 0)
        span = x_max - x_min
        # A component with zero spread would otherwise divide by zero and
        # fill the column with NaN; dividing by 1 maps it to 0 instead.
        span = np.where(span == 0, 1, span)
        embs_to_plot = (self.embeddings - x_min) / span

        # Colour only when exactly one label per embedded sample was given.
        # Anything else (no labels, wrong length) falls back to a plain plot.
        n_samples = self.embeddings.shape[0]
        colours = None
        if self.y is not None and len(self.y) == n_samples:
            colours = self.y

        plt.scatter(embs_to_plot[:, 0], embs_to_plot[:, 1], c=colours)
        plt.xlabel('First Eigenvector')
        plt.ylabel('Second Eigenvector')
        plt.title(title)
        plt.savefig(save_to)
if __name__ == '__main__':
    # Smoke test: embed the first 7 digit classes of scikit-learn's digits
    # dataset and write the coloured scatter to the default "pca_plot.png".
    from sklearn import datasets

    digits = datasets.load_digits(n_class=7)
    pcaplot = Quick2DPCA(digits.data, digits.target)
    pcaplot.plot_embedding(title="Test plot")