Machine learning algorithms in C++
PCA.hpp
Go to the documentation of this file.
1 
8 #ifndef MACHINE_LEARNING_PCA_HPP
9 #define MACHINE_LEARNING_PCA_HPP
10 
11 #include "../include/matrix/Matrix.hpp"
12 
13 using namespace std;
14 
18 class PCA {
19 
20  private:
21  MatrixD X, eigenvalues, eigenvectors, percentages, cumPercentages;
22  public :
23 
28  explicit PCA(MatrixD data) {
29  X = std::move(data);
30  }
31 
35  void fit() {
36  MatrixD XMinusMean = X.minusMean(); // standardize columns to have 0 mean
37  MatrixD covariances = XMinusMean.cov(); // get covariance matrix of the data
38 
39  // get the sum of variances, this'll be useful later
40  double sumVar = 0;
41  for (size_t i = 0; i < covariances.nRows(); i++) {
42  sumVar += covariances(i, i);
43  }
44 
45  pair<MatrixD, MatrixD> eig = covariances.eigen(); // eigenvalues and eigenvectors of cov matrix
46  eigenvalues = eig.first;
47  eigenvectors = eig.second;
48 
49  // calculate the percentage of variance that each eigenvalue "explains"
50  percentages = MatrixD(eigenvalues.nRows(), eigenvalues.nCols());
51  cumPercentages = MatrixD(eigenvalues.nRows(), eigenvalues.nCols());
52  for (int i = 0; i < eigenvalues.nRows(); i++) {
53  percentages(i, 0) = eigenvalues(i, 0) / sumVar;
54  cumPercentages(i, 0) = i == 0 ? percentages(i, 0) : percentages(i, 0) + cumPercentages(i - 1, 0);
55  }
56  }
57 
60  MatrixD transform() {
61  MatrixD finalData = eigenvectors.transpose() * X.minusMean().transpose();
62  return finalData.transpose();
63  }
64 
65 
68  MatrixD transform(int numComponents) {
69  MatrixI filter = MatrixI::zeros(eigenvalues.nRows(), 1);
70 
71  for (int i = 0; i < numComponents; i++) {
72  filter(i, 0) = 1;
73  }
74 
75  MatrixD finalData = eigenvectors.getColumns(filter).transpose() * X.minusMean().transpose();
76  return finalData.transpose();
77  }
78 
79  const MatrixD &getEigenvalues() const {
80  return eigenvalues;
81  }
82 
83  const MatrixD &getEigenvectors() const {
84  return eigenvectors;
85  }
86 
87  const MatrixD &getPercentages() const {
88  return percentages;
89  }
90 
91  const MatrixD &getCumPercentages() const {
92  return cumPercentages;
93  }
94 };
95 
96 #endif //MACHINE_LEARNING_PCA_HPP
k-nearest neighbors algorithm, able to do regression and classification
MatrixD X
Definition: PCA.hpp:21
Principal component analysis.
Definition: PCA.hpp:18
PCA(MatrixD data)
Principal component analysis algorithm.
Definition: PCA.hpp:28
const MatrixD & getCumPercentages() const
Definition: PCA.hpp:91
const MatrixD & getEigenvectors() const
Definition: PCA.hpp:83
MatrixD transform(int numComponents)
Rotates the data set, using the eigenvectors of the covariance matrix with the largest eigenvalues as the new base.
Definition: PCA.hpp:68
const MatrixD & getPercentages() const
Definition: PCA.hpp:87
MatrixD transform()
Rotates the data set, using the eigenvectors of the covariance matrix as the new base.
Definition: PCA.hpp:60
const MatrixD & getEigenvalues() const
Definition: PCA.hpp:79
void fit()
Finds the principal components of a Matrix.
Definition: PCA.hpp:35