7 #ifndef MACHINE_LEARNING_KMEANS_HPP 8 #define MACHINE_LEARNING_KMEANS_HPP 10 #include "../include/matrix/Matrix.hpp" 12 #include "../include/mersenne_twister/MersenneTwister.hpp" 43 throw invalid_argument(
"Data elements and cluster centroids don't have the same number of dimensions.");
46 MatrixD results = MatrixD::zeros(data.nRows(), 1);
48 for (
size_t i = 0; i < distances.nRows(); i++) {
49 for (
size_t j = 1; j < distances.nCols(); j++) {
50 double current_c = results(i, 0);
51 double current_d = distances(i, current_c);
53 double new_d = distances(i, new_c);
54 if (new_d < current_d)
55 results(i, 0) = new_c;
72 void fit(MatrixD data,
74 unsigned int iters = 100,
75 unsigned int inits = 100,
78 this->X = data.standardize();
82 this->totalIterations = 0;
83 MersenneTwister twister;
86 for (
int currentInit = 0; currentInit < inits; currentInit++) {
90 for (
size_t i = 0; i <
centroids.nRows(); i++)
91 for (
size_t j = 0; j <
centroids.nCols(); j++)
92 centroids(i, j) = twister.d_random(
X.getColumn(j).min(),
93 X.getColumn(j).max());
95 vector<int> sample = twister.randomValues(
X.nRows(),
k,
false);
97 for (
int i = 0; i < sample.size(); i++)
103 for (
int currentIteration = 0; currentIteration < iters; currentIteration++) {
105 cout << currentInit <<
'/' << inits + 1 <<
'\t' 106 << currentIteration <<
'/' << iters + 1 <<
'\t' 117 if (currentInit == 0 or
SSE() < minSSE) {
151 #endif //MACHINE_LEARNING_KMEANS_HPP void fit(MatrixD data, unsigned int k, unsigned int iters=100, unsigned int inits=100, double distance=2, InitializationMethod initMethod=SAMPLE, bool verbose=false)
Find the k centroids that best fit the data.
static MatrixD minkowski(MatrixD m, double p, bool root=true)
Calculates the Minkowski distances between elements in a matrix.
MatrixD predict(MatrixD data)
Assigns elements of a data set to clusters.
static MatrixD euclidean(MatrixD m, bool root=true)
Calculates the Euclidean distances between elements in a matrix.
Implementation of the k-means algorithm.
unsigned int getTotalIterations() const
double getDistance() const
unsigned int totalIterations
const MatrixD & getY() const
unsigned int getK() const
const MatrixD & getCentroids() const
InitializationMethod initMethod