7 #ifndef MACHINE_LEARNING_KNN_HPP 8 #define MACHINE_LEARNING_KNN_HPP 26 vector<vector<double>>
data;
34 void getKNN(
int *chosen_indices,
const vector<double> &testie) {
38 double chosen_distances[k];
40 int largestDistanceIndex = 0;
41 double largestCandidateDistance = distance == EUCLIDEAN ? euclidean(testie, data[0]) : hamming(testie, data[0]);;
42 for (
int i = 0; i < data.size(); i++) {
44 double current_distance = distance == EUCLIDEAN ? euclidean(testie, data[i]) : hamming(testie, data[i]);
48 chosen_indices[i] = i;
49 chosen_distances[i] = current_distance;
51 if (current_distance >= largestCandidateDistance) {
52 largestDistanceIndex = i;
53 largestCandidateDistance = current_distance;
57 else if (current_distance < largestCandidateDistance) {
60 chosen_indices[largestDistanceIndex] = i;
61 chosen_distances[largestDistanceIndex] = current_distance;
64 largestDistanceIndex = 0;
65 largestCandidateDistance = chosen_distances[0];
66 for (
int j = 1; j < k; j++) {
67 if (largestCandidateDistance < chosen_distances[j]) {
68 largestDistanceIndex = j;
69 largestCandidateDistance = chosen_distances[j];
86 const vector<vector<double>> &
getData()
const {
98 explicit KNN(vector<vector<double>> data,
int yColumn,
int k = 1,
Distance distance = EUCLIDEAN) {
99 this->data = std::move(data);
101 sort(this->data.begin(), this->data.end(), [](
const vector<double> &a,
const vector<double> &b) {
102 for (
int i = 0; i < a.size(); i++) {
111 this->yColumn = yColumn;
113 this->distance = distance;
122 for (
int i = 0; i < a.size(); i++) {
127 d += pow(a[i] - b[i], 2);
136 double hamming(vector<double> a, vector<double> b) {
138 for (
int i = 0; i < a.size(); i++) {
153 int chosen_indices[k];
154 getKNN(chosen_indices, testie);
159 #pragma omp parallel for reduction(+:ySum) 160 for (
int j = 0; j < k; j++) {
161 ySum += data[chosen_indices[j]][yColumn];
170 int chosen_indices[k];
171 getKNN(chosen_indices, testie);
174 int class_votes[k] = {0};
175 vector<double> classes;
179 classes.push_back(data[chosen_indices[0]][yColumn]);
180 for (
int j = 1; j < k; j++) {
181 double current_class = data[chosen_indices[j]][yColumn];
182 for (
int i = 0; i < classes.size(); i++) {
183 if (classes[i] == current_class) {
191 double winner = classes[0];
192 int winner_votes = class_votes[0];
193 for (
int j = 1; j < k; j++) {
194 if (class_votes[j] > winner_votes) {
196 winner_votes = class_votes[j];
203 vector<double>
classify(
const vector<vector<double>> &test,
bool verbose =
false) {
205 unsigned long totalSize = test.size();
207 using clock = chrono::high_resolution_clock;
208 chrono::time_point<chrono::system_clock> start = clock::now();
210 for (
int i = 0; i < test.size(); i++) {
211 if (verbose and i % 100 == 0) {
212 float tempo = ((chrono::duration<float>) (clock::now() - start)).count();
213 float media = tempo / (i + 1);
214 float total = media * totalSize;
215 cout << (total - tempo) / 60 << endl;
217 y.push_back(classify(test[i]));
223 vector<double>
regression(
const vector<vector<double>> &test,
bool verbose =
false) {
225 unsigned long totalSize = test.size();
227 for (
int i = 0; i < test.size(); i++) {
228 if (verbose and i % (totalSize / 100) == 0)
229 cout << totalSize / (i + 1);
230 y.push_back(regression(test[i]));
double euclidean(vector< double > a, vector< double > b)
Calculates the Euclidean distance between two vectors.
k-nearest neighbors algorithm
double hamming(vector< double > a, vector< double > b)
Calculates the Hamming distance between two vectors.
vector< double > regression(const vector< vector< double >> &test, bool verbose=false)
k-nearest neighbors algorithm, able to do regression and classification
void getKNN(int *chosen_indices, const vector< double > &testie)
Finds the k-nearest neighbors of a data element.
vector< double > classify(const vector< vector< double >> &test, bool verbose=false)
const vector< vector< double > > & getData() const
void setDistance(Distance distance)
double regression(const vector< double > &testie)
Perform regression using.
double classify(const vector< double > &testie)
KNN(vector< vector< double >> data, int yColumn, int k=1, Distance distance=EUCLIDEAN)
k-nearest neighbors algorithm, able to do regression and classification
vector< vector< double > > data
Distance getDistance() const