From 4d705375dff3734dd5fb74de73b2b56b9fae84ac Mon Sep 17 00:00:00 2001
From: Jakhes <dean.schmitz@schmitzbauer.de>
Date: Mon, 12 Sep 2022 18:17:47 +0200
Subject: [PATCH] Adding Kmeans to the Project

---
 src/helper_files/helper.cpp       |   6 +
 src/helper_files/helper.hpp       |   2 +
 src/methods/kmeans/Makefile       |  13 ++
 src/methods/kmeans/kmeans.cpp     | 232 ++++++++++++++++++++++++++++++
 src/methods/kmeans/kmeans.pl      |  79 ++++++++++
 src/methods/kmeans/kmeans_test.pl |  60 ++++++++
 6 files changed, 392 insertions(+)
 create mode 100644 src/methods/kmeans/Makefile
 create mode 100644 src/methods/kmeans/kmeans.cpp
 create mode 100644 src/methods/kmeans/kmeans.pl
 create mode 100644 src/methods/kmeans/kmeans_test.pl

diff --git a/src/helper_files/helper.cpp b/src/helper_files/helper.cpp
index ec3dd05..44bc9c2 100644
--- a/src/helper_files/helper.cpp
+++ b/src/helper_files/helper.cpp
@@ -44,6 +44,12 @@ float *convertToArray(vector<double> vec)
     return convertToArray(newVec);
 }
 
+float *convertToArray(Row<size_t> vec)
+{
+    colvec newVec = conv_to<colvec>::from(vec);
+    return convertToArray(newVec);
+}
+
 float *convertToArray(vector<vec> matrix)
 {
     vec newVec = matrix[0];
diff --git a/src/helper_files/helper.hpp b/src/helper_files/helper.hpp
index 5a21234..80e45ac 100644
--- a/src/helper_files/helper.hpp
+++ b/src/helper_files/helper.hpp
@@ -17,6 +17,8 @@ float *convertToArray(vector<size_t> vec);
 
 float *convertToArray(vector<double> vec);
 
+float *convertToArray(Row<size_t> vec);
+
 float *convertToArray(vector<vec> vec);
 
 
diff --git a/src/methods/kmeans/Makefile b/src/methods/kmeans/Makefile
new file mode 100644
index 0000000..ddaebe1
--- /dev/null
+++ b/src/methods/kmeans/Makefile
@@ -0,0 +1,13 @@
+# Path of the SICStus foreign resource linker; ?= lets the environment override it
+splfr?=/usr/local/sicstus4.7.1/bin/splfr
+
+METHOD_NAME=kmeans
+
+# helper.cpp is compiled into the resource, so it is a prerequisite as well
+$(METHOD_NAME).so: $(METHOD_NAME).pl $(METHOD_NAME).cpp ../../helper_files/helper.cpp
+	$(splfr) -larmadillo -fopenmp -lmlpack -lstdc++ -cxx --struct $(METHOD_NAME).pl $(METHOD_NAME).cpp ../../helper_files/helper.cpp
+
+# clean is not a file; -f keeps it from failing when the resource was never built
+.PHONY: clean
+clean:
+	rm -f $(METHOD_NAME).so
diff --git 
a/src/methods/kmeans/kmeans.cpp b/src/methods/kmeans/kmeans.cpp
new file mode 100644
index 0000000..41dda47
--- /dev/null
+++ b/src/methods/kmeans/kmeans.cpp
@@ -0,0 +1,232 @@
+#include <sicstus/sicstus.h>
+/* kmeans_glue.h is generated by splfr from the foreign/[2,3] facts.
+   Always include the glue header in your foreign resource code.
+*/
+#include "kmeans_glue.h"
+#include <mlpack/methods/kmeans/kmeans.hpp>
+
+// Include initialization strategies.
+#include <mlpack/methods/kmeans/sample_initialization.hpp>
+#include <mlpack/methods/kmeans/random_partition.hpp>
+
+// Include empty cluster policies.
+#include <mlpack/methods/kmeans/max_variance_new_cluster.hpp>
+#include <mlpack/methods/kmeans/kill_empty_clusters.hpp>
+#include <mlpack/methods/kmeans/allow_empty_clusters.hpp>
+
+// Include Lloyd step types.
+#include <mlpack/methods/kmeans/dual_tree_kmeans.hpp>
+#include <mlpack/methods/kmeans/elkan_kmeans.hpp>
+#include <mlpack/methods/kmeans/hamerly_kmeans.hpp>
+#include <mlpack/methods/kmeans/pelleg_moore_kmeans.hpp>
+#include <mlpack/core.hpp>
+
+#include <exception>
+#include <iostream>
+
+// including helper functions for converting between arma structures and arrays
+#include "../../helper_files/helper.hpp"
+
+// some of the most used namespaces
+using namespace arma;
+using namespace mlpack;
+using namespace std;
+using namespace mlpack::kmeans;
+using namespace mlpack::metric;
+
+// internal function
+// builds the KMeans variant selected by the template arguments and runs Cluster on the data
+template <typename InitialPartitionPolicy, typename EmptyClusterPolicy, template <class, class> class LloydStepType>
+static void RunKMeans(size_t maxIterations, const mat &data, size_t clusters,
+                      arma::Row<size_t> &assignments, mat &centroids)
+{
+    KMeans<EuclideanDistance, InitialPartitionPolicy, EmptyClusterPolicy, LloydStepType>(maxIterations)
+        .Cluster(data, clusters, assignments, centroids, false, false);
+}
+
+// internal function
+// maps emptyCluster (0, 1 or 2) to the empty cluster policy and runs the clustering,
+// any other value only reports the wrong input and leaves the results untouched
+template <typename InitialPartitionPolicy, template <class, class> class LloydStepType>
+static void ClusterWithEmptyClusterPolicy(SP_integer emptyCluster, size_t maxIterations, const mat &data, size_t clusters,
+                                          arma::Row<size_t> &assignments, mat &centroids)
+{
+    switch (emptyCluster)
+    {
+    case 0:
+        // MaxVarianceNewCluster
+        RunKMeans<InitialPartitionPolicy, MaxVarianceNewCluster, LloydStepType>(maxIterations, data, clusters, assignments, centroids);
+        break;
+
+    case 1:
+        // KillEmptyClusters
+        RunKMeans<InitialPartitionPolicy, KillEmptyClusters, LloydStepType>(maxIterations, data, clusters, assignments, centroids);
+        break;
+
+    case 2:
+        // AllowEmptyClusters
+        RunKMeans<InitialPartitionPolicy, AllowEmptyClusters, LloydStepType>(maxIterations, data, clusters, assignments, centroids);
+        break;
+
+    default:
+        // Wrong input
+        cerr << "wrong input of emptyCluster, should be 0, 1 or 2" << endl;
+        break;
+    }
+}
+
+// internal function
+// initiates KMeans with the given Lloyd step type, clusters the data and returns the results
+//
+// input:  maxIterations     iteration limit handed to the KMeans constructor, must not be negative
+//         initialPartition  0 = SampleInitialization, 1 = RandomPartition
+//         emptyCluster      0 = MaxVarianceNewCluster, 1 = KillEmptyClusters, 2 = AllowEmptyClusters
+//         dataMatArr, dataMatSize, dataMatRowNum
+//                           data matrix as one long array, passed on to convertArrayToMat
+//         clusters          number of clusters, must be at least 1
+// output: assignmentsArr, assignmentsArrSize
+//                           the assignments vector as array and its number of elements
+//         centroidsMatArr, centroidsMatColNum, centroidsMatRowNum
+//                           the centroids matrix as one long array and its dimensions
+//
+// A wrong input or an exception thrown while clustering is reported on stderr and the
+// results stay empty (all sizes 0), so no C++ exception reaches the Prolog caller.
+template< template<class,class> class LloydStepType=NaiveKMeans>
+void InitAndClusterKMeans(SP_integer maxIterations, SP_integer initialPartition, SP_integer emptyCluster, float *dataMatArr, SP_integer dataMatSize, SP_integer dataMatRowNum, SP_integer clusters,
+                          float **assignmentsArr, SP_integer *assignmentsArrSize, float **centroidsMatArr, SP_integer *centroidsMatColNum, SP_integer *centroidsMatRowNum)
+{
+    // get the ReturnVector
+    arma::Row<size_t> toReturnVector;
+
+    // get the ReturnMat
+    mat toReturnMat;
+
+    if (maxIterations < 0 || clusters < 1 || dataMatRowNum < 1)
+    {
+        // negative values would wrap around to huge numbers when converted to size_t
+        cerr << "wrong input of maxIterations, clusters or dataMatRowNum" << endl;
+    }
+    else
+    {
+        try
+        {
+            // convert the Prolog arrays to arma::mat
+            const mat data = convertArrayToMat(dataMatArr, dataMatSize, dataMatRowNum);
+            const size_t iterationLimit = static_cast<size_t>(maxIterations);
+            const size_t clusterCount = static_cast<size_t>(clusters);
+
+            switch (initialPartition)
+            {
+            case 0:
+                // SampleInitialization
+                ClusterWithEmptyClusterPolicy<SampleInitialization, LloydStepType>(emptyCluster, iterationLimit, data, clusterCount, toReturnVector, toReturnMat);
+                break;
+
+            case 1:
+                // RandomPartition
+                ClusterWithEmptyClusterPolicy<RandomPartition, LloydStepType>(emptyCluster, iterationLimit, data, clusterCount, toReturnVector, toReturnMat);
+                break;
+
+            default:
+                // Wrong input
+                cerr << "wrong input of initialPartition, should be 0 or 1" << endl;
+                break;
+            }
+        }
+        catch (const std::exception &e)
+        {
+            // drop partial results, the caller gets empty results instead of a crash
+            cerr << "KMeans failed: " << e.what() << endl;
+            toReturnVector.reset();
+            toReturnMat.reset();
+        }
+    }
+
+    // return the Vector length
+    *assignmentsArrSize = toReturnVector.n_elem;
+
+    // return the Vector as Array
+    *assignmentsArr = convertToArray(toReturnVector);
+
+    // return the Matrix dimensions
+    *centroidsMatColNum = toReturnMat.n_cols;
+    *centroidsMatRowNum = toReturnMat.n_rows;
+
+    // return the Matrix as one long Array
+    *centroidsMatArr = convertToArray(toReturnMat);
+}
+
+// Clusters the data with NaiveKMeans as Lloyd step type.
+//
+// input:  maxIterations, initialPartition (0 or 1), emptyCluster (0, 1 or 2),
+//         dataMatArr, dataMatSize, dataMatRowNum (data matrix as one long array), clusters
+// output: assignmentsArr, assignmentsArrSize (assignments as array),
+//         centroidsMatArr, centroidsMatColNum, centroidsMatRowNum (centroids as one long array)
+// description: foreign function declared as naiveKMeans/12 in kmeans.pl, forwards all
+//         arguments unchanged to InitAndClusterKMeans<NaiveKMeans>.
+void naiveKMeans(SP_integer maxIterations, SP_integer initialPartition, SP_integer emptyCluster, float *dataMatArr, SP_integer dataMatSize, SP_integer dataMatRowNum, SP_integer clusters,
+                 float **assignmentsArr, SP_integer *assignmentsArrSize, float **centroidsMatArr, SP_integer *centroidsMatColNum, SP_integer *centroidsMatRowNum)
+{
+    InitAndClusterKMeans<NaiveKMeans>(maxIterations, initialPartition, emptyCluster, dataMatArr, dataMatSize, dataMatRowNum, clusters,
+                                      assignmentsArr, assignmentsArrSize, centroidsMatArr, centroidsMatColNum, centroidsMatRowNum);
+}
+
+// Clusters the data with DualTreeKMeans as Lloyd step type.
+//
+// input:  maxIterations, initialPartition (0 or 1), emptyCluster (0, 1 or 2),
+//         dataMatArr, dataMatSize, dataMatRowNum (data matrix as one long array), clusters
+// output: assignmentsArr, assignmentsArrSize (assignments as array),
+//         centroidsMatArr, centroidsMatColNum, centroidsMatRowNum (centroids as one long array)
+// description: foreign function declared as dualTreeKMeans/12 in kmeans.pl, forwards all
+//         arguments unchanged to InitAndClusterKMeans<DualTreeKMeans>.
+void dualTreeKMeans(SP_integer maxIterations, SP_integer initialPartition, SP_integer emptyCluster, float *dataMatArr, SP_integer dataMatSize, SP_integer dataMatRowNum, SP_integer clusters,
+                    float **assignmentsArr, SP_integer *assignmentsArrSize, float **centroidsMatArr, SP_integer *centroidsMatColNum, SP_integer *centroidsMatRowNum)
+{
+    InitAndClusterKMeans<DualTreeKMeans>(maxIterations, initialPartition, emptyCluster, dataMatArr, dataMatSize, dataMatRowNum, clusters,
+                                         assignmentsArr, assignmentsArrSize, centroidsMatArr, centroidsMatColNum, centroidsMatRowNum);
+}
+
+// Clusters the data with ElkanKMeans as Lloyd step type.
+//
+// input:  maxIterations, initialPartition (0 or 1), emptyCluster (0, 1 or 2),
+//         dataMatArr, dataMatSize, dataMatRowNum (data matrix as one long array), clusters
+// output: assignmentsArr, assignmentsArrSize (assignments as array),
+//         centroidsMatArr, centroidsMatColNum, centroidsMatRowNum (centroids as one long array)
+// description: foreign function declared as elkanKMeans/12 in kmeans.pl, forwards all
+//         arguments unchanged to InitAndClusterKMeans<ElkanKMeans>.
+void elkanKMeans(SP_integer maxIterations, SP_integer initialPartition, SP_integer emptyCluster, float *dataMatArr, SP_integer dataMatSize, SP_integer dataMatRowNum, SP_integer clusters,
+                 float **assignmentsArr, SP_integer *assignmentsArrSize, float **centroidsMatArr, SP_integer *centroidsMatColNum, SP_integer *centroidsMatRowNum)
+{
+    InitAndClusterKMeans<ElkanKMeans>(maxIterations, initialPartition, emptyCluster, dataMatArr, dataMatSize, dataMatRowNum, clusters,
+                                      assignmentsArr, assignmentsArrSize, centroidsMatArr, centroidsMatColNum, centroidsMatRowNum);
+}
+
+// Clusters the data with HamerlyKMeans as Lloyd step type.
+//
+// input:  maxIterations, initialPartition (0 or 1), emptyCluster (0, 1 or 2),
+//         dataMatArr, dataMatSize, dataMatRowNum (data matrix as one long array), clusters
+// output: assignmentsArr, assignmentsArrSize (assignments as array),
+//         centroidsMatArr, centroidsMatColNum, centroidsMatRowNum (centroids as one long array)
+// description: foreign function declared as hamerlyKMeans/12 in kmeans.pl, forwards all
+//         arguments unchanged to InitAndClusterKMeans<HamerlyKMeans>.
+void hamerlyKMeans(SP_integer maxIterations, SP_integer initialPartition, SP_integer emptyCluster, float *dataMatArr, SP_integer dataMatSize, SP_integer dataMatRowNum, SP_integer clusters,
+                   float **assignmentsArr, SP_integer *assignmentsArrSize, float **centroidsMatArr, SP_integer *centroidsMatColNum, SP_integer *centroidsMatRowNum)
+{
+    InitAndClusterKMeans<HamerlyKMeans>(maxIterations, initialPartition, emptyCluster, dataMatArr, dataMatSize, dataMatRowNum, clusters,
+                                        assignmentsArr, assignmentsArrSize, centroidsMatArr, centroidsMatColNum, centroidsMatRowNum);
+}
+
+// Clusters the data with PellegMooreKMeans as Lloyd step type.
+//
+// input:  maxIterations, initialPartition (0 or 1), emptyCluster (0, 1 or 2),
+//         dataMatArr, dataMatSize, dataMatRowNum (data matrix as one long array), clusters
+// output: assignmentsArr, assignmentsArrSize (assignments as array),
+//         centroidsMatArr, centroidsMatColNum, centroidsMatRowNum (centroids as one long array)
+// description: foreign function declared as pellegMooreKMeans/12 in kmeans.pl, forwards all
+//         arguments unchanged to InitAndClusterKMeans<PellegMooreKMeans>.
+void pellegMooreKMeans(SP_integer maxIterations, SP_integer initialPartition, SP_integer emptyCluster, float *dataMatArr, SP_integer dataMatSize, SP_integer dataMatRowNum, SP_integer clusters,
+                       float **assignmentsArr, SP_integer *assignmentsArrSize, float **centroidsMatArr, SP_integer *centroidsMatColNum, SP_integer *centroidsMatRowNum)
+{
+    InitAndClusterKMeans<PellegMooreKMeans>(maxIterations, initialPartition, emptyCluster, dataMatArr, dataMatSize, dataMatRowNum, clusters,
+                                            assignmentsArr, assignmentsArrSize, centroidsMatArr, centroidsMatColNum, centroidsMatRowNum);
+}
\ No newline at end of file
diff --git a/src/methods/kmeans/kmeans.pl b/src/methods/kmeans/kmeans.pl
new file mode 100644
index 0000000..01b0036
--- /dev/null
+++ b/src/methods/kmeans/kmeans.pl
@@ -0,0 +1,79 @@
+:- module(kmeans, [     naiveKMeans/12,
+                        dualTreeKMeans/12,
+                        elkanKMeans/12,
+                        hamerlyKMeans/12,
+                        pellegMooreKMeans/12]).
+
+%% requirements of library(structs)
+:- load_files(library(str_decl),
+        [when(compile_time), if(changed)]).
+
+%% needed for using the array type
+:- use_module(library(structs)).
+:- use_module('../../helper_files/helper.pl').
+
+%% type definitions for the float array
+:- foreign_type
+        float32          = float_32,
+        float_array      = array(float32).
+
+%% definitions for the connected function
+
+%% naiveKMeans/12: connects the predicate to the C function naiveKMeans of kmeans.cpp
+%% input:  maxIterations, initialPartition, emptyCluster (integers), the data matrix as float array with its size and row number, clusters (integer)
+%% output: the assignments as float array with its size, the centroids matrix as float array with its column and row number
+%% description: the argument order follows the parameter list of naiveKMeans in kmeans.cpp
+foreign(naiveKMeans, c, naiveKMeans(+integer, +integer, +integer, +pointer(float_array), +integer, +integer, +integer,
+                                    -pointer(float_array), -integer, -pointer(float_array), -integer, -integer)).
+
+%% dualTreeKMeans/12: connects the predicate to the C function dualTreeKMeans of kmeans.cpp
+%% input:  maxIterations, initialPartition, emptyCluster (integers), the data matrix as float array with its size and row number, clusters (integer)
+%% output: the assignments as float array with its size, the centroids matrix as float array with its column and row number
+%% description: the argument order follows the parameter list of dualTreeKMeans in kmeans.cpp
+foreign(dualTreeKMeans, c, dualTreeKMeans(+integer, +integer, +integer, +pointer(float_array), +integer, +integer, +integer,
+                                          -pointer(float_array), -integer, -pointer(float_array), -integer, -integer)).
+
+%% elkanKMeans/12: connects the predicate to the C function elkanKMeans of kmeans.cpp
+%% input:  maxIterations, initialPartition, emptyCluster (integers), the data matrix as float array with its size and row number, clusters (integer)
+%% output: the assignments as float array with its size, the centroids matrix as float array with its column and row number
+%% description: the argument order follows the parameter list of elkanKMeans in kmeans.cpp
+foreign(elkanKMeans, c, elkanKMeans(+integer, +integer, +integer, +pointer(float_array), +integer, +integer, +integer,
+                                    -pointer(float_array), -integer, -pointer(float_array), -integer, -integer)).
+
+%% hamerlyKMeans/12: connects the predicate to the C function hamerlyKMeans of kmeans.cpp
+%% input:  maxIterations, initialPartition, emptyCluster (integers), the data matrix as float array with its size and row number, clusters (integer)
+%% output: the assignments as float array with its size, the centroids matrix as float array with its column and row number
+%% description: the argument order follows the parameter list of hamerlyKMeans in kmeans.cpp
+foreign(hamerlyKMeans, c, hamerlyKMeans(+integer, +integer, +integer, +pointer(float_array), +integer, +integer, +integer,
+                                        -pointer(float_array), -integer, -pointer(float_array), -integer, -integer)).
+
+%% pellegMooreKMeans/12: connects the predicate to the C function pellegMooreKMeans of kmeans.cpp
+%% input:  maxIterations, initialPartition, emptyCluster (integers), the data matrix as float array with its size and row number, clusters (integer)
+%% output: the assignments as float array with its size, the centroids matrix as float array with its column and row number
+%% description: the argument order follows the parameter list of pellegMooreKMeans in kmeans.cpp
+foreign(pellegMooreKMeans, c, pellegMooreKMeans(+integer, +integer, +integer, +pointer(float_array), +integer, +integer, +integer,
+                                                -pointer(float_array), -integer, -pointer(float_array), -integer, -integer)).
+
+%% +integer , +float32
+%% [-integer] , [-float32]
+
+%% matrix input
+%% +pointer(float_array), +integer, +integer
+
+%% array input
+%% +pointer(float_array), +integer
+
+%% matrix return
+%% -pointer(float_array), -integer, -integer
+
+%% array return
+%% -pointer(float_array), -integer
+
+%% Defines the functions that get connected from kmeans.cpp
+foreign_resource(kmeans, [      naiveKMeans,
+                                dualTreeKMeans,
+                                elkanKMeans,
+                                hamerlyKMeans,
+                                pellegMooreKMeans]).
+
+:- load_foreign_resource(kmeans).
\ No newline at end of file
diff --git a/src/methods/kmeans/kmeans_test.pl b/src/methods/kmeans/kmeans_test.pl
new file mode 100644
index 0000000..dc45c46
--- /dev/null
+++ b/src/methods/kmeans/kmeans_test.pl
@@ -0,0 +1,60 @@
+:- use_module(library(plunit)).
+
+:- use_module(kmeans).
+:- use_module('../../helper_files/helper.pl').
+
+
+:- begin_tests(kmeans).
+
+%% alpha tests: each one clusters the same 16 values (row number 4) into 3 clusters and prints the results
+test(naiveKMeans) :-
+        convert_list_to_float_array([5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5, 1.4, 3.0, 2.1, 0.1],4, array(Xsize, Xrownum, X)),
+        naiveKMeans(20, 0, 1, X, Xsize, Xrownum, 3, Y, Ysize, Centroids, Centroidscolnum, Centroidsrownum),
+        print(Ysize),
+        convert_float_array_to_list(Y, Ysize, ResultsY),
+        print(ResultsY),
+        print(Centroidscolnum),
+        convert_float_array_to_2d_list(Centroids, Centroidscolnum, Centroidsrownum, Results),
+        print(Results).
+
+test(dualTreeKMeans) :-
+        convert_list_to_float_array([5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5, 1.4, 3.0, 2.1, 0.1],4, array(Xsize, Xrownum, X)),
+        dualTreeKMeans(20, 0, 1, X, Xsize, Xrownum, 3, Y, Ysize, Centroids, Centroidscolnum, Centroidsrownum),
+        print(Ysize),
+        convert_float_array_to_list(Y, Ysize, ResultsY),
+        print(ResultsY),
+        print(Centroidscolnum),
+        convert_float_array_to_2d_list(Centroids, Centroidscolnum, Centroidsrownum, Results),
+        print(Results).
+
+test(elkanKMeans) :-
+        convert_list_to_float_array([5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5, 1.4, 3.0, 2.1, 0.1],4, array(Xsize, Xrownum, X)),
+        elkanKMeans(20, 0, 1, X, Xsize, Xrownum, 3, Y, Ysize, Centroids, Centroidscolnum, Centroidsrownum),
+        print(Ysize),
+        convert_float_array_to_list(Y, Ysize, ResultsY),
+        print(ResultsY),
+        print(Centroidscolnum),
+        convert_float_array_to_2d_list(Centroids, Centroidscolnum, Centroidsrownum, Results),
+        print(Results).
+
+test(hamerlyKMeans) :-
+        convert_list_to_float_array([5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5, 1.4, 3.0, 2.1, 0.1],4, array(Xsize, Xrownum, X)),
+        hamerlyKMeans(20, 0, 1, X, Xsize, Xrownum, 3, Y, Ysize, Centroids, Centroidscolnum, Centroidsrownum),
+        print(Ysize),
+        convert_float_array_to_list(Y, Ysize, ResultsY),
+        print(ResultsY),
+        print(Centroidscolnum),
+        convert_float_array_to_2d_list(Centroids, Centroidscolnum, Centroidsrownum, Results),
+        print(Results).
+
+test(pellegMooreKMeans) :-
+        convert_list_to_float_array([5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5, 1.4, 3.0, 2.1, 0.1],4, array(Xsize, Xrownum, X)),
+        pellegMooreKMeans(20, 0, 0, X, Xsize, Xrownum, 3, Y, Ysize, Centroids, Centroidscolnum, Centroidsrownum),
+        print(Ysize),
+        convert_float_array_to_list(Y, Ysize, ResultsY),
+        print(ResultsY),
+        print(Centroidscolnum),
+        convert_float_array_to_2d_list(Centroids, Centroidscolnum, Centroidsrownum, Results),
+        print(Results).
+
+:- end_tests(kmeans).
\ No newline at end of file
-- 
GitLab