From 4d705375dff3734dd5fb74de73b2b56b9fae84ac Mon Sep 17 00:00:00 2001
From: Jakhes <dean.schmitz@schmitzbauer.de>
Date: Mon, 12 Sep 2022 18:17:47 +0200
Subject: [PATCH] Adding Kmeans to the Project

---
 src/helper_files/helper.cpp       |   6 +
 src/helper_files/helper.hpp       |   2 +
 src/methods/kmeans/Makefile       |   8 ++
 src/methods/kmeans/kmeans.cpp     | 232 ++++++++++++++++++++++++++++++
 src/methods/kmeans/kmeans.pl      |  79 ++++++++++
 src/methods/kmeans/kmeans_test.pl |  60 ++++++++
 6 files changed, 387 insertions(+)
 create mode 100644 src/methods/kmeans/Makefile
 create mode 100644 src/methods/kmeans/kmeans.cpp
 create mode 100644 src/methods/kmeans/kmeans.pl
 create mode 100644 src/methods/kmeans/kmeans_test.pl

diff --git a/src/helper_files/helper.cpp b/src/helper_files/helper.cpp
index ec3dd05..44bc9c2 100644
--- a/src/helper_files/helper.cpp
+++ b/src/helper_files/helper.cpp
@@ -44,6 +44,12 @@ float *convertToArray(vector<double> vec)
 	return convertToArray(newVec);
 }
 
+float *convertToArray(Row<size_t> vec)
+{
+	colvec asDoubles = conv_to<colvec>::from(vec); // size_t entries become double
+	return convertToArray(asDoubles);               // delegate to the overload accepting a colvec
+}
+
 float *convertToArray(vector<vec> matrix)
 {
 	vec newVec = matrix[0];
diff --git a/src/helper_files/helper.hpp b/src/helper_files/helper.hpp
index 5a21234..80e45ac 100644
--- a/src/helper_files/helper.hpp
+++ b/src/helper_files/helper.hpp
@@ -17,6 +17,8 @@ float *convertToArray(vector<size_t> vec);
 
 float *convertToArray(vector<double> vec);
 
+float *convertToArray(Row<size_t> vec); // Row<size_t> overload; converts to colvec first and then delegates
+
 float *convertToArray(vector<vec> vec);
 
 
diff --git a/src/methods/kmeans/Makefile b/src/methods/kmeans/Makefile
new file mode 100644
index 0000000..ddaebe1
--- /dev/null
+++ b/src/methods/kmeans/Makefile
@@ -0,0 +1,8 @@
+splfr=/usr/local/sicstus4.7.1/bin/splfr
+.PHONY: clean
+METHOD_NAME=kmeans
+# helper.cpp is compiled into the resource, so a change to it must trigger a rebuild as well
+$(METHOD_NAME).so: $(METHOD_NAME).pl $(METHOD_NAME).cpp ../../helper_files/helper.cpp
+	$(splfr) -larmadillo -fopenmp -lmlpack -lstdc++ -cxx --struct $(METHOD_NAME).pl $(METHOD_NAME).cpp ../../helper_files/helper.cpp
+clean:
+	rm -f $(METHOD_NAME).so
diff --git a/src/methods/kmeans/kmeans.cpp b/src/methods/kmeans/kmeans.cpp
new file mode 100644
index 0000000..41dda47
--- /dev/null
+++ b/src/methods/kmeans/kmeans.cpp
@@ -0,0 +1,232 @@
+#include <sicstus/sicstus.h>
+/* kmeans_glue.h is generated by splfr from the foreign/[2,3] facts.
+   Always include the glue header in your foreign resource code.
+*/
+#include "kmeans_glue.h"
+#include <mlpack/methods/kmeans/kmeans.hpp>
+
+// Include initialization strategies.
+#include <mlpack/methods/kmeans/sample_initialization.hpp>
+#include <mlpack/methods/kmeans/random_partition.hpp>
+
+// Include empty cluster policies.
+#include <mlpack/methods/kmeans/max_variance_new_cluster.hpp>
+#include <mlpack/methods/kmeans/kill_empty_clusters.hpp>
+#include <mlpack/methods/kmeans/allow_empty_clusters.hpp>
+ 
+// Include Lloyd step types.
+#include <mlpack/methods/kmeans/dual_tree_kmeans.hpp>
+#include <mlpack/methods/kmeans/elkan_kmeans.hpp>
+#include <mlpack/methods/kmeans/hamerly_kmeans.hpp>
+#include <mlpack/methods/kmeans/pelleg_moore_kmeans.hpp>
+#include <mlpack/core.hpp>
+
+// including helper functions for converting between arma structures and arrays
+#include "../../helper_files/helper.hpp"
+
+// some of the most used namespaces
+using namespace arma;
+using namespace mlpack;
+using namespace std;
+using namespace mlpack::kmeans;
+using namespace mlpack::metric;
+
+// Internal helper of InitAndClusterKMeans.
+// Clusters data with the given initial partition policy and Lloyd step type.
+// emptyCluster selects the empty cluster policy:
+//   0 = MaxVarianceNewCluster, 1 = KillEmptyClusters, 2 = AllowEmptyClusters
+// Any other value is reported on stderr and leaves assignments and centroids untouched.
+template<typename InitialPartitionPolicy, template<class,class> class LloydStepType>
+static void ClusterWithEmptyPolicy(SP_integer maxIterations, SP_integer emptyCluster, const mat &data, SP_integer clusters,
+					arma::Row<size_t> &assignments, mat &centroids)
+{
+	switch (emptyCluster)
+	{
+	case 0:
+		// MaxVarianceNewCluster
+		KMeans<EuclideanDistance, InitialPartitionPolicy, MaxVarianceNewCluster, LloydStepType>(maxIterations).Cluster(data, clusters, assignments, centroids, false, false);
+		break;
+
+	case 1:
+		// KillEmptyClusters
+		KMeans<EuclideanDistance, InitialPartitionPolicy, KillEmptyClusters, LloydStepType>(maxIterations).Cluster(data, clusters, assignments, centroids, false, false);
+		break;
+
+	case 2:
+		// AllowEmptyClusters
+		KMeans<EuclideanDistance, InitialPartitionPolicy, AllowEmptyClusters, LloydStepType>(maxIterations).Cluster(data, clusters, assignments, centroids, false, false);
+		break;
+
+	default:
+		// Wrong input: diagnostics go to stderr so they do not mix with regular output
+		cerr << "wrong input of emptyCluster, should be 0, 1 or 2" << endl;
+		break;
+	}
+}
+
+// internal function
+// Runs KMeans with the given Lloyd step template on the input data and returns the
+// results through the out-parameters.
+//
+// maxIterations		iteration limit handed to the KMeans constructor
+// initialPartition	0 = SampleInitialization, 1 = RandomPartition
+// emptyCluster		0 = MaxVarianceNewCluster, 1 = KillEmptyClusters, 2 = AllowEmptyClusters
+// dataMatArr, dataMatSize, dataMatRowNum
+//					input data, turned into an arma::mat by convertArrayToMat
+// clusters			number of clusters requested from KMeans::Cluster
+// assignmentsArr, assignmentsArrSize
+//					receive the cluster assignments (via convertToArray) and their count
+// centroidsMatArr, centroidsMatColNum, centroidsMatRowNum
+//					receive the centroid matrix (via convertToArray) and its dimensions
+//
+// The metric is always EuclideanDistance and both initial-guess flags are false.
+// An unknown initialPartition or emptyCluster value is reported on stderr; the
+// out-parameters then describe an empty assignment vector and an empty centroid matrix.
+// NOTE(review): ownership of the arrays produced by convertToArray is not visible here - confirm in helper.cpp.
+template< template<class,class> class LloydStepType=NaiveKMeans>
+void InitAndClusterKMeans(SP_integer maxIterations, SP_integer initialPartition, SP_integer emptyCluster, float *dataMatArr, SP_integer dataMatSize, SP_integer dataMatRowNum, SP_integer clusters,
+					float **assignmentsArr, SP_integer *assignmentsArrSize, float **centroidsMatArr, SP_integer *centroidsMatColNum, SP_integer *centroidsMatRowNum)
+{
+	// convert the Prolog array to arma::mat
+	mat data = convertArrayToMat(dataMatArr, dataMatSize, dataMatRowNum);
+
+	// cluster assignments; stays empty when a selector is invalid
+	arma::Row<size_t> toReturnVector;
+
+	// cluster centroids; stays empty when a selector is invalid
+	mat toReturnMat;
+
+
+	switch (initialPartition)
+	{
+	case 0:
+		// SampleInitialization
+		ClusterWithEmptyPolicy<SampleInitialization, LloydStepType>(maxIterations, emptyCluster, data, clusters, toReturnVector, toReturnMat);
+		break;
+
+	case 1:
+		// RandomPartition
+		ClusterWithEmptyPolicy<RandomPartition, LloydStepType>(maxIterations, emptyCluster, data, clusters, toReturnVector, toReturnMat);
+		break;
+
+	default:
+		// Wrong input: diagnostics go to stderr so they do not mix with regular output
+		cerr << "wrong input of initialPartition, should be 0 or 1" << endl;
+		break;
+	}
+
+	// return the Vector length
+	*assignmentsArrSize = toReturnVector.n_elem;
+
+	// return the Vector as Array
+	*assignmentsArr = convertToArray(toReturnVector);
+
+	// return the Matrix dimensions
+	*centroidsMatColNum = toReturnMat.n_cols;
+	*centroidsMatRowNum = toReturnMat.n_rows;
+
+	// return the Matrix as one long Array
+	*centroidsMatArr = convertToArray(toReturnMat);
+}
+
+// Foreign function bound to the Prolog predicate naiveKMeans/12 (see kmeans.pl).
+// input: 	maxIterations		iteration limit handed to the KMeans constructor
+//			initialPartition	0 = SampleInitialization, 1 = RandomPartition
+//			emptyCluster		0 = MaxVarianceNewCluster, 1 = KillEmptyClusters, 2 = AllowEmptyClusters
+//			dataMatArr			input data as one float array
+//			dataMatSize			presumably the number of floats in dataMatArr (consumed by convertArrayToMat)
+//			dataMatRowNum		presumably the row count of the data matrix (consumed by convertArrayToMat)
+//			clusters			number of clusters requested from KMeans::Cluster
+// output: 	assignmentsArr, assignmentsArrSize							cluster assignments and their count
+//			centroidsMatArr, centroidsMatColNum, centroidsMatRowNum		centroid matrix as one long array and its dimensions
+// description: clusters the data with mlpack's KMeans using the NaiveKMeans Lloyd step;
+//				all work is done by InitAndClusterKMeans<NaiveKMeans>, which receives every argument unchanged.
+//				An unknown initialPartition or emptyCluster value only prints a message and yields empty results.
+void naiveKMeans(SP_integer maxIterations, SP_integer initialPartition, SP_integer emptyCluster, float *dataMatArr, SP_integer dataMatSize, SP_integer dataMatRowNum, SP_integer clusters,
+					float **assignmentsArr, SP_integer *assignmentsArrSize, float **centroidsMatArr, SP_integer *centroidsMatColNum, SP_integer *centroidsMatRowNum)
+{
+	InitAndClusterKMeans<NaiveKMeans>(maxIterations, initialPartition, emptyCluster, dataMatArr, dataMatSize, dataMatRowNum, clusters,
+					 assignmentsArr, assignmentsArrSize, centroidsMatArr, centroidsMatColNum, centroidsMatRowNum);
+}
+
+// Foreign function bound to the Prolog predicate dualTreeKMeans/12 (see kmeans.pl).
+// input: 	maxIterations		iteration limit handed to the KMeans constructor
+//			initialPartition	0 = SampleInitialization, 1 = RandomPartition
+//			emptyCluster		0 = MaxVarianceNewCluster, 1 = KillEmptyClusters, 2 = AllowEmptyClusters
+//			dataMatArr			input data as one float array
+//			dataMatSize			presumably the number of floats in dataMatArr (consumed by convertArrayToMat)
+//			dataMatRowNum		presumably the row count of the data matrix (consumed by convertArrayToMat)
+//			clusters			number of clusters requested from KMeans::Cluster
+// output: 	assignmentsArr, assignmentsArrSize							cluster assignments and their count
+//			centroidsMatArr, centroidsMatColNum, centroidsMatRowNum		centroid matrix as one long array and its dimensions
+// description: clusters the data with mlpack's KMeans using the DualTreeKMeans Lloyd step;
+//				all work is done by InitAndClusterKMeans<DualTreeKMeans>, which receives every argument unchanged.
+//				An unknown initialPartition or emptyCluster value only prints a message and yields empty results.
+void dualTreeKMeans(SP_integer maxIterations, SP_integer initialPartition, SP_integer emptyCluster, float *dataMatArr, SP_integer dataMatSize, SP_integer dataMatRowNum, SP_integer clusters,
+					float **assignmentsArr, SP_integer *assignmentsArrSize, float **centroidsMatArr, SP_integer *centroidsMatColNum, SP_integer *centroidsMatRowNum)
+{
+	InitAndClusterKMeans<DualTreeKMeans>(maxIterations, initialPartition, emptyCluster, dataMatArr, dataMatSize, dataMatRowNum, clusters,
+					 assignmentsArr, assignmentsArrSize, centroidsMatArr, centroidsMatColNum, centroidsMatRowNum);
+}
+
+// Foreign function bound to the Prolog predicate elkanKMeans/12 (see kmeans.pl).
+// input: 	maxIterations		iteration limit handed to the KMeans constructor
+//			initialPartition	0 = SampleInitialization, 1 = RandomPartition
+//			emptyCluster		0 = MaxVarianceNewCluster, 1 = KillEmptyClusters, 2 = AllowEmptyClusters
+//			dataMatArr			input data as one float array
+//			dataMatSize			presumably the number of floats in dataMatArr (consumed by convertArrayToMat)
+//			dataMatRowNum		presumably the row count of the data matrix (consumed by convertArrayToMat)
+//			clusters			number of clusters requested from KMeans::Cluster
+// output: 	assignmentsArr, assignmentsArrSize							cluster assignments and their count
+//			centroidsMatArr, centroidsMatColNum, centroidsMatRowNum		centroid matrix as one long array and its dimensions
+// description: clusters the data with mlpack's KMeans using the ElkanKMeans Lloyd step;
+//				all work is done by InitAndClusterKMeans<ElkanKMeans>, which receives every argument unchanged.
+//				An unknown initialPartition or emptyCluster value only prints a message and yields empty results.
+void elkanKMeans(SP_integer maxIterations, SP_integer initialPartition, SP_integer emptyCluster, float *dataMatArr, SP_integer dataMatSize, SP_integer dataMatRowNum, SP_integer clusters,
+					float **assignmentsArr, SP_integer *assignmentsArrSize, float **centroidsMatArr, SP_integer *centroidsMatColNum, SP_integer *centroidsMatRowNum)
+{
+	InitAndClusterKMeans<ElkanKMeans>(maxIterations, initialPartition, emptyCluster, dataMatArr, dataMatSize, dataMatRowNum, clusters,
+					 assignmentsArr, assignmentsArrSize, centroidsMatArr, centroidsMatColNum, centroidsMatRowNum);
+}
+
+// Foreign function bound to the Prolog predicate hamerlyKMeans/12 (see kmeans.pl).
+// input: 	maxIterations		iteration limit handed to the KMeans constructor
+//			initialPartition	0 = SampleInitialization, 1 = RandomPartition
+//			emptyCluster		0 = MaxVarianceNewCluster, 1 = KillEmptyClusters, 2 = AllowEmptyClusters
+//			dataMatArr			input data as one float array
+//			dataMatSize			presumably the number of floats in dataMatArr (consumed by convertArrayToMat)
+//			dataMatRowNum		presumably the row count of the data matrix (consumed by convertArrayToMat)
+//			clusters			number of clusters requested from KMeans::Cluster
+// output: 	assignmentsArr, assignmentsArrSize							cluster assignments and their count
+//			centroidsMatArr, centroidsMatColNum, centroidsMatRowNum		centroid matrix as one long array and its dimensions
+// description: clusters the data with mlpack's KMeans using the HamerlyKMeans Lloyd step;
+//				all work is done by InitAndClusterKMeans<HamerlyKMeans>, which receives every argument unchanged.
+//				An unknown initialPartition or emptyCluster value only prints a message and yields empty results.
+void hamerlyKMeans(SP_integer maxIterations, SP_integer initialPartition, SP_integer emptyCluster, float *dataMatArr, SP_integer dataMatSize, SP_integer dataMatRowNum, SP_integer clusters,
+					float **assignmentsArr, SP_integer *assignmentsArrSize, float **centroidsMatArr, SP_integer *centroidsMatColNum, SP_integer *centroidsMatRowNum)
+{
+	InitAndClusterKMeans<HamerlyKMeans>(maxIterations, initialPartition, emptyCluster, dataMatArr, dataMatSize, dataMatRowNum, clusters,
+					 assignmentsArr, assignmentsArrSize, centroidsMatArr, centroidsMatColNum, centroidsMatRowNum);
+}
+
+
+
+// Foreign function bound to the Prolog predicate pellegMooreKMeans/12 (see kmeans.pl).
+// input: 	maxIterations		iteration limit handed to the KMeans constructor
+//			initialPartition	0 = SampleInitialization, 1 = RandomPartition
+//			emptyCluster		0 = MaxVarianceNewCluster, 1 = KillEmptyClusters, 2 = AllowEmptyClusters
+//			dataMatArr			input data as one float array
+//			dataMatSize			presumably the number of floats in dataMatArr (consumed by convertArrayToMat)
+//			dataMatRowNum		presumably the row count of the data matrix (consumed by convertArrayToMat)
+//			clusters			number of clusters requested from KMeans::Cluster
+// output: 	assignmentsArr, assignmentsArrSize							cluster assignments and their count
+//			centroidsMatArr, centroidsMatColNum, centroidsMatRowNum		centroid matrix as one long array and its dimensions
+// description: clusters the data with mlpack's KMeans using the PellegMooreKMeans Lloyd step;
+//				all work is done by InitAndClusterKMeans<PellegMooreKMeans>, which receives every argument unchanged.
+//				An unknown initialPartition or emptyCluster value only prints a message and yields empty results.
+void pellegMooreKMeans(SP_integer maxIterations, SP_integer initialPartition, SP_integer emptyCluster, float *dataMatArr, SP_integer dataMatSize, SP_integer dataMatRowNum, SP_integer clusters,
+					float **assignmentsArr, SP_integer *assignmentsArrSize, float **centroidsMatArr, SP_integer *centroidsMatColNum, SP_integer *centroidsMatRowNum)
+{
+	InitAndClusterKMeans<PellegMooreKMeans>(maxIterations, initialPartition, emptyCluster, dataMatArr, dataMatSize, dataMatRowNum, clusters,
+					 assignmentsArr, assignmentsArrSize, centroidsMatArr, centroidsMatColNum, centroidsMatRowNum);
+}
\ No newline at end of file
diff --git a/src/methods/kmeans/kmeans.pl b/src/methods/kmeans/kmeans.pl
new file mode 100644
index 0000000..01b0036
--- /dev/null
+++ b/src/methods/kmeans/kmeans.pl
@@ -0,0 +1,79 @@
+:- module(kmeans, [  naiveKMeans/12,
+                     dualTreeKMeans/12, 
+                     elkanKMeans/12, 
+                     hamerlyKMeans/12, 
+                     pellegMooreKMeans/12]).
+
+%% requirements of library(struct)
+:- load_files(library(str_decl),
+              [when(compile_time), if(changed)]).
+
+%% needed for using the array type
+:- use_module(library(structs)).
+:- use_module('../../helper_files/helper.pl').
+
+%% type definitions for the float array
+:- foreign_type
+       float32          = float_32,
+       float_array      = array(float32).
+
+%% definitions for the connected function
+
+%% Binds naiveKMeans/12 to the foreign function naiveKMeans of the kmeans resource.
+%% input:  MaxIterations, InitialPartition (0 or 1), EmptyCluster (0, 1 or 2), data matrix as (float_array, size, row count), Clusters
+%% output: assignments as (float_array, size), centroids as (float_array, column count, row count)
+%% description: k-means clustering with mlpack's NaiveKMeans Lloyd step.
+foreign(naiveKMeans, c, naiveKMeans(+integer, +integer, +integer, +pointer(float_array), +integer, +integer, +integer,
+                                   -pointer(float_array), -integer, -pointer(float_array), -integer, -integer)).
+
+%% Binds dualTreeKMeans/12 to the foreign function dualTreeKMeans of the kmeans resource.
+%% input:  MaxIterations, InitialPartition (0 or 1), EmptyCluster (0, 1 or 2), data matrix as (float_array, size, row count), Clusters
+%% output: assignments as (float_array, size), centroids as (float_array, column count, row count)
+%% description: k-means clustering with mlpack's DualTreeKMeans Lloyd step.
+foreign(dualTreeKMeans, c, dualTreeKMeans(+integer, +integer, +integer, +pointer(float_array), +integer, +integer, +integer,
+                                   -pointer(float_array), -integer, -pointer(float_array), -integer, -integer)).
+
+%% Binds elkanKMeans/12 to the foreign function elkanKMeans of the kmeans resource.
+%% input:  MaxIterations, InitialPartition (0 or 1), EmptyCluster (0, 1 or 2), data matrix as (float_array, size, row count), Clusters
+%% output: assignments as (float_array, size), centroids as (float_array, column count, row count)
+%% description: k-means clustering with mlpack's ElkanKMeans Lloyd step.
+foreign(elkanKMeans, c, elkanKMeans(+integer, +integer, +integer, +pointer(float_array), +integer, +integer, +integer,
+                                   -pointer(float_array), -integer, -pointer(float_array), -integer, -integer)).
+
+%% Binds hamerlyKMeans/12 to the foreign function hamerlyKMeans of the kmeans resource.
+%% input:  MaxIterations, InitialPartition (0 or 1), EmptyCluster (0, 1 or 2), data matrix as (float_array, size, row count), Clusters
+%% output: assignments as (float_array, size), centroids as (float_array, column count, row count)
+%% description: k-means clustering with mlpack's HamerlyKMeans Lloyd step.
+foreign(hamerlyKMeans, c, hamerlyKMeans(+integer, +integer, +integer, +pointer(float_array), +integer, +integer, +integer,
+                                   -pointer(float_array), -integer, -pointer(float_array), -integer, -integer)).
+
+%% Binds pellegMooreKMeans/12 to the foreign function pellegMooreKMeans of the kmeans resource.
+%% input:  MaxIterations, InitialPartition (0 or 1), EmptyCluster (0, 1 or 2), data matrix as (float_array, size, row count), Clusters
+%% output: assignments as (float_array, size), centroids as (float_array, column count, row count)
+%% description: k-means clustering with mlpack's PellegMooreKMeans Lloyd step.
+foreign(pellegMooreKMeans, c, pellegMooreKMeans(+integer, +integer, +integer, +pointer(float_array), +integer, +integer, +integer,
+                                   -pointer(float_array), -integer, -pointer(float_array), -integer, -integer)).
+
+%% +integer , +float32
+%% [-integer] , [-float32]
+
+%% matrix input
+%% +pointer(float_array), +integer, +integer
+
+%% array input
+%% +pointer(float_array), +integer
+
+%% matrix return
+%% -pointer(float_array), -integer, -integer
+
+%% array return
+%% -pointer(float_array), -integer
+
+%% Defines the functions that get connected from kmeans.cpp
+foreign_resource(kmeans, [  naiveKMeans,
+                            dualTreeKMeans, 
+                            elkanKMeans, 
+                            hamerlyKMeans, 
+                            pellegMooreKMeans]).
+
+:- load_foreign_resource(kmeans).
\ No newline at end of file
diff --git a/src/methods/kmeans/kmeans_test.pl b/src/methods/kmeans/kmeans_test.pl
new file mode 100644
index 0000000..dc45c46
--- /dev/null
+++ b/src/methods/kmeans/kmeans_test.pl
@@ -0,0 +1,60 @@
+:- use_module(library(plunit)).
+
+:- use_module(kmeans).
+:- use_module('../../helper_files/helper.pl').
+
+
+:- begin_tests(kmeans).
+
+%% alpha tests: every KMeans variant is called on the same 16-value sample with 3 clusters and its results are printed
+test(naiveKMeans) :-
+        convert_list_to_float_array([5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5, 1.4, 3.0, 2.1, 0.1],4, array(Xsize, Xrownum, X)),
+        naiveKMeans(20, 0, 1, X, Xsize, Xrownum, 3, Y, Ysize, Centroids, Centroidscolnum, Centroidsrownum),
+        print(Ysize),
+        convert_float_array_to_list(Y, Ysize, ResultsY),
+        print(ResultsY),
+        print(Centroidscolnum),
+        convert_float_array_to_2d_list(Centroids, Centroidscolnum, Centroidsrownum, Results),
+        print(Results).
+
+test(dualTreeKMeans) :-
+        convert_list_to_float_array([5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5, 1.4, 3.0, 2.1, 0.1],4, array(Xsize, Xrownum, X)),
+        dualTreeKMeans(20, 0, 1, X, Xsize, Xrownum, 3, Y, Ysize, Centroids, Centroidscolnum, Centroidsrownum),
+        print(Ysize),
+        convert_float_array_to_list(Y, Ysize, ResultsY),
+        print(ResultsY),
+        print(Centroidscolnum),
+        convert_float_array_to_2d_list(Centroids, Centroidscolnum, Centroidsrownum, Results),
+        print(Results).
+
+test(elkanKMeans) :-
+        convert_list_to_float_array([5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5, 1.4, 3.0, 2.1, 0.1],4, array(Xsize, Xrownum, X)),
+        elkanKMeans(20, 0, 1, X, Xsize, Xrownum, 3, Y, Ysize, Centroids, Centroidscolnum, Centroidsrownum),
+        print(Ysize),
+        convert_float_array_to_list(Y, Ysize, ResultsY),
+        print(ResultsY),
+        print(Centroidscolnum),
+        convert_float_array_to_2d_list(Centroids, Centroidscolnum, Centroidsrownum, Results),
+        print(Results).
+
+test(hamerlyKMeans) :-
+        convert_list_to_float_array([5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5, 1.4, 3.0, 2.1, 0.1],4, array(Xsize, Xrownum, X)),
+        hamerlyKMeans(20, 0, 1, X, Xsize, Xrownum, 3, Y, Ysize, Centroids, Centroidscolnum, Centroidsrownum),
+        print(Ysize),
+        convert_float_array_to_list(Y, Ysize, ResultsY),
+        print(ResultsY),
+        print(Centroidscolnum),
+        convert_float_array_to_2d_list(Centroids, Centroidscolnum, Centroidsrownum, Results),
+        print(Results).
+
+test(pellegMooreKMeans) :-
+        convert_list_to_float_array([5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5, 1.4, 3.0, 2.1, 0.1],4, array(Xsize, Xrownum, X)),
+        pellegMooreKMeans(20, 0, 0, X, Xsize, Xrownum, 3, Y, Ysize, Centroids, Centroidscolnum, Centroidsrownum),
+        print(Ysize),
+        convert_float_array_to_list(Y, Ysize, ResultsY),
+        print(ResultsY),
+        print(Centroidscolnum),
+        convert_float_array_to_2d_list(Centroids, Centroidscolnum, Centroidsrownum, Results),
+        print(Results).
+
+:- end_tests(kmeans).
\ No newline at end of file
-- 
GitLab