From f65f7180c7101fc8b73b042699940c49ca503895 Mon Sep 17 00:00:00 2001
From: Jakhes <dean.schmitz@schmitzbauer.de>
Date: Tue, 13 Sep 2022 14:09:23 +0200
Subject: [PATCH] Adding DBScan to the project

---
 Makefile                          |   2 +
 src/methods/dbscan/Makefile       |   8 +
 src/methods/dbscan/dbscan.cpp     | 243 ++++++++++++++++++++++++++++++
 src/methods/dbscan/dbscan.pl      |  36 +++++
 src/methods/dbscan/dbscan_test.pl |  48 ++++++
 5 files changed, 337 insertions(+)
 create mode 100644 src/methods/dbscan/Makefile
 create mode 100644 src/methods/dbscan/dbscan.cpp
 create mode 100644 src/methods/dbscan/dbscan.pl
 create mode 100644 src/methods/dbscan/dbscan_test.pl

diff --git a/Makefile b/Makefile
index 3e8d933..8eaebab 100644
--- a/Makefile
+++ b/Makefile
@@ -7,6 +7,7 @@ all:
 	make -C src/methods/lars splfr=$(SPLFR_PATH)
 	make -C src/methods/ada_boost splfr=$(SPLFR_PATH)
 	make -C src/methods/kmeans splfr=$(SPLFR_PATH)
+	make -C src/methods/dbscan splfr=$(SPLFR_PATH)
 
 clean:
 	make -C src/methods/bayesian_linear_regression clean
@@ -14,3 +15,4 @@ clean:
 	make -C src/methods/lars clean
 	make -C src/methods/ada_boost clean
 	make -C src/methods/kmeans clean
+	make -C src/methods/dbscan clean
diff --git a/src/methods/dbscan/Makefile b/src/methods/dbscan/Makefile
new file mode 100644
index 0000000..f77df52
--- /dev/null
+++ b/src/methods/dbscan/Makefile
@@ -0,0 +1,8 @@
+# default splfr location; the top-level Makefile overrides it with splfr=$(SPLFR_PATH)
+splfr=/usr/local/sicstus4.7.1/bin/splfr
+METHOD_NAME=dbscan
+.PHONY: clean
+$(METHOD_NAME).so: $(METHOD_NAME).pl $(METHOD_NAME).cpp
+	$(splfr) -larmadillo -fopenmp -lmlpack -lstdc++ -cxx --struct $(METHOD_NAME).pl $(METHOD_NAME).cpp ../../helper_files/helper.cpp
+clean:	# -f so that cleaning an already clean directory does not fail
+	rm -f $(METHOD_NAME).so
diff --git a/src/methods/dbscan/dbscan.cpp b/src/methods/dbscan/dbscan.cpp
new file mode 100644
index 0000000..c0a1c2d
--- /dev/null
+++ b/src/methods/dbscan/dbscan.cpp
@@ -0,0 +1,243 @@
+#include <sicstus/sicstus.h>
+/* dbscan_glue.h is generated by splfr from the foreign/[2,3] facts.
+   Always include the glue header in your foreign resource code.
+*/
+#include "dbscan_glue.h"
+#include <mlpack/methods/dbscan/dbscan.hpp>
+#include <mlpack/methods/range_search/rs_model.hpp>
+#include <mlpack/core.hpp>
+
+// including helper functions for converting between arma structures and arrays
+#include "../../helper_files/helper.hpp"
+
+// some of the most used namespaces
+using namespace arma;
+using namespace mlpack;
+using namespace std;
+using namespace mlpack::dbscan;
+using namespace mlpack::range;
+using namespace mlpack::metric;
+
+
+// Clusters the data with a DBSCAN object of the given range search and point selection types
+// and hands the assignments and the centroids back through the output parameters.
+template<typename RangeSearchType = range::RangeSearch<>, typename PointSelectionPolicy = OrderedPointSelection>
+void initAndCluster(double epsilon, SP_integer minPoints, SP_integer batchMode, float *dataMatArr, SP_integer dataMatSize, SP_integer dataMatRowNum,
+				float **assignmentsArr, SP_integer *assignmentsArrSize, float **centroidsMatArr, SP_integer *centroidsMatColNum, SP_integer *centroidsMatRowNum)
+{
+	// batch mode is switched on only by the value 1
+	const bool searchInBatch = (batchMode == 1);
+
+	// the Prolog array as arma::mat
+	mat points = convertArrayToMat(dataMatArr, dataMatSize, dataMatRowNum);
+
+	// both are filled in by Cluster
+	arma::Row<size_t> clusterOfPoint;
+	mat clusterCentroids;
+
+	DBSCAN<RangeSearchType, PointSelectionPolicy> clusterer(epsilon, minPoints, searchInBatch);
+	clusterer.Cluster(points, clusterOfPoint, clusterCentroids);
+
+	// assignments: the length first, then the elements as an array
+	*assignmentsArrSize = clusterOfPoint.n_elem;
+	*assignmentsArr = convertToArray(clusterOfPoint);
+
+	// centroids: the dimensions first, then the elements as one long array
+	*centroidsMatColNum = clusterCentroids.n_cols;
+	*centroidsMatRowNum = clusterCentroids.n_rows;
+	*centroidsMatArr = convertToArray(clusterCentroids);
+}
+
+// Runs DBSCAN with the given PointSelectionPolicy on the range search tree named by treeType.
+//
+// input: 	char const *			treeType		one of the tree names listed in the comment of dbscan below
+//			all remaining parameters are passed on unchanged to initAndCluster
+//
+// output: 	the output parameters are written by initAndCluster;
+//			for an unknown treeType they are left untouched
+//
+// description: 	Prints the name of the chosen tree to stdout and clusters the data with it.
+//					For an unknown treeType only "wrong treeType input" is printed.
+template<typename PointSelectionPolicy>
+static void clusterWithTree(char const *treeType, double epsilon, SP_integer minPoints, SP_integer batchMode, float *dataMatArr, SP_integer dataMatSize, SP_integer dataMatRowNum,
+				float **assignmentsArr, SP_integer *assignmentsArrSize, float **centroidsMatArr, SP_integer *centroidsMatColNum, SP_integer *centroidsMatRowNum)
+{
+	// the first name that equals treeType decides the tree type of the range search
+	if (strcmp(treeType, "kd") == 0)
+	{
+		cout << "kd" << endl;
+		initAndCluster<RangeSearch<EuclideanDistance, mat, tree::KDTree>, PointSelectionPolicy>(
+				epsilon, minPoints, batchMode, dataMatArr, dataMatSize, dataMatRowNum,
+				assignmentsArr, assignmentsArrSize, centroidsMatArr, centroidsMatColNum, centroidsMatRowNum);
+	}
+	else if (strcmp(treeType, "r") == 0)
+	{
+		cout << "r" << endl;
+		initAndCluster<RangeSearch<EuclideanDistance, mat, tree::RTree>, PointSelectionPolicy>(
+				epsilon, minPoints, batchMode, dataMatArr, dataMatSize, dataMatRowNum,
+				assignmentsArr, assignmentsArrSize, centroidsMatArr, centroidsMatColNum, centroidsMatRowNum);
+	}
+	else if (strcmp(treeType, "r_star") == 0)
+	{
+		cout << "r_star" << endl;
+		initAndCluster<RangeSearch<EuclideanDistance, mat, tree::RStarTree>, PointSelectionPolicy>(
+				epsilon, minPoints, batchMode, dataMatArr, dataMatSize, dataMatRowNum,
+				assignmentsArr, assignmentsArrSize, centroidsMatArr, centroidsMatColNum, centroidsMatRowNum);
+	}
+	else if (strcmp(treeType, "x") == 0)
+	{
+		cout << "x" << endl;
+		initAndCluster<RangeSearch<EuclideanDistance, mat, tree::XTree>, PointSelectionPolicy>(
+				epsilon, minPoints, batchMode, dataMatArr, dataMatSize, dataMatRowNum,
+				assignmentsArr, assignmentsArrSize, centroidsMatArr, centroidsMatColNum, centroidsMatRowNum);
+	}
+	else if (strcmp(treeType, "hilbert_r") == 0)
+	{
+		cout << "hilbert_r" << endl;
+		initAndCluster<RangeSearch<EuclideanDistance, mat, tree::HilbertRTree>, PointSelectionPolicy>(
+				epsilon, minPoints, batchMode, dataMatArr, dataMatSize, dataMatRowNum,
+				assignmentsArr, assignmentsArrSize, centroidsMatArr, centroidsMatColNum, centroidsMatRowNum);
+	}
+	else if (strcmp(treeType, "r_plus") == 0)
+	{
+		cout << "r_plus" << endl;
+		initAndCluster<RangeSearch<EuclideanDistance, mat, tree::RPlusTree>, PointSelectionPolicy>(
+				epsilon, minPoints, batchMode, dataMatArr, dataMatSize, dataMatRowNum,
+				assignmentsArr, assignmentsArrSize, centroidsMatArr, centroidsMatColNum, centroidsMatRowNum);
+	}
+	else if (strcmp(treeType, "r_plus_plus") == 0)
+	{
+		cout << "r_plus_plus" << endl;
+		initAndCluster<RangeSearch<EuclideanDistance, mat, tree::RPlusPlusTree>, PointSelectionPolicy>(
+				epsilon, minPoints, batchMode, dataMatArr, dataMatSize, dataMatRowNum,
+				assignmentsArr, assignmentsArrSize, centroidsMatArr, centroidsMatColNum, centroidsMatRowNum);
+	}
+	else if (strcmp(treeType, "cover") == 0)
+	{
+		cout << "cover" << endl;
+		initAndCluster<RangeSearch<EuclideanDistance, mat, tree::CoverTree>, PointSelectionPolicy>(
+				epsilon, minPoints, batchMode, dataMatArr, dataMatSize, dataMatRowNum,
+				assignmentsArr, assignmentsArrSize, centroidsMatArr, centroidsMatColNum, centroidsMatRowNum);
+	}
+	else if (strcmp(treeType, "ball") == 0)
+	{
+		cout << "ball" << endl;
+		initAndCluster<RangeSearch<EuclideanDistance, mat, tree::BallTree>, PointSelectionPolicy>(
+				epsilon, minPoints, batchMode, dataMatArr, dataMatSize, dataMatRowNum,
+				assignmentsArr, assignmentsArrSize, centroidsMatArr, centroidsMatColNum, centroidsMatRowNum);
+	}
+	else
+	{
+		// nothing is clustered, so the outputs keep the values the caller put there
+		cout << "wrong treeType input" << endl;
+	}
+}
+
+// Clusters the given data with DBSCAN; this is the function that is called from Prolog.
+//
+// input: 	const double 			epsilon			size of the range query around a point
+//			const size_t 			minPoints		minimum number of points for each cluster
+//			const bool 				batchMode		(1) true, every other value false
+//			PointSelectionPolicy 	selectionType	"ordered"		=> OrderedPointSelection
+//													"random"		=> RandomPointSelection
+//			RangeSearchType 		treeType		"kd"			=> tree::KDTree
+//													"r"				=> tree::RTree
+//													"r_star"		=> tree::RStarTree
+//													"x"				=> tree::XTree
+//													"hilbert_r"		=> tree::HilbertRTree
+//													"r_plus"		=> tree::RPlusTree
+//													"r_plus_plus"	=> tree::RPlusPlusTree
+//													"cover"			=> tree::CoverTree
+//													"ball"			=> tree::BallTree
+//			const MatType & 		data			dataMatArr, dataMatSize, dataMatRowNum
+//
+// output: 	arma::Row< size_t > & 	assignments		assignmentsArr, assignmentsArrSize
+//			arma::mat & 			centroids		centroidsMatArr, centroidsMatColNum, centroidsMatRowNum
+//
+// description: 	Picks the DBSCAN template arguments from selectionType and treeType, clusters the
+//					data and returns the assignments and the centroids as arrays.
+//					The chosen selectionType and treeType are printed to stdout.
+//					For an unknown selectionType or treeType a message is printed instead, nothing is
+//					clustered and the outputs are returned as null arrays with all sizes set to 0.
+//
+// note: 	minPoints arrives as SP_integer but DBSCAN takes a size_t, so it must not be negative
+void dbscan(double epsilon, SP_integer minPoints, SP_integer batchMode, char const *selectionType, char const *treeType, float *dataMatArr, SP_integer dataMatSize, SP_integer dataMatRowNum,
+				float **assignmentsArr, SP_integer *assignmentsArrSize, float **centroidsMatArr, SP_integer *centroidsMatColNum, SP_integer *centroidsMatRowNum)
+{
+	// Give every output a defined value before dispatching. An unknown selectionType or treeType
+	// returns without clustering, and the outputs would otherwise be handed back uninitialized.
+	*assignmentsArr = nullptr;
+	*assignmentsArrSize = 0;
+	*centroidsMatArr = nullptr;
+	*centroidsMatColNum = 0;
+	*centroidsMatRowNum = 0;
+
+	// only the point selection policy is resolved here, clusterWithTree resolves the tree type
+	if (strcmp(selectionType, "ordered") == 0)
+	{
+		cout << "ordered" << endl;
+		clusterWithTree<OrderedPointSelection>(
+				treeType, epsilon, minPoints, batchMode, dataMatArr, dataMatSize, dataMatRowNum,
+				assignmentsArr, assignmentsArrSize, centroidsMatArr, centroidsMatColNum, centroidsMatRowNum);
+	}
+	else if (strcmp(selectionType, "random") == 0)
+	{
+		cout << "random" << endl;
+		clusterWithTree<RandomPointSelection>(
+				treeType, epsilon, minPoints, batchMode, dataMatArr, dataMatSize, dataMatRowNum,
+				assignmentsArr, assignmentsArrSize, centroidsMatArr, centroidsMatColNum, centroidsMatRowNum);
+	}
+	else
+	{
+		// the outputs keep the null / 0 defaults set at the top
+		cout << "wrong selectionType input" << endl;
+	}
+}
+
+
+void takeNumberFunction(SP_integer integerNumber, double doubleNumber, char const *string)
+{
+	// empty body: none of the three arguments is used. NOTE(review): not listed in foreign_resource in dbscan.pl - leftover?
+}
+
+SP_integer returnNumberFunction()
+{
+	return 7;	// always the constant 7. NOTE(review): not listed in foreign_resource in dbscan.pl - leftover?
+}
+
+void takeMatrixFunction(float *givenMatArr, SP_integer givenMatSize, SP_integer givenMatRowNum)
+{
+	// build the arma::mat from the Prolog array; the matrix is not used any further
+	mat givenMat = convertArrayToMat(givenMatArr, givenMatSize, givenMatRowNum);
+}
+
+void takeArrayFunction(float *givenArr, SP_integer givenArrSize)
+{
+	// build the arma::rowvec from the Prolog array; the vector is not used any further
+	rowvec givenRow = convertArrayToRowvec(givenArr, givenArrSize);
+}
+
+// Returns a 3x3 matrix of ones as one long array together with its dimensions.
+void returnMatrixFunction(float **returnMatArr, SP_integer *returnMatColNum, SP_integer *returnMatRowNum)
+{
+	// built as mat: the former rowvec(3, 3, fill::ones) asks a row vector for 3 rows, which armadillo rejects at runtime
+	mat toReturnMat(3, 3, fill::ones);
+
+	// return the Matrix dimensions
+	*returnMatColNum = toReturnMat.n_cols;
+	*returnMatRowNum = toReturnMat.n_rows;
+	// return the Matrix as one long Array
+	*returnMatArr = convertToArray(toReturnMat);
+}
+
+// Returns a row vector of three ones as an array together with its length.
+void returnArrayFunction(float **returnArr, SP_integer *returnArrSize)
+{
+	// the vector that gets handed back
+	rowvec onesVector(3, fill::ones);
+
+	// the length first
+	*returnArrSize = onesVector.n_elem;
+	// then the elements as an array
+	*returnArr = convertToArray(onesVector);
+}
\ No newline at end of file
diff --git a/src/methods/dbscan/dbscan.pl b/src/methods/dbscan/dbscan.pl
new file mode 100644
index 0000000..62c68e8
--- /dev/null
+++ b/src/methods/dbscan/dbscan.pl
@@ -0,0 +1,36 @@
+:- module(dbscan, [dbscan/13]).
+
+%% requirements of library(struct)
+:- load_files(library(str_decl),
+              [when(compile_time), if(changed)]).
+
+%% needed for using the array type
+:- use_module(library(structs)).
+:- use_module('../../helper_files/helper.pl').
+
+%% type definitions for the float array
+:- foreign_type
+       float32          = float_32,
+       float_array      = array(float32).
+
+%% definitions for the connected function
+
+%% input:       double  epsilon
+%%              int     minPoints
+%%              bool    batchMode       (1)true / (0)false
+%%              str     selectionType   "ordered", "random"
+%%              str     treeType        "kd", "r", "r_star", "x", "hilbert_r", "r_plus", "r_plus_plus", "cover", "ball"
+%%              mat     data            data(float_array), dataSize, dataRowNum
+%%
+%% output:      vec     assignments     assign(float_array), assignSize
+%%              mat     centroids       cent(float_array), centColNum, centRowNum
+%%
+%% description: Calls the dbscan function of dbscan.cpp, which clusters the data with DBSCAN.
+foreign(dbscan, c, dbscan(+float32, +integer, +integer, +string, +string, +pointer(float_array), +integer, +integer,
+                                   -pointer(float_array), -integer, -pointer(float_array), -integer, -integer)).
+
+
+%% Defines the functions that get connected from dbscan.cpp
+foreign_resource(dbscan, [dbscan]).
+
+:- load_foreign_resource(dbscan).
\ No newline at end of file
diff --git a/src/methods/dbscan/dbscan_test.pl b/src/methods/dbscan/dbscan_test.pl
new file mode 100644
index 0000000..965aaf9
--- /dev/null
+++ b/src/methods/dbscan/dbscan_test.pl
@@ -0,0 +1,48 @@
+:- use_module(library(plunit)).
+
+:- use_module(dbscan).
+:- use_module('../../helper_files/helper.pl').
+
+
+:- begin_tests(lists).
+%% NOTE(review): the unit is named lists although it holds the dbscan tests - looks like a copy-paste leftover, confirm before renaming.
+test(dbscanKDTree) :-
+        convert_list_to_float_array([5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5, 1.4, 3.0, 2.1, 0.1],4, array(Xsize, Xrownum, X)),
+        dbscan(0.7, 1, 1, ordered, kd, X, Xsize, Xrownum, Assign, Assignsize, Centroids, Centroidscolnum, Centroidsrownum),
+        print(Assignsize),
+        convert_float_array_to_list(Assign, Assignsize, ResultsAssign),
+        print(ResultsAssign),
+        print(Centroidscolnum),
+        convert_float_array_to_2d_list(Centroids, Centroidscolnum, Centroidsrownum, Results),
+        print(Results).
+test(dbscanRTree) :-
+        convert_list_to_float_array([5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5, 1.4, 3.0, 2.1, 0.1],4, array(Xsize, Xrownum, X)),
+        dbscan(0.7, 1, 1, ordered, r, X, Xsize, Xrownum, Assign, Assignsize, Centroids, Centroidscolnum, Centroidsrownum),
+        print(Assignsize),
+        convert_float_array_to_list(Assign, Assignsize, ResultsAssign),
+        print(ResultsAssign),
+        print(Centroidscolnum),
+        convert_float_array_to_2d_list(Centroids, Centroidscolnum, Centroidsrownum, Results),
+        print(Results).
+test(dbscanRStarTree) :-
+        convert_list_to_float_array([5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5, 1.4, 3.0, 2.1, 0.1],4, array(Xsize, Xrownum, X)),
+        dbscan(0.7, 1, 1, ordered, r_star, X, Xsize, Xrownum, Assign, Assignsize, Centroids, Centroidscolnum, Centroidsrownum),
+        print(Assignsize),
+        convert_float_array_to_list(Assign, Assignsize, ResultsAssign),
+        print(ResultsAssign),
+        print(Centroidscolnum),
+        convert_float_array_to_2d_list(Centroids, Centroidscolnum, Centroidsrownum, Results),
+        print(Results).
+%% same data and parameters as the tests above, but with the random point selection
+test(dbscanrandomKDTree) :-
+        convert_list_to_float_array([5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5, 1.4, 3.0, 2.1, 0.1],4, array(Xsize, Xrownum, X)),
+        dbscan(0.7, 1, 1, random, kd, X, Xsize, Xrownum, Assign, Assignsize, Centroids, Centroidscolnum, Centroidsrownum),
+        print(Assignsize),
+        convert_float_array_to_list(Assign, Assignsize, ResultsAssign),
+        print(ResultsAssign),
+        print(Centroidscolnum),
+        convert_float_array_to_2d_list(Centroids, Centroidscolnum, Centroidsrownum, Results),
+        print(Results).
+%% NOTE(review): the tests above only print the results, none of them compares a result with an expected value.
+
+:- end_tests(lists).
\ No newline at end of file
-- 
GitLab