From f65f7180c7101fc8b73b042699940c49ca503895 Mon Sep 17 00:00:00 2001
From: Jakhes <dean.schmitz@schmitzbauer.de>
Date: Tue, 13 Sep 2022 14:09:23 +0200
Subject: [PATCH] Adding DBScan to the project

---
 Makefile                          |   2 +
 src/methods/dbscan/Makefile       |   8 +
 src/methods/dbscan/dbscan.cpp     | 229 ++++++++++++++++++++++++++++++
 src/methods/dbscan/dbscan.pl      |  38 +++++
 src/methods/dbscan/dbscan_test.pl |  33 ++++
 5 files changed, 310 insertions(+)
 create mode 100644 src/methods/dbscan/Makefile
 create mode 100644 src/methods/dbscan/dbscan.cpp
 create mode 100644 src/methods/dbscan/dbscan.pl
 create mode 100644 src/methods/dbscan/dbscan_test.pl

diff --git a/Makefile b/Makefile
index 3e8d933..8eaebab 100644
--- a/Makefile
+++ b/Makefile
@@ -7,6 +7,7 @@ all:
 	make -C src/methods/lars splfr=$(SPLFR_PATH)
 	make -C src/methods/ada_boost splfr=$(SPLFR_PATH)
 	make -C src/methods/kmeans splfr=$(SPLFR_PATH)
+	make -C src/methods/dbscan splfr=$(SPLFR_PATH)
 
 clean:
 	make -C src/methods/bayesian_linear_regression clean
@@ -14,3 +15,4 @@ clean:
 	make -C src/methods/lars clean
 	make -C src/methods/ada_boost clean
 	make -C src/methods/kmeans clean
+	make -C src/methods/dbscan clean
diff --git a/src/methods/dbscan/Makefile b/src/methods/dbscan/Makefile
new file mode 100644
index 0000000..f77df52
--- /dev/null
+++ b/src/methods/dbscan/Makefile
@@ -0,0 +1,8 @@
+splfr=/usr/local/sicstus4.7.1/bin/splfr
+
+METHOD_NAME=dbscan
+
+$(METHOD_NAME).so: $(METHOD_NAME).pl $(METHOD_NAME).cpp
+	$(splfr) -larmadillo -fopenmp -lmlpack -lstdc++ -cxx --struct $(METHOD_NAME).pl $(METHOD_NAME).cpp ../../helper_files/helper.cpp
+clean:
+	rm -f $(METHOD_NAME).so
diff --git a/src/methods/dbscan/dbscan.cpp b/src/methods/dbscan/dbscan.cpp
new file mode 100644
index 0000000..c0a1c2d
--- /dev/null
+++ b/src/methods/dbscan/dbscan.cpp
@@ -0,0 +1,229 @@
+#include <sicstus/sicstus.h>
+/* ex_glue.h is generated by splfr from the foreign/[2,3] facts.
+   Always include the glue header in your foreign resource code.
+*/
+#include "dbscan_glue.h"
+#include <mlpack/methods/dbscan/dbscan.hpp>
+#include <mlpack/methods/range_search/rs_model.hpp>
+#include <mlpack/core.hpp>
+
+#include <cstring>
+#include <exception>
+#include <iostream>
+
+// including helper functions for converting between arma structures and arrays
+#include "../../helper_files/helper.hpp"
+
+// some of the most used namespaces
+using namespace arma;
+using namespace mlpack;
+using namespace std;
+using namespace mlpack::dbscan;
+using namespace mlpack::range;
+using namespace mlpack::metric;
+
+
+// Resets all results to "empty", so that the Prolog side never reads
+// uninitialized pointers or sizes when no clustering could be done.
+static void clearResults(float **assignmentsArr, SP_integer *assignmentsArrSize,
+                         float **centroidsMatArr, SP_integer *centroidsMatColNum, SP_integer *centroidsMatRowNum)
+{
+    *assignmentsArr = nullptr;
+    *assignmentsArrSize = 0;
+    *centroidsMatArr = nullptr;
+    *centroidsMatColNum = 0;
+    *centroidsMatRowNum = 0;
+}
+
+// Runs DBSCAN with the given range search type and point selection policy
+// and hands the assignments and centroids back through the output parameters.
+template<typename RangeSearchType = range::RangeSearch<>, typename PointSelectionPolicy = OrderedPointSelection>
+void initAndCluster(double epsilon, SP_integer minPoints, SP_integer batchMode, float *dataMatArr, SP_integer dataMatSize, SP_integer dataMatRowNum,
+                    float **assignmentsArr, SP_integer *assignmentsArrSize, float **centroidsMatArr, SP_integer *centroidsMatColNum, SP_integer *centroidsMatRowNum)
+{
+    // convert the Prolog arrays to arma::mat
+    mat data = convertArrayToMat(dataMatArr, dataMatSize, dataMatRowNum);
+
+    // get the ReturnVector
+    arma::Row<size_t> toReturnVector;
+
+    // get the ReturnMat
+    mat toReturnMat;
+
+    // batch mode is only switched on by the value 1
+    DBSCAN<RangeSearchType, PointSelectionPolicy>(epsilon, minPoints, (batchMode == 1)).Cluster(data, toReturnVector, toReturnMat);
+
+    // return the Vector length
+    *assignmentsArrSize = toReturnVector.n_elem;
+
+    // return the Vector as Array
+    *assignmentsArr = convertToArray(toReturnVector);
+
+    // return the Matrix dimensions
+    *centroidsMatColNum = toReturnMat.n_cols;
+    *centroidsMatRowNum = toReturnMat.n_rows;
+
+    // return the Matrix as one long Array
+    *centroidsMatArr = convertToArray(toReturnMat);
+}
+
+// Runs the clustering with the range search tree named by treeType.
+// Returns false, without touching the results, if treeType is not a known name.
+template<typename PointSelectionPolicy>
+static bool clusterWithTree(char const *treeType, double epsilon, SP_integer minPoints, SP_integer batchMode, float *dataMatArr, SP_integer dataMatSize, SP_integer dataMatRowNum,
+                            float **assignmentsArr, SP_integer *assignmentsArrSize, float **centroidsMatArr, SP_integer *centroidsMatColNum, SP_integer *centroidsMatRowNum)
+{
+    if (strcmp(treeType, "kd") == 0)
+    {
+        initAndCluster<RangeSearch<EuclideanDistance, mat, tree::KDTree>, PointSelectionPolicy>(epsilon, minPoints, batchMode, dataMatArr, dataMatSize, dataMatRowNum,
+                assignmentsArr, assignmentsArrSize, centroidsMatArr, centroidsMatColNum, centroidsMatRowNum);
+    }
+    else if (strcmp(treeType, "r") == 0)
+    {
+        initAndCluster<RangeSearch<EuclideanDistance, mat, tree::RTree>, PointSelectionPolicy>(epsilon, minPoints, batchMode, dataMatArr, dataMatSize, dataMatRowNum,
+                assignmentsArr, assignmentsArrSize, centroidsMatArr, centroidsMatColNum, centroidsMatRowNum);
+    }
+    else if (strcmp(treeType, "r_star") == 0)
+    {
+        initAndCluster<RangeSearch<EuclideanDistance, mat, tree::RStarTree>, PointSelectionPolicy>(epsilon, minPoints, batchMode, dataMatArr, dataMatSize, dataMatRowNum,
+                assignmentsArr, assignmentsArrSize, centroidsMatArr, centroidsMatColNum, centroidsMatRowNum);
+    }
+    else if (strcmp(treeType, "x") == 0)
+    {
+        initAndCluster<RangeSearch<EuclideanDistance, mat, tree::XTree>, PointSelectionPolicy>(epsilon, minPoints, batchMode, dataMatArr, dataMatSize, dataMatRowNum,
+                assignmentsArr, assignmentsArrSize, centroidsMatArr, centroidsMatColNum, centroidsMatRowNum);
+    }
+    else if (strcmp(treeType, "hilbert_r") == 0)
+    {
+        initAndCluster<RangeSearch<EuclideanDistance, mat, tree::HilbertRTree>, PointSelectionPolicy>(epsilon, minPoints, batchMode, dataMatArr, dataMatSize, dataMatRowNum,
+                assignmentsArr, assignmentsArrSize, centroidsMatArr, centroidsMatColNum, centroidsMatRowNum);
+    }
+    else if (strcmp(treeType, "r_plus") == 0)
+    {
+        initAndCluster<RangeSearch<EuclideanDistance, mat, tree::RPlusTree>, PointSelectionPolicy>(epsilon, minPoints, batchMode, dataMatArr, dataMatSize, dataMatRowNum,
+                assignmentsArr, assignmentsArrSize, centroidsMatArr, centroidsMatColNum, centroidsMatRowNum);
+    }
+    else if (strcmp(treeType, "r_plus_plus") == 0)
+    {
+        initAndCluster<RangeSearch<EuclideanDistance, mat, tree::RPlusPlusTree>, PointSelectionPolicy>(epsilon, minPoints, batchMode, dataMatArr, dataMatSize, dataMatRowNum,
+                assignmentsArr, assignmentsArrSize, centroidsMatArr, centroidsMatColNum, centroidsMatRowNum);
+    }
+    else if (strcmp(treeType, "cover") == 0)
+    {
+        initAndCluster<RangeSearch<EuclideanDistance, mat, tree::CoverTree>, PointSelectionPolicy>(epsilon, minPoints, batchMode, dataMatArr, dataMatSize, dataMatRowNum,
+                assignmentsArr, assignmentsArrSize, centroidsMatArr, centroidsMatColNum, centroidsMatRowNum);
+    }
+    else if (strcmp(treeType, "ball") == 0)
+    {
+        initAndCluster<RangeSearch<EuclideanDistance, mat, tree::BallTree>, PointSelectionPolicy>(epsilon, minPoints, batchMode, dataMatArr, dataMatSize, dataMatRowNum,
+                assignmentsArr, assignmentsArrSize, centroidsMatArr, centroidsMatColNum, centroidsMatRowNum);
+    }
+    else
+    {
+        return false;
+    }
+    return true;
+}
+
+// input:   const double epsilon,
+//          const size_t minPoints,
+//          const bool batchMode                => (1)true / (0)false
+//          PointSelectionPolicy selectionType  => "ordered", "random"
+//          RangeSearchType treeType            => "kd", "r", "r_star", "x", "hilbert_r", "r_plus", "r_plus_plus", "cover", "ball"
+//          const MatType & data                => dataMatArr, dataMatSize, dataMatRowNum
+//
+// output:  arma::Row< size_t > & assignments   => assignmentsArr, assignmentsArrSize
+//          arma::mat & centroids               => centroidsMatArr, centroidsMatColNum, centroidsMatRowNum
+//
+// description:
+//          Clusters the data with DBSCAN, using the selected point selection policy and range search tree.
+//          If selectionType or treeType is unknown, or the clustering throws, a message is
+//          written to stderr and empty results (null arrays, all sizes 0) are returned.
+void dbscan(double epsilon, SP_integer minPoints, SP_integer batchMode, char const *selectionType, char const *treeType, float *dataMatArr, SP_integer dataMatSize, SP_integer dataMatRowNum,
+            float **assignmentsArr, SP_integer *assignmentsArrSize, float **centroidsMatArr, SP_integer *centroidsMatColNum, SP_integer *centroidsMatRowNum)
+{
+    // start from well-defined empty results, every failure path below relies on this
+    clearResults(assignmentsArr, assignmentsArrSize, centroidsMatArr, centroidsMatColNum, centroidsMatRowNum);
+
+    try
+    {
+        bool knownTreeType = true;
+
+        if (strcmp(selectionType, "ordered") == 0)
+        {
+            knownTreeType = clusterWithTree<OrderedPointSelection>(treeType, epsilon, minPoints, batchMode, dataMatArr, dataMatSize, dataMatRowNum,
+                    assignmentsArr, assignmentsArrSize, centroidsMatArr, centroidsMatColNum, centroidsMatRowNum);
+        }
+        else if (strcmp(selectionType, "random") == 0)
+        {
+            knownTreeType = clusterWithTree<RandomPointSelection>(treeType, epsilon, minPoints, batchMode, dataMatArr, dataMatSize, dataMatRowNum,
+                    assignmentsArr, assignmentsArrSize, centroidsMatArr, centroidsMatColNum, centroidsMatRowNum);
+        }
+        else
+        {
+            cerr << "wrong selectionType input" << endl;
+            return;
+        }
+
+        if (!knownTreeType)
+        {
+            cerr << "wrong treeType input" << endl;
+        }
+    }
+    catch (const std::exception &e)
+    {
+        // an exception must not escape into the Prolog runtime; report it and return empty results
+        clearResults(assignmentsArr, assignmentsArrSize, centroidsMatArr, centroidsMatColNum, centroidsMatRowNum);
+        cerr << "dbscan failed: " << e.what() << endl;
+    }
+}
+
+
+// The functions below are examples left over from the foreign interface template.
+// They are not listed in foreign_resource/2 in dbscan.pl.
+void takeNumberFunction(SP_integer integerNumber, double doubleNumber, char const *string)
+{
+
+}
+
+SP_integer returnNumberFunction()
+{
+    return 7;
+}
+
+void takeMatrixFunction(float *givenMatArr, SP_integer givenMatSize, SP_integer givenMatRowNum)
+{
+    // convert the Prolog arrays to arma::mat
+    mat data = convertArrayToMat(givenMatArr, givenMatSize, givenMatRowNum);
+}
+
+void takeArrayFunction(float *givenArr, SP_integer givenArrSize)
+{
+    // convert the Prolog arrays to arma::rowvec
+    rowvec givenVector = convertArrayToRowvec(givenArr, givenArrSize);
+}
+
+void returnMatrixFunction(float **returnMatArr, SP_integer *returnMatColNum, SP_integer *returnMatRowNum)
+{
+    // get the ReturnMat; this has to be built as a mat, a rowvec cannot hold 3 rows
+    mat toReturnMat = mat(3, 3, fill::ones);
+
+    // return the Matrix dimensions
+    *returnMatColNum = toReturnMat.n_cols;
+    *returnMatRowNum = toReturnMat.n_rows;
+
+    // return the Matrix as one long Array
+    *returnMatArr = convertToArray(toReturnMat);
+}
+
+void returnArrayFunction(float **returnArr, SP_integer *returnArrSize)
+{
+    // get the ReturnVector
+    rowvec toReturnVector = rowvec(3, fill::ones);
+
+    // return the Vector length
+    *returnArrSize = toReturnVector.n_elem;
+
+    // return the Vector as Array
+    *returnArr = convertToArray(toReturnVector);
+}
\ No newline at end of file
diff --git a/src/methods/dbscan/dbscan.pl b/src/methods/dbscan/dbscan.pl
new file mode 100644
index 0000000..62c68e8
--- /dev/null
+++ b/src/methods/dbscan/dbscan.pl
@@ -0,0 +1,38 @@
+:- module(dbscan, [dbscan/13]).
+
+%% requirements of library(struct)
+:- load_files(library(str_decl),
+        [when(compile_time), if(changed)]).
+
+%% needed for using the array type
+:- use_module(library(structs)).
+:- use_module('../../helper_files/helper.pl').
+
+%% type definitions for the float array
+:- foreign_type
+        float32          = float_32,
+        float_array      = array(float32).
+
+%% definitions for the connected function
+
+%% input:   double epsilon
+%%          int minPoints
+%%          bool batchMode      (1)true / (0)false
+%%          str selectionType   "ordered", "random"
+%%          str treeType        "kd", "r", "r_star", "x", "hilbert_r", "r_plus", "r_plus_plus", "cover", "ball"
+%%          mat data            data(float_array), dataSize, dataRowNum
+%%
+%% output:  vec assignments     assign(float_array), assignSize
+%%          mat centroids       cent(float_array), centColNum, centRowNum
+%%
+%% description:
+%%          Clusters the data with DBSCAN and returns the assignments and the centroids.
+%%          On an unknown selectionType or treeType the sizes are 0 and no arrays are returned.
+foreign(dbscan, c, dbscan(+float32, +integer, +integer, +string, +string, +pointer(float_array), +integer, +integer,
+        -pointer(float_array), -integer, -pointer(float_array), -integer, -integer)).
+
+
+%% Defines the functions that get connected from dbscan.cpp
+foreign_resource(dbscan, [dbscan]).
+
+:- load_foreign_resource(dbscan).
\ No newline at end of file
diff --git a/src/methods/dbscan/dbscan_test.pl b/src/methods/dbscan/dbscan_test.pl
new file mode 100644
index 0000000..965aaf9
--- /dev/null
+++ b/src/methods/dbscan/dbscan_test.pl
@@ -0,0 +1,33 @@
+:- use_module(library(plunit)).
+
+:- use_module(dbscan).
+:- use_module('../../helper_files/helper.pl').
+
+
+:- begin_tests(dbscan).
+
+%% Runs dbscan on 16 sample values (passed with row number 4) with the given selection
+%% and tree type, prints the results and checks that 4 assignments were returned.
+run_dbscan(SelectionType, TreeType) :-
+        convert_list_to_float_array([5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5, 1.4, 3.0, 2.1, 0.1],4, array(Xsize, Xrownum, X)),
+        dbscan(0.7, 1, 1, SelectionType, TreeType, X, Xsize, Xrownum, Assign, Assignsize, Centroids, Centroidscolnum, Centroidsrownum),
+        print(Assignsize),
+        Assignsize =:= 4,
+        convert_float_array_to_list(Assign, Assignsize, ResultsAssign),
+        print(ResultsAssign),
+        print(Centroidscolnum),
+        convert_float_array_to_2d_list(Centroids, Centroidscolnum, Centroidsrownum, Results),
+        print(Results).
+
+test(dbscanKDTree) :-
+        run_dbscan(ordered, kd).
+test(dbscanRTree) :-
+        run_dbscan(ordered, r).
+test(dbscanRStarTree) :-
+        run_dbscan(ordered, r_star).
+
+test(dbscanrandomKDTree) :-
+        run_dbscan(random, kd).
+
+
+:- end_tests(dbscan).
\ No newline at end of file
-- 
GitLab