From ff2511eba623c74d11ea2b989026fd3151560aa0 Mon Sep 17 00:00:00 2001
From: Jakhes <dean.schmitz@schmitzbauer.de>
Date: Fri, 23 Sep 2022 17:54:07 +0200
Subject: [PATCH] Adding hoeffding_tree

---
 Makefile                                      |   2 +
 src/methods/hoeffding_tree/Makefile           |   8 ++
 src/methods/hoeffding_tree/hoeffding_tree.cpp | 120 ++++++++++++++++++
 src/methods/hoeffding_tree/hoeffding_tree.pl  |  75 ++++++++++++
 .../hoeffding_tree/hoeffding_tree_test.pl     |  47 +++++++
 5 files changed, 252 insertions(+)
 create mode 100644 src/methods/hoeffding_tree/Makefile
 create mode 100644 src/methods/hoeffding_tree/hoeffding_tree.cpp
 create mode 100644 src/methods/hoeffding_tree/hoeffding_tree.pl
 create mode 100644 src/methods/hoeffding_tree/hoeffding_tree_test.pl

diff --git a/Makefile b/Makefile
index ecbabb6..bbe1ff6 100644
--- a/Makefile
+++ b/Makefile
@@ -10,6 +10,7 @@ all:
 	make -C src/methods/dbscan splfr=$(SPLFR_PATH)
 	make -C src/methods/emst splfr=$(SPLFR_PATH)
 	make -C src/methods/fastmks splfr=$(SPLFR_PATH)
+	make -C src/methods/hoeffding_tree splfr=$(SPLFR_PATH)
 	make -C src/methods/kde splfr=$(SPLFR_PATH)
 	make -C src/methods/kernel_pca splfr=$(SPLFR_PATH)
 	make -C src/methods/kmeans splfr=$(SPLFR_PATH)
@@ -39,6 +40,7 @@ clean:
 	make -C src/methods/dbscan clean
 	make -C src/methods/emst clean
 	make -C src/methods/fastmks clean
+	make -C src/methods/hoeffding_tree clean
 	make -C src/methods/kde clean
 	make -C src/methods/kernel_pca clean
 	make -C src/methods/kmeans clean
diff --git a/src/methods/hoeffding_tree/Makefile b/src/methods/hoeffding_tree/Makefile
new file mode 100644
index 0000000..d37037f
--- /dev/null
+++ b/src/methods/hoeffding_tree/Makefile
@@ -0,0 +1,8 @@
+splfr=/usr/local/sicstus4.7.1/bin/splfr
+
+METHOD_NAME=hoeffding_tree
+
+$(METHOD_NAME).so: $(METHOD_NAME).pl $(METHOD_NAME).cpp
+	$(splfr) -larmadillo -fopenmp -lmlpack -lstdc++ -cxx --struct $(METHOD_NAME).pl $(METHOD_NAME).cpp ../../helper_files/helper.cpp
+clean:
+	rm -f $(METHOD_NAME).so
diff --git a/src/methods/hoeffding_tree/hoeffding_tree.cpp b/src/methods/hoeffding_tree/hoeffding_tree.cpp
new file mode 100644
index 0000000..1d1cfc4
--- /dev/null
+++ b/src/methods/hoeffding_tree/hoeffding_tree.cpp
@@ -0,0 +1,120 @@
+#include <sicstus/sicstus.h>
+/* hoeffding_tree_glue.h is generated by splfr from the foreign/[2,3] facts.
+   Always include the glue header in your foreign resource code.
+*/
+#include "hoeffding_tree_glue.h"
+#include <mlpack/methods/hoeffding_trees/hoeffding_tree_model.hpp>
+#include <mlpack/core.hpp>
+
+#include <cstring>
+#include <iostream>
+
+// including helper functions for converting between arma structures and arrays
+#include "../../helper_files/helper.hpp"
+
+// some of the most used namespaces
+using namespace arma;
+using namespace mlpack;
+using namespace std;
+using namespace mlpack::tree;
+
+// Global Variable of the HoeffdingTreeModel object so it can be accessed from all functions
+HoeffdingTreeModel hoeffdingTreeObj;
+
+// input:   const TreeType & type = GINI_HOEFFDING,
+//          const arma::mat & dataset,
+//          const data::DatasetInfo & datasetInfo,
+//          const arma::Row< size_t > & labels,
+//          const size_t numClasses,
+//          const bool batchTraining,
+//          const double successProbability,
+//          const size_t maxSamples,
+//          const size_t checkInterval,
+//          const size_t minSamples,
+//          const size_t bins,
+//          const size_t observationsBeforeBinning
+// output:
+// description:
+//          Replaces the global model with a new one of the tree type named by
+//          treeType ("gini-hoeffding", "gini-binary", "info-hoeffding" or
+//          "info-binary") and builds it on the given data and labels.
+//          The DatasetInfo is created from the number of rows of the data matrix.
+//          An unknown treeType is reported on stdout and nothing is built,
+//          so the model that was stored before stays in place.
+void initAndBuildModel(char const *treeType,
+                       float *dataMatArr, SP_integer dataMatSize, SP_integer dataMatRowNum,
+                       float *labelsArr, SP_integer labelsArrSize,
+                       SP_integer numClasses, SP_integer batchTraining, double successProbability, SP_integer maxSamples, SP_integer checkInterval, SP_integer minSamples, SP_integer bins, SP_integer observationsBeforeBinning)
+{
+    // convert the Prolog arrays to arma::mat
+    mat data = convertArrayToMat(dataMatArr, dataMatSize, dataMatRowNum);
+
+    // convert the Prolog arrays to arma::Row< size_t >
+    Row< size_t > labelsVector = convertArrayToVec(labelsArr, labelsArrSize);
+
+    if (strcmp(treeType, "gini-hoeffding") == 0)
+        hoeffdingTreeObj = HoeffdingTreeModel(HoeffdingTreeModel::GINI_HOEFFDING);
+    else if (strcmp(treeType, "gini-binary") == 0)
+        hoeffdingTreeObj = HoeffdingTreeModel(HoeffdingTreeModel::GINI_BINARY);
+    else if (strcmp(treeType, "info-hoeffding") == 0)
+        hoeffdingTreeObj = HoeffdingTreeModel(HoeffdingTreeModel::INFO_HOEFFDING);
+    else if (strcmp(treeType, "info-binary") == 0)
+        hoeffdingTreeObj = HoeffdingTreeModel(HoeffdingTreeModel::INFO_BINARY);
+    else
+    {
+        // never build on top of a model of some other, previously chosen type
+        cout << "wrong treeType input" << endl;
+        return;
+    }
+
+    hoeffdingTreeObj.BuildModel(data, data::DatasetInfo(data.n_rows), labelsVector, numClasses, (batchTraining == 1), successProbability, maxSamples, checkInterval, minSamples, bins, observationsBeforeBinning);
+}
+
+// input:   const arma::mat & dataset,
+//          arma::Row< size_t > & predictions <-,
+//          arma::rowvec & probabilities <-
+// output:
+// description:
+//          Classifies the given points with the global model and hands the
+//          predictions and probabilities back through the output arrays.
+void classify(float *dataMatArr, SP_integer dataMatSize, SP_integer dataMatRowNum,
+              float **predictArr, SP_integer *predictArrSize,
+              float **probsArr, SP_integer *probsArrSize)
+{
+    // convert the Prolog arrays to arma::mat
+    mat data = convertArrayToMat(dataMatArr, dataMatSize, dataMatRowNum);
+
+    // create the ReturnVector
+    Row< size_t > predictReturnVector;
+
+    // create the ReturnVector
+    rowvec probsReturnVector;
+
+    hoeffdingTreeObj.Classify(data, predictReturnVector, probsReturnVector);
+
+    // return the Vector
+    returnVectorInformation(predictReturnVector, predictArr, predictArrSize);
+
+    // return the Vector
+    returnVectorInformation(probsReturnVector, probsArr, probsArrSize);
+}
+
+// input:   const arma::mat & dataset,
+//          const arma::Row< size_t > & labels,
+//          const bool batchTraining
+// output:
+// description:
+//          Trains the global model on the given data and labels;
+//          batchTraining is true only when the Prolog side passes 1.
+void train(float *dataMatArr, SP_integer dataMatSize, SP_integer dataMatRowNum,
+           float *labelsArr, SP_integer labelsArrSize,
+           SP_integer batchTraining)
+{
+    // convert the Prolog arrays to arma::mat
+    mat data = convertArrayToMat(dataMatArr, dataMatSize, dataMatRowNum);
+
+    // convert the Prolog arrays to arma::Row< size_t >
+    Row< size_t > labelsVector = convertArrayToVec(labelsArr, labelsArrSize);
+
+    hoeffdingTreeObj.Train(data, labelsVector, (batchTraining == 1));
+}
diff --git a/src/methods/hoeffding_tree/hoeffding_tree.pl b/src/methods/hoeffding_tree/hoeffding_tree.pl
new file mode 100644
index 0000000..3836ad6
--- /dev/null
+++ b/src/methods/hoeffding_tree/hoeffding_tree.pl
@@ -0,0 +1,75 @@
+:- module(hoeffding_tree, [     initAndBuildModel/14,
+                                classify/7,
+                                train/6]).
+
+%% requirements of library(struct)
+:- load_files(library(str_decl),
+                [when(compile_time), if(changed)]).
+
+%% needed for using the array type
+:- use_module(library(structs)).
+:- use_module('../../helper_files/helper.pl').
+
+%% type definitions for the float array
+:- foreign_type
+        float32          = float_32,
+        float_array      = array(float32).
+
+%% definitions for the connected function
+
+%% --Input--
+%%              string  treeType => "gini-hoeffding", "gini-binary", "info-hoeffding", "info-binary",
+%%              mat     dataset,
+%%              vec     labels,
+%%              int     numClasses,
+%%              bool    batchTraining => (1)true / (0)false,
+%%              float32 successProbability,
+%%              int     maxSamples,
+%%              int     checkInterval,
+%%              int     minSamples,
+%%              int     bins,
+%%              int     observationsBeforeBinning
+%%
+%% --Output--
+%%
+%% --Description--
+%%              Builds a new hoeffding tree of the given type on the dataset and labels.
+%%              An unknown treeType prints a message and leaves the current model unchanged.
+foreign(initAndBuildModel, c, initAndBuildModel(+string,
+                                                +pointer(float_array), +integer, +integer,
+                                                +pointer(float_array), +integer,
+                                                +integer, +integer, +float32, +integer, +integer, +integer, +integer, +integer)).
+
+%% --Input--
+%%              mat     data
+%%
+%% --Output--
+%%              vec     predictions,
+%%              vec     probabilities
+%%
+%% --Description--
+%%              Classifies the given data with the current model.
+foreign(classify, c, classify(  +pointer(float_array), +integer, +integer,
+                                -pointer(float_array), -integer,
+                                -pointer(float_array), -integer)).
+
+%% --Input--
+%%              mat     data,
+%%              vec     labels,
+%%              bool    batchTraining => (1)true / (0)false
+%%
+%% --Output--
+%%
+%% --Description--
+%%              Trains the current model on the given data and labels.
+foreign(train, c, train(+pointer(float_array), +integer, +integer,
+                        +pointer(float_array), +integer,
+                        +integer)).
+
+
+%% Defines the functions that get connected from hoeffding_tree.cpp
+foreign_resource(hoeffding_tree, [      initAndBuildModel,
+                                        classify,
+                                        train]).
+
+:- load_foreign_resource(hoeffding_tree).
diff --git a/src/methods/hoeffding_tree/hoeffding_tree_test.pl b/src/methods/hoeffding_tree/hoeffding_tree_test.pl
new file mode 100644
index 0000000..cf01d51
--- /dev/null
+++ b/src/methods/hoeffding_tree/hoeffding_tree_test.pl
@@ -0,0 +1,47 @@
+:- use_module(library(plunit)).
+
+:- use_module(hoeffding_tree).
+:- use_module('../../helper_files/helper.pl').
+
+%% sample data: 12 values in 3 rows (4 columns) and one label per column
+sample_data(X, Xsize, Xrownum, Y, Ysize) :-
+        convert_list_to_float_array([5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5],3, array(Xsize, Xrownum, X)),
+        convert_list_to_float_array([0.0,1.0,0.0,1.0], array(Ysize, Y)).
+
+%% builds a fresh model of the given tree type on the sample data
+build_Model(TreeType) :-
+        sample_data(X, Xsize, Xrownum, Y, Ysize),
+        initAndBuildModel(TreeType, X, Xsize, Xrownum, Y, Ysize, 2, 0, 0.95, 5000, 100, 100, 10, 100).
+
+reset_Model :-
+        build_Model('gini-hoeffding').
+
+:- begin_tests(hoeffding_tree).
+
+%% initAndBuildModel tests
+test(init_gini_hoeffding) :-
+        build_Model('gini-hoeffding').
+test(init_gini_binary) :-
+        build_Model('gini-binary').
+test(init_info_hoeffding) :-
+        build_Model('info-hoeffding').
+test(init_info_binary) :-
+        build_Model('info-binary').
+
+%% train tests
+test(train_streaming) :-
+        reset_Model,
+        sample_data(X, Xsize, Xrownum, Y, Ysize),
+        train(X, Xsize, Xrownum, Y, Ysize, 0).
+test(train_batch) :-
+        reset_Model,
+        sample_data(X, Xsize, Xrownum, Y, Ysize),
+        train(X, Xsize, Xrownum, Y, Ysize, 1).
+
+%% classify tests
+test(classify_after_build) :-
+        reset_Model,
+        sample_data(X, Xsize, Xrownum, _, _),
+        classify(X, Xsize, Xrownum, _, PredictionsSize, _, ProbabilitiesSize),
+        PredictionsSize =:= ProbabilitiesSize.
+:- end_tests(hoeffding_tree).
-- 
GitLab