From ff2511eba623c74d11ea2b989026fd3151560aa0 Mon Sep 17 00:00:00 2001
From: Jakhes <dean.schmitz@schmitzbauer.de>
Date: Fri, 23 Sep 2022 17:54:07 +0200
Subject: [PATCH] Adding hoeffding_tree

---
 Makefile                                      |   2 +
 src/methods/hoeffding_tree/Makefile           |   8 ++
 src/methods/hoeffding_tree/hoeffding_tree.cpp | 108 ++++++++++++++++++
 src/methods/hoeffding_tree/hoeffding_tree.pl  |  74 ++++++++++++
 .../hoeffding_tree/hoeffding_tree_test.pl     |  56 +++++++++
 5 files changed, 248 insertions(+)
 create mode 100644 src/methods/hoeffding_tree/Makefile
 create mode 100644 src/methods/hoeffding_tree/hoeffding_tree.cpp
 create mode 100644 src/methods/hoeffding_tree/hoeffding_tree.pl
 create mode 100644 src/methods/hoeffding_tree/hoeffding_tree_test.pl

diff --git a/Makefile b/Makefile
index ecbabb6..bbe1ff6 100644
--- a/Makefile
+++ b/Makefile
@@ -10,6 +10,7 @@ all:
 	make -C src/methods/dbscan splfr=$(SPLFR_PATH)
 	make -C src/methods/emst splfr=$(SPLFR_PATH)
 	make -C src/methods/fastmks splfr=$(SPLFR_PATH)
+	make -C src/methods/hoeffding_tree splfr=$(SPLFR_PATH)
 	make -C src/methods/kde splfr=$(SPLFR_PATH)
 	make -C src/methods/kernel_pca splfr=$(SPLFR_PATH)
 	make -C src/methods/kmeans splfr=$(SPLFR_PATH)
@@ -39,6 +40,7 @@ clean:
 	make -C src/methods/dbscan clean
 	make -C src/methods/emst clean
 	make -C src/methods/fastmks clean
+	make -C src/methods/hoeffding_tree clean
 	make -C src/methods/kde clean
 	make -C src/methods/kernel_pca clean
 	make -C src/methods/kmeans clean
diff --git a/src/methods/hoeffding_tree/Makefile b/src/methods/hoeffding_tree/Makefile
new file mode 100644
index 0000000..d37037f
--- /dev/null
+++ b/src/methods/hoeffding_tree/Makefile
@@ -0,0 +1,8 @@
+# Builds the hoeffding_tree foreign resource; splfr can be overridden on the command line (splfr=...).
+splfr=/usr/local/sicstus4.7.1/bin/splfr
+METHOD_NAME=hoeffding_tree
+.PHONY: clean
+$(METHOD_NAME).so: $(METHOD_NAME).pl $(METHOD_NAME).cpp ../../helper_files/helper.cpp
+	$(splfr) -larmadillo -fopenmp -lmlpack -lstdc++ -cxx --struct $(METHOD_NAME).pl $(METHOD_NAME).cpp ../../helper_files/helper.cpp
+clean:
+	rm -f $(METHOD_NAME).so
diff --git a/src/methods/hoeffding_tree/hoeffding_tree.cpp b/src/methods/hoeffding_tree/hoeffding_tree.cpp
new file mode 100644
index 0000000..1d1cfc4
--- /dev/null
+++ b/src/methods/hoeffding_tree/hoeffding_tree.cpp
@@ -0,0 +1,108 @@
+#include <sicstus/sicstus.h>
+/* hoeffding_tree_glue.h is generated by splfr from the foreign/[2,3] facts.
+   Always include the glue header in your foreign resource code.
+*/
+#include "hoeffding_tree_glue.h"
+#include <mlpack/methods/hoeffding_trees/hoeffding_tree_model.hpp>
+#include <mlpack/core.hpp>
+
+// including helper functions for converting between arma structures and arrays
+#include "../../helper_files/helper.hpp"
+
+// some of the most used namespaces
+using namespace arma;
+using namespace mlpack;
+using namespace std;
+using namespace mlpack::tree;
+
+// Global Variable of the HoeffdingTreeModel object so it can be accessed from all functions
+HoeffdingTreeModel hoeffdingTreeObj;
+
+// Selects the tree type of the global model and builds it on the given data.
+// input:   treeType                    "gini-hoeffding", "gini-binary",
+//                                      "info-hoeffding" or "info-binary",
+//          dataMatArr, dataMatSize,
+//          dataMatRowNum               dataset, converted by convertArrayToMat,
+//          labelsArr, labelsArrSize    labels, converted by convertArrayToVec,
+//          numClasses, batchTraining (1 = true), successProbability,
+//          maxSamples, checkInterval, minSamples, bins,
+//          observationsBeforeBinning   forwarded unchanged to BuildModel
+// output:  none
+// description: an unknown treeType prints a message and leaves the global
+//          model untouched instead of building with a stale tree type
+void initAndBuildModel(char const *treeType,
+                        float *dataMatArr, SP_integer dataMatSize, SP_integer dataMatRowNum,
+                        float *labelsArr, SP_integer labelsArrSize,
+                        SP_integer numClasses, SP_integer batchTraining, double successProbability, SP_integer maxSamples, SP_integer checkInterval, SP_integer minSamples, SP_integer bins, SP_integer observationsBeforeBinning)
+{
+    // convert the Prolog arrays into the arma dataset and the label vector
+    mat data = convertArrayToMat(dataMatArr, dataMatSize, dataMatRowNum);
+    Row< size_t > labelsVector = convertArrayToVec(labelsArr, labelsArrSize);
+
+    if (strcmp(treeType, "gini-hoeffding") == 0)
+        hoeffdingTreeObj = HoeffdingTreeModel(HoeffdingTreeModel::GINI_HOEFFDING);
+    else if (strcmp(treeType, "gini-binary") == 0)
+        hoeffdingTreeObj = HoeffdingTreeModel(HoeffdingTreeModel::GINI_BINARY);
+    else if (strcmp(treeType, "info-hoeffding") == 0)
+        hoeffdingTreeObj = HoeffdingTreeModel(HoeffdingTreeModel::INFO_HOEFFDING);
+    else if (strcmp(treeType, "info-binary") == 0)
+        hoeffdingTreeObj = HoeffdingTreeModel(HoeffdingTreeModel::INFO_BINARY);
+    else
+    {
+        cout << "wrong treeType input" << endl;
+        return; // do not build with whatever tree type the global object held before
+    }
+
+    // NOTE(review): the DatasetInfo temporary only lives for this statement -
+    // confirm BuildModel does not keep a reference to it
+    hoeffdingTreeObj.BuildModel(data, data::DatasetInfo(data.n_rows), labelsVector, numClasses, (batchTraining == 1), successProbability, maxSamples, checkInterval, minSamples, bins, observationsBeforeBinning);
+}
+
+// Classifies the given points with the global HoeffdingTreeModel.
+// input:   dataMatArr, dataMatSize,
+//          dataMatRowNum               dataset, converted by convertArrayToMat
+// output:  predictArr, predictArrSize  predictions <-,
+//          probsArr, probsArrSize      probabilities <-
+// description: runs Classify on the dataset and hands both result vectors
+//          back through returnVectorInformation
+// NOTE(review): presumably requires a prior initAndBuildModel call -
+//          confirm the behaviour on a model that was never built
+void classify(float *dataMatArr, SP_integer dataMatSize, SP_integer dataMatRowNum,
+                float **predictArr, SP_integer *predictArrSize,
+                float **probsArr, SP_integer *probsArrSize)
+{
+    // turn the Prolog array into the arma::mat that gets classified
+    mat points = convertArrayToMat(dataMatArr, dataMatSize, dataMatRowNum);
+
+    // containers that Classify fills
+    Row< size_t > predictions;
+    rowvec probabilities;
+
+    // let the model fill both vectors
+    hoeffdingTreeObj.Classify(points, predictions, probabilities);
+
+    // hand the predictions back to Prolog
+    returnVectorInformation(predictions, predictArr, predictArrSize);
+
+    // hand the probabilities back to Prolog
+    returnVectorInformation(probabilities, probsArr, probsArrSize);
+}
+
+// Trains the global HoeffdingTreeModel on the given labelled data.
+// input:   dataMatArr, dataMatSize,
+//          dataMatRowNum               dataset, converted by convertArrayToMat,
+//          labelsArr, labelsArrSize    labels, converted by convertArrayToVec,
+//          batchTraining               1 = true, anything else = false
+// output:  none
+void train(float *dataMatArr, SP_integer dataMatSize, SP_integer dataMatRowNum,
+            float *labelsArr, SP_integer labelsArrSize,
+            SP_integer batchTraining)
+{
+    const bool useBatchTraining = (batchTraining == 1);
+
+    // turn the Prolog arrays into the arma dataset and the label vector
+    mat points = convertArrayToMat(dataMatArr, dataMatSize, dataMatRowNum);
+    Row< size_t > pointLabels = convertArrayToVec(labelsArr, labelsArrSize);
+
+    hoeffdingTreeObj.Train(points, pointLabels, useBatchTraining);
+}
\ No newline at end of file
diff --git a/src/methods/hoeffding_tree/hoeffding_tree.pl b/src/methods/hoeffding_tree/hoeffding_tree.pl
new file mode 100644
index 0000000..3836ad6
--- /dev/null
+++ b/src/methods/hoeffding_tree/hoeffding_tree.pl
@@ -0,0 +1,74 @@
+:- module(hoeffding_tree, [     initAndBuildModel/14,
+                                classify/7,
+                                train/6]).
+
+%% requirements of library(structs)
+:- load_files(library(str_decl),
+        [when(compile_time), if(changed)]).
+
+%% needed for using the array type
+:- use_module(library(structs)).
+:- use_module('../../helper_files/helper.pl').
+
+%% type definitions for the float array
+:- foreign_type
+        float32          = float_32,
+        float_array      = array(float32).
+
+%% definitions for the connected function
+
+%% --Input--
+%%              string  treeType                => "gini-hoeffding", "gini-binary", "info-hoeffding", "info-binary",
+%%              mat     dataset                 => float array, its size and its row number,
+%%              vec     labels                  => float array and its size,
+%%              int     numClasses,
+%%              bool    batchTraining           => (1)true / (0)false,
+%%              float32 successProbability,
+%%              int     maxSamples,
+%%              int     checkInterval,
+%%              int     minSamples,
+%%              int     bins,
+%%              int     observationsBeforeBinning
+%% --Output--
+%%              none, all 14 arguments are inputs
+%% --Description--
+%%              Selects the tree type of the model held on the C++ side and builds it on the dataset.
+%%              An unknown treeType makes the C++ side print "wrong treeType input".
+foreign(initAndBuildModel, c, initAndBuildModel(+string,
+                                                +pointer(float_array), +integer, +integer,
+                                                +pointer(float_array), +integer,
+                                                +integer, +integer, +float32, +integer, +integer, +integer, +integer, +integer)).
+
+%% --Input--
+%%              mat     data                    => float array, its size and its row number
+%%
+%% --Output--
+%%              vec     predictions             => float array and its size,
+%%              vec     probabilities           => float array and its size
+%% --Description--
+%%              Classifies the given points with the model held on the C++ side and
+%%              returns the predictions together with the probabilities.
+foreign(classify, c, classify(  +pointer(float_array), +integer, +integer,
+                                -pointer(float_array), -integer,
+                                -pointer(float_array), -integer)).
+
+%% --Input--
+%%              mat     data                    => float array, its size and its row number,
+%%              vec     labels                  => float array and its size,
+%%              bool    batchTraining           => (1)true / (0)false
+%%
+%% --Output--
+%%              none, all 6 arguments are inputs
+%% --Description--
+%%              Trains the model held on the C++ side on the given labelled data.
+foreign(train, c, train(+pointer(float_array), +integer, +integer,
+                        +pointer(float_array), +integer,
+                        +integer)).
+
+
+%% Defines the functions that get connected from hoeffding_tree.cpp
+foreign_resource(hoeffding_tree, [      initAndBuildModel,
+                                        classify,
+                                        train]).
+
+:- load_foreign_resource(hoeffding_tree).
\ No newline at end of file
diff --git a/src/methods/hoeffding_tree/hoeffding_tree_test.pl b/src/methods/hoeffding_tree/hoeffding_tree_test.pl
new file mode 100644
index 0000000..cf01d51
--- /dev/null
+++ b/src/methods/hoeffding_tree/hoeffding_tree_test.pl
@@ -0,0 +1,56 @@
+:- use_module(library(plunit)).
+
+:- use_module(hoeffding_tree).
+:- use_module('../../helper_files/helper.pl').
+
+%% Rebuilds the model on the C++ side so every test starts from the same state.
+reset_Model :-
+        build_Model('gini-hoeffding', 0).
+
+%% Builds the model for TreeType on a dataset with 3 rows, 4 points and 2 classes.
+%% NOTE(review): the numeric hyperparameters are placeholders - confirm sensible values.
+build_Model(TreeType, BatchTraining) :-
+        convert_list_to_float_array([5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5],3, array(Xsize, Xrownum, X)),
+        convert_list_to_float_array([0,1,0,1], array(Ysize, Y)),
+        initAndBuildModel(TreeType, X, Xsize, Xrownum, Y, Ysize, 2, BatchTraining, 0.95, 5000, 100, 100, 10, 100).
+
+:- begin_tests(hoeffding_tree).
+
+%% initAndBuildModel tests, one per accepted treeType plus batch training
+test(build_gini_hoeffding) :-
+        build_Model('gini-hoeffding', 0).
+test(build_gini_binary) :-
+        build_Model('gini-binary', 0).
+test(build_info_hoeffding) :-
+        build_Model('info-hoeffding', 0).
+test(build_info_binary) :-
+        build_Model('info-binary', 0).
+test(build_batch_training) :-
+        build_Model('gini-hoeffding', 1).
+
+%% train tests
+test(correct_train) :-
+        reset_Model,
+        convert_list_to_float_array([5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5],3, array(Xsize, Xrownum, X)),
+        convert_list_to_float_array([0,1,0,1], array(Ysize, Y)),
+        train(X,Xsize, Xrownum,Y, Ysize, 0).
+test(correct_batch_train) :-
+        reset_Model,
+        convert_list_to_float_array([5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5],3, array(Xsize, Xrownum, X)),
+        convert_list_to_float_array([0,1,0,1], array(Ysize, Y)),
+        train(X,Xsize, Xrownum,Y, Ysize, 1).
+
+%% classify tests
+test(classify_after_build) :-
+        reset_Model,
+        convert_list_to_float_array([5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5],3, array(Xsize, Xrownum, X)),
+        classify(X,Xsize, Xrownum, _Predictions, PredSize, _Probabilities, ProbSize),
+        integer(PredSize),
+        integer(ProbSize).
+test(classify_after_train) :-
+        reset_Model,
+        convert_list_to_float_array([5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5],3, array(Xsize, Xrownum, X)),
+        convert_list_to_float_array([0,1,0,1], array(Ysize, Y)),
+        train(X,Xsize, Xrownum,Y, Ysize, 0),
+        classify(X,Xsize, Xrownum, _, _, _, _).
+:- end_tests(hoeffding_tree).
\ No newline at end of file
-- 
GitLab