Update decision_tree authored by Dean Samuel Schmitz's avatar Dean Samuel Schmitz
......@@ -3,11 +3,11 @@
An implementation of an ID3-style decision tree for classification.
```prolog
:- use_module('path/to/.../src/methods/decision_tree/decision_tree.pl').

%% usage example
%% Builds the model from 12 data values (DataRows = 3) with four labels and two classes,
%% then classifies a second data list and returns predictions and class probabilities.
decision_tree_initModel([5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5], 3, [0,1,0,1], 2, 10, 0.5, 0),
decision_tree_classifyMatrix([3,2,0, 5,1,4, 0,0,4, 3,3,5, 0,5,5, 2,5,5], 3, PredictionsList, ProbabilitiesList, _).
```
# Available Predicates
......@@ -28,10 +28,21 @@ Construct the decision tree on the given data and labels, assuming that the data
Setting minimumLeafSize and minimumGainSplit too small may cause the tree to overfit, but setting them too large may cause it to underfit.
```prolog
%% predicate definition
%%
%% decision_tree_initModel(+DataList, +DataRows, +LabelsList, +NumClasses,
%%                         +MinimumLeafSize, +MinimumGainSplit, +MaximumDepth)
%%
%% Checks the numeric parameters, converts the data and label lists with
%% convert_list_to_float_array and passes the results to the foreign predicate initModelI.
%% If one of the parameter checks does not hold, the predicate fails before initModelI is called.
decision_tree_initModel(DataList, DataRows, LabelsList, NumClasses, MinimumLeafSize, MinimumGainSplit, MaximumDepth) :-
        NumClasses >= 0,
        MinimumLeafSize > 0,
        %% MinimumGainSplit has to lie strictly between 0.0 and 1.0
        MinimumGainSplit > 0.0,
        MinimumGainSplit < 1.0,
        MaximumDepth >= 0,
        convert_list_to_float_array(DataList, DataRows, array(Xsize, Xrownum, X)),
        convert_list_to_float_array(LabelsList, array(Ysize, Y)),
        initModelI(X, Xsize, Xrownum, Y, Ysize, NumClasses, MinimumLeafSize, MinimumGainSplit, MaximumDepth).
%% foreign c++ predicate definition
%% Binds initModelI/9 to the foreign function initModel.
%% Argument order as used by decision_tree_initModel:
%%   data array (pointer, size, row count), labels array (pointer, size),
%%   NumClasses, MinimumLeafSize, MinimumGainSplit, MaximumDepth.
foreign(initModel, c, initModelI( +pointer(float_array), +integer, +integer,
                                  +pointer(float_array), +integer,
                                  +integer, +integer, +float32, +integer)).
```
### Parameters
......@@ -52,10 +63,16 @@ initModel( +pointer(float_array), +integer, +integer,
Classify the given point and also return estimates of the probability for each class in the given vector.
```prolog
%% predicate definition
%%
%% decision_tree_classifyPoint(+DataList, -Prediction, -AssignList)
%%
%% Converts DataList with convert_list_to_float_array, calls the foreign predicate
%% classifyPointI and converts the float array it returns into AssignList.
%% Prediction is bound directly by classifyPointI.
decision_tree_classifyPoint(DataList, Prediction, AssignList) :-
        convert_list_to_float_array(DataList, array(Xsize, X)),
        classifyPointI(X, Xsize, Prediction, Y, Ysize),
        %% Y/Ysize are the returned float array and its size
        convert_float_array_to_list(Y, Ysize, AssignList).
%% foreign c++ predicate definition
%% Binds classifyPointI/5 to the foreign function classifyPoint.
%% Argument order as used by decision_tree_classifyPoint:
%%   input point array (pointer, size), predicted value (output),
%%   returned float array (pointer, size; both outputs).
foreign(classifyPoint, c, classifyPointI( +pointer(float_array), +integer,
                                          -integer,
                                          -pointer(float_array), -integer)).
```
### Parameters
......@@ -73,10 +90,17 @@ classifyPoint( +pointer(float_array), +integer,
Classify the given points and also return estimates of the probabilities for each class in the given matrix.
```prolog
%% predicate definition
%%
%% decision_tree_classifyMatrix(+DataList, +DataRows, -PredictionList, -ProbsList, -ZCols)
%%
%% Converts DataList/DataRows with convert_list_to_float_array, calls the foreign predicate
%% classifyMatrixI and converts its two returned float arrays:
%% the first into the flat PredictionList, the second into the nested list ProbsList.
%% ZCols is the column count reported by classifyMatrixI for the second array.
decision_tree_classifyMatrix(DataList, DataRows, PredictionList, ProbsList, ZCols) :-
        convert_list_to_float_array(DataList, DataRows, array(Xsize, Xrows, X)),
        classifyMatrixI(X, Xsize, Xrows, Y, Ysize, Z, ZCols, ZRows),
        convert_float_array_to_list(Y, Ysize, PredictionList),
        convert_float_array_to_2d_list(Z, ZCols, ZRows, ProbsList).
%% foreign c++ predicate definition
%% Binds classifyMatrixI/8 to the foreign function classifyMatrix.
%% Argument order as used by decision_tree_classifyMatrix:
%%   input data array (pointer, size, row count),
%%   predictions array (pointer, size; outputs),
%%   probabilities array (pointer, column count, row count; outputs).
foreign(classifyMatrix, c, classifyMatrixI( +pointer(float_array), +integer, +integer,
                                            -pointer(float_array), -integer,
                                            -pointer(float_array), -integer, -integer)).
```
### Parameters
......@@ -96,11 +120,22 @@ Train the decision tree on the given data, assuming that all dimensions are nume
This will overwrite the given model. Setting minimumLeafSize and minimumGainSplit too small may cause the tree to overfit, but setting them too large may cause it to underfit.
```prolog
%% predicate definition
%%
%% decision_tree_train(+DataList, +DataRows, +LabelsList, +NumClasses,
%%                     +MinimumLeafSize, +MinimumGainSplit, +MaximumDepth, -Entropy)
%%
%% Checks the numeric parameters, converts the data and label lists with
%% convert_list_to_float_array and passes the results to the foreign predicate trainI,
%% which binds Entropy.
%% If one of the parameter checks does not hold, the predicate fails before trainI is called.
decision_tree_train(DataList, DataRows, LabelsList, NumClasses, MinimumLeafSize, MinimumGainSplit, MaximumDepth, Entropy) :-
        NumClasses >= 0,
        MinimumLeafSize > 0,
        %% MinimumGainSplit has to lie strictly between 0.0 and 1.0
        MinimumGainSplit > 0.0,
        MinimumGainSplit < 1.0,
        MaximumDepth >= 0,
        convert_list_to_float_array(DataList, DataRows, array(Xsize, Xrownum, X)),
        convert_list_to_float_array(LabelsList, array(Ysize, Y)),
        trainI(X, Xsize, Xrownum, Y, Ysize, NumClasses, MinimumLeafSize, MinimumGainSplit, MaximumDepth, Entropy).
%% foreign c++ predicate definition
%% Binds trainI/10 to the foreign function train.
%% Argument order as used by decision_tree_train:
%%   data array (pointer, size, row count), labels array (pointer, size),
%%   NumClasses, MinimumLeafSize, MinimumGainSplit, MaximumDepth,
%%   and the bracketed [-float32] result that decision_tree_train receives as Entropy.
foreign(train, c, trainI(+pointer(float_array), +integer, +integer,
                         +pointer(float_array), +integer,
                         +integer, +integer, +float32, +integer,
                         [-float32])).
```
### Parameters
......
......