An implementation of an ID3-style decision tree for classification.

```prolog
:- use_module('path/to/.../src/methods/decision_tree/decision_tree.pl').

%% usage example
decision_tree_initModel([5.1,3.5,1.4, 4.9,3.0,1.4, 4.7,3.2,1.3, 4.6,3.1,1.5], 3, [0,1,0,1], 2, 10, 0.5, 0),
decision_tree_classifyMatrix([3,2,0, 5,1,4, 0,0,4, 3,3,5, 0,5,5, 2,5,5], 3, PredictionsList, ProbabilitiesList, _).
```

# Available Predicates
## initModel

Construct the decision tree on the given data and labels, assuming that the data is all of the numeric type.
Setting minimumLeafSize and minimumGainSplit too small may cause the tree to overfit, but setting them too large may cause it to underfit.
```prolog
%% predicate definition
decision_tree_initModel(DataList, DataRows, LabelsList, NumClasses, MinimumLeafSize, MinimumGainSplit, MaximumDepth) :-
        NumClasses >= 0,
        MinimumLeafSize > 0,
        MinimumGainSplit > 0.0,
        MinimumGainSplit < 1.0,
        MaximumDepth >= 0,
        convert_list_to_float_array(DataList, DataRows, array(Xsize, Xrownum, X)),
        convert_list_to_float_array(LabelsList, array(Ysize, Y)),
        initModelI(X, Xsize, Xrownum, Y, Ysize, NumClasses, MinimumLeafSize, MinimumGainSplit, MaximumDepth).

%% foreign c++ predicate definition
foreign(initModel, c, initModelI(+pointer(float_array), +integer, +integer,
                                 +pointer(float_array), +integer,
                                 +integer, +integer, +float32, +integer)).
```
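For instance, the goal below is a minimal sketch that reuses the toy data from the usage example at the top of this page: four points with three features each, two classes, a minimum leaf size of 10, a minimum gain split of 0.5, and a maximum depth of 0 (which mlpack treats as no depth limit).

```prolog
%% minimal sketch: 4 points, 3 features per point, labels for 2 classes
?- decision_tree_initModel([5.1,3.5,1.4, 4.9,3.0,1.4, 4.7,3.2,1.3, 4.6,3.1,1.5], 3,
                           [0,1,0,1], 2,
                           10, 0.5, 0).
```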
### Parameters
## classifyPoint
Classify the given point and also return estimates of the probability for each class in the given vector.
```prolog
%% predicate definition
decision_tree_classifyPoint(DataList, Prediction, AssignList) :-
        convert_list_to_float_array(DataList, array(Xsize, X)),
        classifyPointI(X, Xsize, Prediction, Y, Ysize),
        convert_float_array_to_list(Y, Ysize, AssignList).

%% foreign c++ predicate definition
foreign(classifyPoint, c, classifyPointI(+pointer(float_array), +integer,
                                         -integer,
                                         -pointer(float_array), -integer)).
```
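A minimal sketch, assuming a model has already been built (here with decision_tree_initModel/7 on the toy data from above): classify a single three-dimensional point, binding Prediction to the predicted class and AssignList to the per-class probability estimates.

```prolog
%% classify one 3-dimensional point with a previously initialised model
?- decision_tree_initModel([5.1,3.5,1.4, 4.9,3.0,1.4, 4.7,3.2,1.3, 4.6,3.1,1.5], 3,
                           [0,1,0,1], 2, 10, 0.5, 0),
   decision_tree_classifyPoint([3.0,2.0,0.0], Prediction, AssignList).
```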
### Parameters
## classifyMatrix
Classify the given points and also return estimates of the probabilities for each class in the given matrix.
```prolog
%% predicate definition
decision_tree_classifyMatrix(DataList, DataRows, PredictionList, ProbsList, ZCols) :-
        convert_list_to_float_array(DataList, DataRows, array(Xsize, Xrows, X)),
        classifyMatrixI(X, Xsize, Xrows, Y, Ysize, Z, ZCols, ZRows),
        convert_float_array_to_list(Y, Ysize, PredictionList),
        convert_float_array_to_2d_list(Z, ZCols, ZRows, ProbsList).

%% foreign c++ predicate definition
foreign(classifyMatrix, c, classifyMatrixI(+pointer(float_array), +integer, +integer,
                                           -pointer(float_array), -integer,
                                           -pointer(float_array), -integer, -integer)).
```
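As in the usage example at the top of this page, a sketch of classifying six points in one call: the points are flattened into a single list with three features per point, PredictionList receives one label per point, and ProbsList one row of class probabilities per point.

```prolog
%% classify 6 points (3 features each) in a single call
?- decision_tree_initModel([5.1,3.5,1.4, 4.9,3.0,1.4, 4.7,3.2,1.3, 4.6,3.1,1.5], 3,
                           [0,1,0,1], 2, 10, 0.5, 0),
   decision_tree_classifyMatrix([3,2,0, 5,1,4, 0,0,4, 3,3,5, 0,5,5, 2,5,5], 3,
                                PredictionList, ProbsList, _).
```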
### Parameters
## train

Train the decision tree on the given data, assuming that all dimensions are numeric.
This will overwrite the given model. Setting minimumLeafSize and minimumGainSplit too small may cause the tree to overfit, but setting them too large may cause it to underfit.
```prolog
%% predicate definition
decision_tree_train(DataList, DataRows, LabelsList, NumClasses, MinimumLeafSize, MinimumGainSplit, MaximumDepth, Entropy) :-
        NumClasses >= 0,
        MinimumLeafSize > 0,
        MinimumGainSplit > 0.0,
        MinimumGainSplit < 1.0,
        MaximumDepth >= 0,
        convert_list_to_float_array(DataList, DataRows, array(Xsize, Xrownum, X)),
        convert_list_to_float_array(LabelsList, array(Ysize, Y)),
        trainI(X, Xsize, Xrownum, Y, Ysize, NumClasses, MinimumLeafSize, MinimumGainSplit, MaximumDepth, Entropy).

%% foreign c++ predicate definition
foreign(train, c, trainI(+pointer(float_array), +integer, +integer,
                         +pointer(float_array), +integer,
                         +integer, +integer, +float32, +integer,
                         [-float32])).
```
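A minimal sketch of (re)training on the toy data from above; Entropy is unified with the single float returned by the underlying routine, i.e. the final entropy of the trained tree in mlpack.

```prolog
%% retrain the tree; Entropy is bound to the float returned by the training routine
?- decision_tree_train([5.1,3.5,1.4, 4.9,3.0,1.4, 4.7,3.2,1.3, 4.6,3.1,1.5], 3,
                       [0,1,0,1], 2, 10, 0.5, 0, Entropy).
```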
### Parameters