diff --git a/src/methods/decision_tree/decision_tree.cpp b/src/methods/decision_tree/decision_tree.cpp
index e592413e030901118c192caab548ea66440095ab..8e6e7e794ce7e07ca7661c09fb35fb538df32084 100644
--- a/src/methods/decision_tree/decision_tree.cpp
+++ b/src/methods/decision_tree/decision_tree.cpp
@@ -37,7 +37,16 @@ void initModel( float *dataMatArr, SP_integer dataMatSize, SP_integer dataMatRow
     // convert the Prolog array to arma::rowvec
     Row<size_t> labelsVector = convertArrayToVec(labelsArr, labelsArrSize);
 
-    decisionTreeObj = DecisionTree(data, labelsVector, numClasses, minimumLeafSize, minimumGainSplit, maximumDepth);
+    try
+    {
+        decisionTreeObj = DecisionTree(data, labelsVector, numClasses, minimumLeafSize, minimumGainSplit, maximumDepth);
+    }
+    catch(const std::out_of_range& e)
+    {
+        raisePrologSystemExeption("Labels Vector is too short or its values are incorrect: should fit into [0,numClasses)!");
+        return;
+    }
 }
@@ -59,9 +68,17 @@ void classifyPoint( float *pointArr, SP_integer pointArrSize,
     // create the ReturnVector
     vec probsReturnVector;
 
-    decisionTreeObj.Classify(pointVector, predictReturn, probsReturnVector);
+    try
+    {
+        decisionTreeObj.Classify(pointVector, predictReturn, probsReturnVector);
+    }
+    catch(const std::logic_error& e)
+    {
+        raisePrologSystemExeption("Given point has the wrong length!");
+        return;
+    }
 
     // return prediction value
@@ -79,26 +96,33 @@
 // description:
 void classifyMatrix(float *dataMatArr, SP_integer dataMatSize, SP_integer dataMatRowNum,
                     float **predictArr, SP_integer *predictArrSize,
-                    float **probsArr, SP_integer *probsArrSize)
+                    float **probsMatArr, SP_integer *probsMatColNum, SP_integer *probsMatRowNum)
 {
     // convert the Prolog array to arma::mat
     mat data = convertArrayToMat(dataMatArr, dataMatSize, dataMatRowNum);
 
     // create the ReturnVector
     Row< size_t > predictReturnVector;
 
-    // create the ReturnVector
-    rowvec probsReturnVector;
-
-    decisionTreeObj.Classify(data, predictReturnVector, probsReturnVector);
+    // create the ReturnMat
+    mat probsReturnMat;
+
+    try
+    {
+        decisionTreeObj.Classify(data, predictReturnVector, probsReturnMat);
+    }
+    catch(const std::logic_error& e)
+    {
+        raisePrologSystemExeption(e.what());
+        return;
+    }
 
     // return the Vector
     returnVectorInformation(predictReturnVector, predictArr, predictArrSize);
 
-    // return the Vector
-    returnVectorInformation(probsReturnVector, probsArr, probsArrSize);
+    // return the Matrix
+    returnMatrixInformation(probsReturnMat, probsMatArr, probsMatColNum, probsMatRowNum);
 }
 
 // TODO:
@@ -122,5 +146,15 @@ double train( float *dataMatArr, SP_integer dataMatSize, SP_integer dataMatRow
     // convert the Prolog array to arma::rowvec
     Row<size_t> labelsVector = convertArrayToVec(labelsArr, labelsArrSize);
 
-    return decisionTreeObj.Train(data, labelsVector, numClasses, minimumLeafSize, minimumGainSplit, maximumDepth);
+    try
+    {
+        return decisionTreeObj.Train(data, labelsVector, numClasses, minimumLeafSize, minimumGainSplit, maximumDepth);
+    }
+    catch(const std::invalid_argument& e)
+    {
+        raisePrologSystemExeption(e.what());
+        return 0.0;
+    }
 }
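All four wrappers follow the same pattern: the mlpack call is guarded by a try/catch, and the C++ exception is translated via raisePrologSystemExeption, which surfaces on the Prolog side as error(_, system_error(Message)) (see the test expectations below). A minimal sketch of how a caller can trap this; the wrapper name safe_init_model/7 is hypothetical and not part of this patch:

%% Hypothetical convenience wrapper (not part of this patch): traps the
%% system_error raised by the C++ glue code and reports it instead of
%% letting the exception propagate.
safe_init_model(Data, Rows, Labels, NumClasses, LeafSize, GainSplit, MaxDepth) :-
    catch(
        initModel(Data, Rows, Labels, NumClasses, LeafSize, GainSplit, MaxDepth),
        error(_, system_error(Message)),
        ( format(user_error, "initModel/7 failed: ~w~n", [Message]),
          fail )
    ).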
diff --git a/src/methods/decision_tree/decision_tree.pl b/src/methods/decision_tree/decision_tree.pl
index c053b099e1869eb9a7f77ea9b0957bffb9d9ac4f..687ea26abd4555ae3b47f17c215387fe44af58c7 100644
--- a/src/methods/decision_tree/decision_tree.pl
+++ b/src/methods/decision_tree/decision_tree.pl
@@ -1,8 +1,8 @@
-:- module(decision_tree, [ initModel/9,
-                           classifyPoint/5,
-                           classifyMatrix/7,
-                           train/10]).
+:- module(decision_tree, [ initModel/7,
+                           classifyPoint/3,
+                           classifyMatrix/5,
+                           train/8]).
 
 %% requirements of library(struct)
 :- load_files(library(str_decl),
@@ -20,6 +20,9 @@
 
 %% definitions for the connected function
 
+foreign(initModel, c, initModelI( +pointer(float_array), +integer, +integer,
+                                  +pointer(float_array), +integer,
+                                  +integer, +integer, +float32, +integer)).
 %% --Input--
 %% mat dataset,
 %% vec labels,
@@ -34,11 +37,20 @@
 %% Construct the decision tree on the given data and labels, assuming that the data is all of the numeric type.
 %% Setting minimumLeafSize and minimumGainSplit too small may cause the tree to overfit, but setting them too large may cause it to underfit.
 %%
-foreign(initModel, c, initModel( +pointer(float_array), +integer, +integer,
-                                 +pointer(float_array), +integer,
-                                 +integer, +integer, +float32, +integer)).
-
-
+initModel(DataList, DataRows, LabelsList, NumClasses, MinimumLeafSize, MinimumGainSplit, MaximumDepth) :-
+        NumClasses >= 0,
+        MinimumLeafSize > 0,
+        MinimumGainSplit > 0.0,
+        MinimumGainSplit < 1.0,
+        MaximumDepth >= 0,
+        convert_list_to_float_array(DataList, DataRows, array(Xsize, Xrownum, X)),
+        convert_list_to_float_array(LabelsList, array(Ysize, Y)),
+        initModelI(X, Xsize, Xrownum, Y, Ysize, NumClasses, MinimumLeafSize, MinimumGainSplit, MaximumDepth).
+
+
+foreign(classifyPoint, c, classifyPointI(+pointer(float_array), +integer,
+                                         -integer,
+                                         -pointer(float_array), -integer)).
 %% --Input--
 %% vec point
 %%
@@ -49,26 +61,36 @@ foreign(initModel, c, initModel( +pointer(float_array), +integer, +intege
 %% --Description--
 %% Classify the given point and also return estimates of the probability for each class in the given vector.
 %%
-foreign(classifyPoint, c, classifyPoint(+pointer(float_array), +integer,
-                                        -integer,
-                                        -pointer(float_array), -integer)).
+classifyPoint(DataList, Prediction, AssignList) :-
+        convert_list_to_float_array(DataList, array(Xsize, X)),
+        classifyPointI(X, Xsize, Prediction, Y, Ysize),
+        convert_float_array_to_list(Y, Ysize, AssignList).
+
+
+foreign(classifyMatrix, c, classifyMatrixI( +pointer(float_array), +integer, +integer,
+                                            -pointer(float_array), -integer,
+                                            -pointer(float_array), -integer, -integer)).
 %% --Input--
 %% mat data
 %%
 %% --Output--
 %% vec predictions,
-%% vec probabilities
+%% mat probabilities
 %%
 %% --Description--
 %% Classify the given points and also return estimates of the probabilities for each class in the given matrix.
 %%
-foreign(classifyMatrix, c, classifyMatrix( +pointer(float_array), +integer, +integer,
-                                           -pointer(float_array), -integer,
-                                           -pointer(float_array), -integer)).
+classifyMatrix(DataList, DataRows, PredictionList, ProbsList, ZCols) :-
+        convert_list_to_float_array(DataList, DataRows, array(Xsize, Xrows, X)),
+        classifyMatrixI(X, Xsize, Xrows, Y, Ysize, Z, ZCols, ZRows),
+        convert_float_array_to_list(Y, Ysize, PredictionList),
+        convert_float_array_to_2d_list(Z, ZCols, ZRows, ProbsList).
+
+
+foreign(train, c, trainI(+pointer(float_array), +integer, +integer,
+                         +pointer(float_array), +integer,
+                         +integer, +integer, +float32, +integer,
+                         [-float32])).
 %% --Input--
 %% mat data,
 %% vec labels,
@@ -84,10 +106,15 @@ foreign(classifyMatrix, c, classifyMatrix( +pointer(float_array), +integer,
 %% Train the decision tree on the given data, assuming that all dimensions are numeric.
 %% This will overwrite the given model. Setting minimumLeafSize and minimumGainSplit too small may cause the tree to overfit, but setting them too large may cause it to underfit.
 %%
-foreign(train, c, train(+pointer(float_array), +integer, +integer,
-                        +pointer(float_array), +integer,
-                        +integer, +integer, +float32, +integer,
-                        [-float32])).
+train(DataList, DataRows, LabelsList, NumClasses, MinimumLeafSize, MinimumGainSplit, MaximumDepth, Entropy) :-
+        NumClasses >= 0,
+        MinimumLeafSize > 0,
+        MinimumGainSplit > 0.0,
+        MinimumGainSplit < 1.0,
+        MaximumDepth >= 0,
+        convert_list_to_float_array(DataList, DataRows, array(Xsize, Xrownum, X)),
+        convert_list_to_float_array(LabelsList, array(Ysize, Y)),
+        trainI(X, Xsize, Xrownum, Y, Ysize, NumClasses, MinimumLeafSize, MinimumGainSplit, MaximumDepth, Entropy).
 
 %% Defines the functions that get connected from main.cpp
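Taken together, the new list-based predicates hide all float-array conversion from the caller. A hedged end-to-end sketch; the predicate example_run/3 and its data values are invented for illustration, and the fifth argument of classifyMatrix/5 is the column count of the returned probabilities matrix:

%% Invented example (not part of this patch): train on four 3-dimensional
%% points with two classes, then classify two new points.
example_run(Entropy, Predictions, Probabilities) :-
    train([5.1,3.5,1.4, 4.9,3.0,1.4, 4.7,3.2,1.3, 4.6,3.1,1.5],
          3, [0,1,0,0], 2, 10, 0.5, 0, Entropy),
    classifyMatrix([5.0,3.4,1.5, 4.8,3.1,1.4],
                   3, Predictions, Probabilities, _Cols).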
diff --git a/src/methods/decision_tree/decision_tree_test.pl b/src/methods/decision_tree/decision_tree_test.pl
index 1bffbdaefd2d8ab44ac11041d764eeae9c0a0b17..209f0a7a3dc547d4b13fc23ae271997592e94592 100644
--- a/src/methods/decision_tree/decision_tree_test.pl
+++ b/src/methods/decision_tree/decision_tree_test.pl
@@ -1,56 +1,212 @@
+
+:- module(decision_tree_tests, [run_decision_tree_tests/0]).
+
 :- use_module(library(plunit)).
 :- use_module(decision_tree).
 :- use_module('../../helper_files/helper.pl').
 
-reset_Model :-
-        initModel(1,0,50,0.0001).
-
-:- begin_tests(lists).
-
-%% alpha tests
-test(alpha_std_init) :-
-        reset_Model,
-        alpha(0).
-test(alpha_wrong_input, fail) :-
-        reset_Model,
-        alpha(1).
-test(alpha_after_train, A =:= 9223372036854775808) :-
-        reset_Model,
-        convert_list_to_float_array([5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5],3, array(Xsize, Xrownum, X)),
-        convert_list_to_float_array([0.2,0.2,0.2,0.2], array(Ysize, Y)),
-        train(X,Xsize, Xrownum,Y, Ysize),
-        alpha(A).
-
-%% train tests
-test(correct_train) :-
-        reset_Model,
-        convert_list_to_float_array([5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5],3, array(Xsize, Xrownum, X)),
-        convert_list_to_float_array([0.2,0.2,0.2,0.2], array(Ysize, Y)),
-        train(X,Xsize, Xrownum,Y, Ysize).
-test(false_train, fail) :-
-        reset_Model,
-        convert_list_to_float_array([],3, array(Xsize, Xrownum, X)),
-        convert_list_to_float_array([0.2,0.2,0.2,0.2], array(Ysize, Y)),
-        train(X,Xsize, Xrownum,Y, Ysize).
-test(false_train2, fail) :-
-        reset_Model,
-        convert_list_to_float_array([],0, array(Xsize, Xrownum, X)),
-        convert_list_to_float_array([0.2,0.2,0.2,0.2], array(Ysize, Y)),
-        train(X,Xsize, Xrownum,Y, Ysize).
-test(false_train3, fail) :-
-        reset_Model,
-        convert_list_to_float_array([1,2],0, array(Xsize, Xrownum, X)),
-        convert_list_to_float_array([0.2,0.2,0.2,0.2], array(Ysize, Y)),
-        train(X,Xsize, Xrownum,Y, Ysize).
-test(false_train3, fail) :-
-        reset_Model,
-        convert_list_to_float_array([1,2,44,3],3, array(Xsize, Xrownum, X)),
-        convert_list_to_float_array([0.2,0.2,0.2,0.2], array(Ysize, Y)),
-        train(X,Xsize, Xrownum,Y, Ysize).
-test(false_train4) :-
-        reset_Model,
-        convert_list_to_float_array([1,2,44,3],2, array(Xsize, Xrownum, X)),
-        convert_list_to_float_array([0.2,0.2,0.2,0.2], array(Ysize, Y)),
-        train(X,Xsize, Xrownum,Y, Ysize).
-:- end_tests(lists).
\ No newline at end of file
+reset_Model_With_Train :-
+        initModel([5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5], 3, [0,1,0,0], 2, 10, 0.5, 0).
+
+%%
+%% TESTING predicate initModel/7
+%%
+:- begin_tests(initModel).
+
+%% Failure Tests
+
+test(decision_tree_Negative_NumClass, fail) :-
+        initModel([5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5], 3, [0,0,0,0], -1, 1, 0.5, 0).
+
+test(decision_tree_Negative_LeafSize, fail) :-
+        initModel([5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5], 3, [0,0,0,0], 1, -1, 0.5, 0).
+
+test(decision_tree_Negative_GainSplit, fail) :-
+        initModel([5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5], 3, [0,0,0,0], 1, 1, -0.5, 0).
+
+test(decision_tree_Too_High_GainSplit, fail) :-
+        initModel([5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5], 3, [0,0,0,0], 1, 1, 1.5, 0).
+
+test(decision_tree_Negative_MaxDepth, fail) :-
+        initModel([5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5], 3, [0,0,0,0], 1, 1, 0.5, -1).
+
+test(decision_tree_Init_With_Wrong_Label_Dims1, [error(_,system_error('Labels Vector is too short or its values are incorrect: should fit into [0,numClasses)!'))]) :-
+        initModel([5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5], 3, [0,0], 1, 1, 0.5, 1).
+
+%% If the label vector is too long, it seems to cause no problems
+test(decision_tree_Init_With_Wrong_Label_Dims2) :-
+        initModel([5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5], 3, [0,0,0,0,0,0], 1, 1, 0.5, 1).
+
+%% The same holds when the label values are out of range
+test(decision_tree_Init_With_Wrong_Label_Value) :-
+        initModel([5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5], 3, [0,5,0,-1], 1, 1, 0.5, 1).
+
+
+%% Successful Tests
+
+test(initModel_Direct_Input_Use) :-
+        initModel([5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5], 3, [0,0,0,0], 2, 10, 0.5, 0).
+
+test(initModel_Direct_CSV_Use) :-
+        open('/home/afkjakhes/eclipse-workspace/prolog-mlpack-libary/src/data_csv/iris2.csv', read, File),
+        take_csv_row(File, skipFirstRow, 10, Data),
+        initModel(Data, 4, [0,1,0,1,1,0,1,1,1,0], 2, 2, 0.7, 3).
+
+:- end_tests(initModel).
+
+
+%%
+%% TESTING predicate classifyPoint/3
+%%
+:- begin_tests(classifyPoint).
+
+%% Failure Tests
+
+%% The point dimensionality does not seem to matter to mlpack,
+%% so it is unclear whether this call should be forced to fail.
+test(classify_Point_With_Wrong_Dims) :-
+        reset_Model_With_Train,
+        classifyPoint([5.1,3.5,1.4,1.2,3.3], Prediction, AssignList),
+        print(Prediction),
+        print('\n'),
+        print(AssignList).
+
+
+%% Successful Tests
+
+test(classify_Point1) :-
+        reset_Model_With_Train,
+        classifyPoint([5.1,3.5,1.4], Prediction, AssignList),
+        print(Prediction),
+        print('\n'),
+        print(AssignList).
+
+test(classify_Point2) :-
+        reset_Model_With_Train,
+        classifyPoint([6.2,1.9,2.3], Prediction, AssignList),
+        print(Prediction),
+        print('\n'),
+        print(AssignList).
+
+:- end_tests(classifyPoint).
+
+
+%%
+%% TESTING predicate classifyMatrix/5
+%%
+:- begin_tests(classifyMatrix).
+
+%% Failure Tests
+
+%% The point dimensionality does not seem to matter to mlpack,
+%% so it is unclear whether this call should be forced to fail.
+test(classify_Matrix_With_Wrong_Dims1) :-
+        reset_Model_With_Train,
+        classifyMatrix([3, 2, 0, 5, 1, 4, 1, 0, 4, 3, 3, 5, 0, 5, 5, 2, 5, 5, 0, 2], 5, PredictionList, ProbsList, _),
+        print(PredictionList),
+        print('\n'),
+        print(ProbsList).
+
+%% The point dimensionality does not seem to matter to mlpack,
+%% so it is unclear whether this call should be forced to fail.
+test(classify_Matrix_With_Wrong_Dims2) :-
+        reset_Model_With_Train,
+        classifyMatrix([3, 2, 0, 5, 1, 4, 0, 0, 4, 3, 3, 5, 0, 5, 5, 2, 5, 5, 0, 2], 2, PredictionList, ProbsList, _),
+        print(PredictionList),
+        print('\n'),
+        print(ProbsList).
+
+
+%% Successful Tests
+
+test(classify_Matrix_Weird_Trained_Labels) :-
+        initModel([5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5], 3, [0,5,0,-1], 1, 1, 0.5, 1),
+        classifyMatrix([5.1,3.5,1.4, 5.1,3.5,1.4, 5.1,3.5,1.4, 5.1,3.5,1.4, 5.1,3.5,1.4], 3, PredictionList, ProbsList, _),
+        print(PredictionList),
+        print('\n'),
+        print(ProbsList).
+
+test(classify_Matrix_Direct_Input1) :-
+        reset_Model_With_Train,
+        classifyMatrix([5.1,3.5,1.4, 5.1,3.5,1.4, 5.1,3.5,1.4, 5.1,3.5,1.4, 5.1,3.5,1.4], 3, PredictionList, ProbsList, _),
+        print(PredictionList),
+        print('\n'),
+        print(ProbsList).
+
+test(classify_Matrix_Direct_Input2) :-
+        reset_Model_With_Train,
+        classifyMatrix([2, 2, 3, 5, 1, 4, 1, 1, 4, 0, 3, 5, 0, 5, 5], 3, PredictionList, ProbsList, _),
+        print(PredictionList),
+        print('\n'),
+        print(ProbsList).
+
+test(classify_Matrix_CSV_Trained) :-
+        open('/home/afkjakhes/eclipse-workspace/prolog-mlpack-libary/src/data_csv/iris2.csv', read, File),
+        take_csv_row(File, skipFirstRow, 30, Data),
+        train(Data, 4, [0,1,0,1,1,0,1,1,1,0, 0,1,0,1,1,0,1,1,1,0, 0,1,0,1,1,0,1,1,1,0], 2, 5, 0.0007, 0, _),
+        classifyMatrix([2, 2, 3, 5, 1, 4, 1, 1, 4, 0, 3, 5, 0, 5, 5, 2, 2, 6, 0, 1], 4, PredictionList, ProbsList, _),
+        print(PredictionList),
+        print('\n'),
+        print(ProbsList).
+
+:- end_tests(classifyMatrix).
+
+
+%%
+%% TESTING predicate train/8
+%%
+:- begin_tests(train).
+
+%% Failure Tests
+
+test(decision_tree_Train_Negative_NumClass, fail) :-
+        train([5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5], 3, [0,0,0,0], -1, 1, 0.5, 0, _).
+
+test(decision_tree_Train_Negative_LeafSize, fail) :-
+        train([5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5], 3, [0,0,0,0], 1, -1, 0.5, 0, _).
+
+test(decision_tree_Train_Negative_GainSplit, fail) :-
+        train([5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5], 3, [0,0,0,0], 1, 1, -0.5, 0, _).
+
+test(decision_tree_Train_Too_High_GainSplit, fail) :-
+        train([5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5], 3, [0,0,0,0], 1, 1, 1.5, 0, _).
+
+test(decision_tree_Train_Negative_MaxDepth, fail) :-
+        train([5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5], 3, [0,0,0,0], 1, 1, 0.5, -1, _).
+
+test(decision_tree_Train_Wrong_Label_Dims1, [error(_,system_error('DecisionTree::Train(): number of points (4) does not match number of labels (2)!\n'))]) :-
+        train([5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5], 3, [0,0], 1, 1, 0.5, 1, _).
+
+test(decision_tree_Train_Wrong_Label_Dims2, [error(_,system_error('DecisionTree::Train(): number of points (4) does not match number of labels (6)!\n'))]) :-
+        train([5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5], 3, [0,0,0,0,0,0], 1, 1, 0.5, 1, _).
+
+%% There seems to be no check on the label values
+test(decision_tree_Train_Wrong_Labels) :-
+        train([5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5], 3, [-1,0,0,5], 1, 1, 0.5, 1, _).
+
+
+%% Successful Tests
+
+test(train_Direct_Input_Use, [true(Entropy =:= 0.0)]) :-
+        train([5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5], 3, [0,0,0,0], 2, 10, 0.5, 0, Entropy).
+
+test(train_Direct_CSV_Use, [true(Entropy =:= 0.48)]) :-
+        open('/home/afkjakhes/eclipse-workspace/prolog-mlpack-libary/src/data_csv/iris2.csv', read, File),
+        take_csv_row(File, skipFirstRow, 10, Data),
+        train(Data, 4, [0,1,0,1,1,0,1,1,1,0], 2, 2, 0.7, 3, Entropy).
+
+test(train_After_Init_Direct_Input_Use, [true(Entropy =:= 0.0)]) :-
+        reset_Model_With_Train,
+        train([5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5], 3, [0,0,0,0], 3, 10, 0.7, 0, Entropy).
+
+test(train_After_Init_Direct_CSV_Use, [true(Entropy =:= 0.3767195767195767)]) :-
+        reset_Model_With_Train,
+        open('/home/afkjakhes/eclipse-workspace/prolog-mlpack-libary/src/data_csv/iris2.csv', read, File),
+        take_csv_row(File, skipFirstRow, 30, Data),
+        train(Data, 4, [0,1,0,1,1,0,1,1,1,0, 0,1,0,1,1,0,1,1,1,0, 0,1,0,1,1,0,1,1,1,0], 2, 5, 0.0005, 0, Entropy).
+
+:- end_tests(train).
+
+run_decision_tree_tests :-
+        run_tests.
diff --git a/test_all.pl b/test_all.pl
index 7b5dc51e503bdc3dbdd21445811efc902defc895..e4fe222b3433d0cb3513a86ce1a0826d30c888a7 100644
--- a/test_all.pl
+++ b/test_all.pl
@@ -4,6 +4,8 @@
 
 :- use_module('src/methods/dbscan/dbscan_test.pl').
 
+:- use_module('src/methods/decision_tree/decision_tree_test.pl').
+
 :- use_module('src/methods/sparse_coding/sparse_coding_test.pl').
 
 :- use_module('src/helper_files/helper_tests.pl').
@@ -11,5 +13,6 @@
 run :-
     run_adaboost_tests,
     run_dbscan_tests,
+    run_decision_tree_tests,
     run_sparse_coding_tests,
     run_helper_tests.
\ No newline at end of file
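To close the loop, an illustrative interactive session; the bindings shown in the comments are hypothetical, not captured output:

%% Illustrative session only: one probability per class in AssignList.
?- initModel([5.1,3.5,1.4, 4.9,3.0,1.4, 4.7,3.2,1.3, 4.6,3.1,1.5],
             3, [0,1,0,0], 2, 10, 0.5, 0),
   classifyPoint([5.1,3.5,1.4], Prediction, AssignList).
%  Prediction = 0,
%  AssignList = [0.9, 0.1]     (values invented)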