diff --git a/src/methods/naive_bayes_classifier/naive_bayes_classifier.cpp b/src/methods/naive_bayes_classifier/naive_bayes_classifier.cpp
index e06b44f0cb167808bf5b6071dbee7961ed4f4409..5502c2dc10676338f80c5842571ca50272451382 100644
--- a/src/methods/naive_bayes_classifier/naive_bayes_classifier.cpp
+++ b/src/methods/naive_bayes_classifier/naive_bayes_classifier.cpp
@@ -17,6 +17,8 @@ using namespace mlpack::naive_bayes;
 
 // Global Variable of the NaiveBayesClassifier object so it can be accessed from all functions
 NaiveBayesClassifier<mat> naiveBayesClassifier;
+bool isModelTrained = false;
+int trainedDimensionality = 0;
 
 
 // input: const MatType & data,
@@ -50,11 +52,18 @@ void initModelWithTrain(float *dataMatArr, SP_integer dataMatSize, SP_integer da
     {
         naiveBayesClassifier = NaiveBayesClassifier(data, labelsVector, numClasses, (incrementalVariance == 1), epsilon);
     }
+    catch(const std::out_of_range& e)
+    {
+        raisePrologSystemExeption("The given Labels dont fit the format [0,Numclasses-1]!");
+        return;
+    }
     catch(const std::exception& e)
     {
         raisePrologSystemExeption(e.what());
        return;
     }
+    isModelTrained = true;
+    trainedDimensionality = data.n_rows;
 }
 
 
@@ -76,6 +85,8 @@ void initModelNoTrain(SP_integer dimensionality, SP_integer numClasses, double e
         raisePrologSystemExeption(e.what());
         return;
     }
+    isModelTrained = false;
+    trainedDimensionality = dimensionality;
 }
 
 /*
@@ -119,8 +130,20 @@ void classifyMatrix(float *dataMatArr, SP_integer dataMatSize, SP_integer dataMa
                     float **predicArr, SP_integer *predicArrSize, float **probsMatArr, SP_integer *probsMatColNum, SP_integer *probsMatRowNum)
 {
+    if (!isModelTrained)
+    {
+        raisePrologSystemExeption("The Model is not trained!");
+        return;
+    }
     // convert the Prolog arrays to arma::mat
     mat data = convertArrayToMat(dataMatArr, dataMatSize, dataMatRowNum);
+    // check if data Dim fits trained Dim
+    if (data.n_rows != trainedDimensionality)
+    {
+        raisePrologSystemExeption("The Datapoints Dimensionality doesnt fit the trained Dimensionality!");
+        return;
+    }
+
 
     // get the ReturnVector
     Row<size_t> predicReturnVector;
@@ -153,6 +176,11 @@ void classifyMatrix(float *dataMatArr, SP_integer dataMatSize, SP_integer dataMa
 //
 void means(float **meansMatArr, SP_integer *meansMatColNum, SP_integer *meansMatRowNum)
 {
+    if (!isModelTrained)
+    {
+        raisePrologSystemExeption("The Model is not trained!");
+        return;
+    }
     // get the ReturnMat
     mat meansReturnMat = naiveBayesClassifier.Means();
 
@@ -169,6 +197,11 @@ void means(float **meansMatArr, SP_integer *meansMatColNum, SP_integer *meansMat
 //
 void probabilities(float **probsMatArr, SP_integer *probsMatColNum, SP_integer *probsMatRowNum)
 {
+    if (!isModelTrained)
+    {
+        raisePrologSystemExeption("The Model is not trained!");
+        return;
+    }
     // get the ReturnMat
     mat probsReturnMat = naiveBayesClassifier.Probabilities();
 
@@ -199,6 +232,12 @@ void trainMatrix(float *dataMatArr, SP_integer dataMatSize, SP_integer dataMatRo
         raisePrologSystemExeption("The number of data points does not match the number of labels!");
         return;
     }
+    // check if data Dim fits trained Dim
+    if (data.n_rows != trainedDimensionality)
+    {
+        raisePrologSystemExeption("The Datapoints Dimensionality doesnt fit the trained Dimensionality!");
+        return;
+    }
 
     // convert the Prolog array to arma::rowvec
     Row<size_t> labelsVector = convertArrayToVec(labelsArr, labelsArrSize);
@@ -218,6 +257,8 @@ void trainMatrix(float *dataMatArr, SP_integer dataMatSize, SP_integer dataMatRo
         raisePrologSystemExeption(e.what());
         return;
     }
+
+    isModelTrained = true;
 }
 
 
@@ -231,7 +272,13 @@ void trainPoint(float *pointArr, SP_integer pointArrSize,
                 SP_integer label)
 {
     // convert the Prolog array to arma::rowvec
-    rowvec pointVector = convertArrayToRowvec(pointArr, pointArrSize);
+    vec pointVector = convertArrayToColvec(pointArr, pointArrSize);
+    // check if data Dim fits trained Dim
+    if (pointVector.size() != trainedDimensionality)
+    {
+        raisePrologSystemExeption("The Datapoints Dimensionality doesnt fit the trained Dimensionality!");
+        return;
+    }
 
     if(label < 0)
     {
@@ -249,6 +296,7 @@ void trainPoint(float *pointArr, SP_integer pointArrSize,
         raisePrologSystemExeption(e.what());
         return;
     }
+    isModelTrained = true;
 }
 
 
@@ -259,6 +307,12 @@ void trainPoint(float *pointArr, SP_integer pointArrSize,
 //
 void variances(float **variancesMatArr, SP_integer *variancesMatColNum, SP_integer *variancesMatRowNum)
 {
+    if (!isModelTrained)
+    {
+        raisePrologSystemExeption("The Model is not trained!");
+        return;
+    }
+
     // get the ReturnMat
     mat variancesReturnMat = naiveBayesClassifier.Variances();
 
diff --git a/src/methods/naive_bayes_classifier/naive_bayes_classifier.pl b/src/methods/naive_bayes_classifier/naive_bayes_classifier.pl
index ea840a47c57cbd13636622eae2544ea90b96b094..0d799fb7da89822e141e2d7b182b21ba2508a282 100644
--- a/src/methods/naive_bayes_classifier/naive_bayes_classifier.pl
+++ b/src/methods/naive_bayes_classifier/naive_bayes_classifier.pl
@@ -38,6 +38,8 @@
 %% Initializes the classifier as per the input and then trains it by calculating the sample mean and variances.
 %%
 nbc_initModelWithTrain(DataList, DataRows, LabelsList, NumClasses, IncrementalVar, Epsilon) :-
+        NumClasses >= 0,
+        Epsilon >= 0,
         convert_list_to_float_array(DataList, DataRows, array(Xsize, Xrownum, X)),
         convert_list_to_float_array(LabelsList, array(Ysize, Y)),
         initModelWithTrainI(X, Xsize, Xrownum, Y, Ysize, NumClasses, IncrementalVar, Epsilon).
@@ -50,8 +52,8 @@ foreign(initModelWithTrain, c, initModelWithTrainI( +pointer(float_array), +
 
 %% --Input--
+%% int dimensionality => 1,
 %% int numClasses => 0,
-%% bool incrementalVariance => (1)true / (0)false => false,
 %% float32 epsilon => 1e-10
 %%
 %% --Output--
@@ -60,10 +62,11 @@ foreign(initModelWithTrain, c, initModelWithTrainI( +pointer(float_array), +
 %% Initialize the Naive Bayes classifier without performing training.
 %% All of the parameters of the model will be initialized to zero. Be sure to use train before calling classify, otherwise the results may be meaningless.
 %%
-nbc_initModelNoTrain(NumClasses, IncrementalVar, Epsilon) :-
+nbc_initModelNoTrain(Dimensionality, NumClasses, Epsilon) :-
+        Dimensionality > 0,
         NumClasses >= 0,
         Epsilon >= 0,
-        initModelNoTrainI(NumClasses, IncrementalVar, Epsilon).
+        initModelNoTrainI(Dimensionality, NumClasses, Epsilon).
 
 foreign(initModelNoTrain, c, initModelNoTrainI( +integer,
                                                 +integer,
diff --git a/src/methods/naive_bayes_classifier/naive_bayes_classifier_test.pl b/src/methods/naive_bayes_classifier/naive_bayes_classifier_test.pl
index 541088cb90c25ba3c1889fb35f48380ffb99ebbb..7db8158f4d11593cf93f8bc3f870c48f665fd91b 100644
--- a/src/methods/naive_bayes_classifier/naive_bayes_classifier_test.pl
+++ b/src/methods/naive_bayes_classifier/naive_bayes_classifier_test.pl
@@ -7,7 +7,7 @@
 :- use_module('../../helper_files/helper.pl').
 
 reset_Model_NoTrain :-
-        nbc_initModelNoTrain(2, 0, 0.000001).
+        nbc_initModelNoTrain(3, 0, 0.000001).
 
 reset_Model_WithTrain :-
         nbc_initModelWithTrain([5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5], 3, [0,1,0,1], 2, 0, 0.000001).
@@ -18,18 +18,21 @@ reset_Model_WithTrain :-
 :- begin_tests(nbc_initModelNoTrain).
 
 %% Failure Tests
+
+test(nbc_InitModelNoTrain_Negative_Dimensionality, fail) :-
+        nbc_initModelNoTrain(-3, 0, 0.000001).
 
 test(nbc_InitModelNoTrain_Negative_NumClasses, fail) :-
-        nbc_initModelNoTrain(-2, 0, 0.000001).
+        nbc_initModelNoTrain(3, -1, 0.000001).
 
 test(nbc_InitModelNoTrain_Negative_Epsilon, fail) :-
-        nbc_initModelNoTrain(2, 0, -0.0001).
+        nbc_initModelNoTrain(3, 0, -0.0001).
 
 
 %% Successful Tests
 
 test(nbc_InitModelNoTrain_Normal_Use) :-
-        nbc_initModelNoTrain(2, 0, 0.000001).
+        nbc_initModelNoTrain(3, 0, 0.000001).
 
 :- end_tests(nbc_initModelNoTrain).
 
@@ -48,13 +51,14 @@ test(nbc_InitModelWithTrain_Negative_NumClasses, fail) :-
 test(nbc_InitModelWithTrain_Negative_Epsilon, fail) :-
         nbc_initModelWithTrain([5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5], 3, [0,1,0,1], 2, 0, -0.000001).
 
-test(nbc_InitModelWithTrain_Too_Short_Label, [error(_,system_error('Error'))]) :-
+
+test(nbc_InitModelWithTrain_Too_Short_Label, [error(_,system_error('The number of data points does not match the number of labels!'))]) :-
         nbc_initModelWithTrain([5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5], 3, [0,1], 2, 0, 0.000001).
 
-test(nbc_InitModelWithTrain_Too_Long_Label, [error(_,system_error('Error'))]) :-
+test(nbc_InitModelWithTrain_Too_Long_Label, [error(_,system_error('The number of data points does not match the number of labels!'))]) :-
         nbc_initModelWithTrain([5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5], 3, [0,1,0,1,0,1], 2, 0, 0.000001).
 
-test(nbc_InitModelWithTrain_Too_Many_Label_Classes, [error(_,system_error('Error'))]) :-
+test(nbc_InitModelWithTrain_Too_Many_Label_Classes, [error(_,system_error('The given Labels dont fit the format [0,Numclasses-1]!'))]) :-
         nbc_initModelWithTrain([5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5], 3, [0,1,2,3], 2, 0, 0.000001).
 
 
@@ -79,11 +83,11 @@ test(nbc_InitModelWithTrain_CSV_Input) :-
 
 %% Failure Tests
 
-test(nbc_ClassifyMatrix_Before_Train, [error(_,system_error('Error'))]) :-
+test(nbc_ClassifyMatrix_Before_Train, [error(_,system_error('The Model is not trained!'))]) :-
         reset_Model_NoTrain,
         nbc_classifyMatrix([5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5], 3, _, _, _).
 
-test(nbc_ClassifyMatrix_Diffrent_Dims_To_Train, [error(_,system_error('Error'))]) :-
+test(nbc_ClassifyMatrix_Diffrent_Dims_To_Train, [error(_,system_error('The Datapoints Dimensionality doesnt fit the trained Dimensionality!'))]) :-
         reset_Model_WithTrain,
         nbc_classifyMatrix([5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5], 4, _, _, _).
 
@@ -99,7 +103,7 @@ test(nbc_ClassifyMatrix_Normal_Use) :-
         print(ProbabilitiesList).
 
 test(nbc_ClassifyMatrix_CSV_Input) :-
-        reset_Model_WithTrain,
+        nbc_initModelWithTrain([5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5], 4, [0,1,0], 2, 0, 0.000001),
         open('src/data_csv/iris2.csv', read, File),
         take_csv_row(File, skipFirstRow,10, Data),
         nbc_classifyMatrix(Data, 4, PredictionList, _, ProbabilitiesList),
@@ -118,7 +122,7 @@ test(nbc_ClassifyMatrix_CSV_Input) :-
 :- begin_tests(nbc_means).
 
 %% Failure Tests
-test(nbc_Means_Before_Train, [error(_,system_error('Error'))]) :-
+test(nbc_Means_Before_Train, [error(_,system_error('The Model is not trained!'))]) :-
         reset_Model_NoTrain,
         nbc_means(MeansList, _),
         print('\nMeans: '),
@@ -143,7 +147,7 @@ test(nbc_Means_Normal_Use) :-
 :- begin_tests(nbc_probabilities).
 
 %% Failure Tests
-test(nbc_Probabilities_Before_Train, [error(_,system_error('Error'))]) :-
+test(nbc_Probabilities_Before_Train, [error(_,system_error('The Model is not trained!'))]) :-
         reset_Model_NoTrain,
         nbc_probabilities(ProbabilitiesList, _),
         print('\nProbabilities: '),
@@ -185,7 +189,7 @@ test(nbc_TrainMatrix_Too_Many_Label_Classes, [error(_,system_error('The given La
         reset_Model_NoTrain,
         nbc_trainMatrix([5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5], 3, [0,1,2,3], 2, 0).
 
-test(nbc_TrainMatrix_After_InitTrain, [error(_,system_error('addition: incompatible matrix dimensions: 3x1 and 4x1'))]) :-
+test(nbc_TrainMatrix_After_InitTrain, [error(_,system_error('The Datapoints Dimensionality doesnt fit the trained Dimensionality!'))]) :-
         reset_Model_WithTrain,
         nbc_trainMatrix([5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5], 4, [0,1,0], 2, 0).
 
@@ -198,7 +202,7 @@ test(nbc_TrainMatrix_Normal_Use) :-
 
 
 test(nbc_TrainMatrix_CSV_Input) :-
-        reset_Model_NoTrain,
+        nbc_initModelNoTrain(4, 0, 0.000001),
         open('src/data_csv/iris2.csv', read, File),
         take_csv_row(File, skipFirstRow,10, Data),
         nbc_trainMatrix(Data, 4, [0,1,0,1,1,0,1,1,1,0], 2, 0).
@@ -214,11 +218,15 @@ test(nbc_TrainMatrix_CSV_Input) :-
 
 %% Failure Tests
 
-test(nbc_TrainPoint_Bad_Label, [error(_,system_error('Error'))]) :-
+test(nbc_TrainPoint_Bad_Label, [error(_,system_error('The given Label should be positive!'))]) :-
         reset_Model_NoTrain,
         nbc_trainPoint([5.1,3.5,1.4], -5).
 
-test(nbc_TrainPoint_Too_Long_Point, [error(_,system_error('Error'))]) :-
+test(nbc_TrainPoint_Different_Dim, [error(_,system_error('The Datapoints Dimensionality doesnt fit the trained Dimensionality!'))]) :-
+        reset_Model_NoTrain,
+        nbc_trainPoint([5.1,3.5,1.4,3.5], 0).
+
+test(nbc_TrainPoint_Too_Long_Point, [error(_,system_error('The Datapoints Dimensionality doesnt fit the trained Dimensionality!'))]) :-
         reset_Model_WithTrain,
         nbc_trainPoint([5.1,3.5,1.4,3.5], 0).
 
@@ -243,7 +251,7 @@ test(nbc_TrainPoint_After_InitTrain) :-
 :- begin_tests(nbc_variances).
 
 %% Failure Tests
-test(nbc_Variances_Before_Train, [error(_,system_error('Error'))]) :-
+test(nbc_Variances_Before_Train, [error(_,system_error('The Model is not trained!'))]) :-
         reset_Model_NoTrain,
         nbc_variances(VariancesList, _),
         print('\nVariances: '),
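
Usage sketch (illustrative, not part of the patch): after this change the no-train initializer takes the data dimensionality as its first argument, and classify/means/probabilities/variances raise "The Model is not trained!" until a train call has succeeded. The queries below only mirror the predicates exercised in the test suite above; the data values are made up.

    ?- nbc_initModelNoTrain(3, 2, 0.000001).                       % 3 features, 2 classes
    ?- nbc_trainMatrix([5.1,3.5,1.4, 4.9,3.0,1.4, 4.7,3.2,1.3, 4.6,3.1,1.5], 3, [0,1,0,1], 2, 0).
    ?- nbc_classifyMatrix([5.0,3.4,1.5], 3, PredictionList, _, ProbabilitiesList).

A data or point list whose row count differs from the dimensionality given at initialization (3 here) now fails with the dedicated dimensionality error instead of an opaque Armadillo message.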