diff --git a/src/methods/kmeans/kmeans.cpp b/src/methods/kmeans/kmeans.cpp
index 41dda472e0cfa19794649dd3648d34dc6bfa1ed3..fa903b7ff4ef457a33c14255a7923a3d68a98bec 100644
--- a/src/methods/kmeans/kmeans.cpp
+++ b/src/methods/kmeans/kmeans.cpp
@@ -34,102 +34,87 @@ using namespace mlpack::metric;
 
 // internal function
 // initiates KMeans with the given class template, passes the parameters through, and returns the results
 template< template<class,class> class LloydStepType=NaiveKMeans>
-void InitAndClusterKMeans(SP_integer maxIterations, SP_integer initialPartition, SP_integer emptyCluster, float *dataMatArr, SP_integer dataMatSize, SP_integer dataMatRowNum, SP_integer clusters,
-                          float **assignmentsArr, SP_integer *assignmentsArrSize, float **centroidsMatArr, SP_integer *centroidsMatColNum, SP_integer *centroidsMatRowNum)
+void InitAndClusterKMeans(SP_integer maxIterations,
+                          const char *initialPartition,
+                          const char *emptyCluster,
+                          float *dataMatArr, SP_integer dataMatSize, SP_integer dataMatRowNum,
+                          SP_integer clusters,
+                          float **assignmentsArr, SP_integer *assignmentsArrSize,
+                          float **centroidsMatArr, SP_integer *centroidsMatColNum, SP_integer *centroidsMatRowNum)
 {
-    // convert the Prolog arrays to arma::mat
+    // convert the Prolog array to arma::mat
     mat data = convertArrayToMat(dataMatArr, dataMatSize, dataMatRowNum);
 
-    // get the ReturnVector
-    arma::Row<size_t> toReturnVector;
+    // create the ReturnVector
+    arma::Row<size_t> assignmentsReturnVector;
+    // create the ReturnMat
+    mat centroidsReturnMat;
 
-    // get the ReturnMat
-    mat toReturnMat;
 
+    // Initialize the model with the given policies and cluster the data with KMeans
-    switch (initialPartition)
+    if (strcmp(initialPartition, "sampleInitialization") == 0)
     {
-    case 0: // SampleInitialization
-        cout << "0" << endl;
-        switch (emptyCluster)
+        if (strcmp(emptyCluster, "maxVarianceNewCluster") == 0)
         {
-        case 0: // MaxVarianceNewCluster
-            cout << "0" << endl;
-            KMeans<EuclideanDistance, SampleInitialization, MaxVarianceNewCluster, LloydStepType>(maxIterations).Cluster(data, clusters, toReturnVector, toReturnMat, false, false);
-            break;
-
-        case 1:
+            KMeans<EuclideanDistance, SampleInitialization, MaxVarianceNewCluster, LloydStepType>(maxIterations).Cluster(data, clusters, assignmentsReturnVector, centroidsReturnMat, false, false);
+        }
+        else if (strcmp(emptyCluster, "killEmptyCluster") == 0)
+        {
             // KillEmptyClusters
-            cout << "1" << endl;
-            KMeans<EuclideanDistance, SampleInitialization, KillEmptyClusters, LloydStepType>(maxIterations).Cluster(data, clusters, toReturnVector, toReturnMat, false, false);
-            break;
-
-        case 2:
+            KMeans<EuclideanDistance, SampleInitialization, KillEmptyClusters, LloydStepType>(maxIterations).Cluster(data, clusters, assignmentsReturnVector, centroidsReturnMat, false, false);
+        }
+        else if (strcmp(emptyCluster, "allowEmptyCluster") == 0)
+        {
             // AllowEmptyClusters
-            cout << "2" << endl;
-            KMeans<EuclideanDistance, SampleInitialization, AllowEmptyClusters, LloydStepType>(maxIterations).Cluster(data, clusters, toReturnVector, toReturnMat, false, false);
-            break;
-
-        default:
-            // Wrong input
-            cout << "wrong input of emptyCluster, should be 0, 1 or 2" << endl;
-            break;
+            KMeans<EuclideanDistance, SampleInitialization, AllowEmptyClusters, LloydStepType>(maxIterations).Cluster(data, clusters, assignmentsReturnVector, centroidsReturnMat, false, false);
         }
-        break;
-
-    case 1:
+        else
+        {
+            raisePrologDomainExeption(emptyCluster, 3, "The given EmptyClusterPolicy is unknown!", "KMeans");
+            return;
+        }
+    }
+    else if (strcmp(initialPartition, "randomPartition") == 0)
+    {
         // RandomPartition
-        cout << "1" << endl;
-        switch (emptyCluster)
+        if (strcmp(emptyCluster, "maxVarianceNewCluster") == 0)
         {
-        case 0: // MaxVarianceNewCluster
-            cout << "0" << endl;
-            KMeans<EuclideanDistance, RandomPartition, MaxVarianceNewCluster, LloydStepType>(maxIterations).Cluster(data, clusters, toReturnVector, toReturnMat, false, false);
-            break;
-
-        case 1:
+            KMeans<EuclideanDistance, RandomPartition, MaxVarianceNewCluster, LloydStepType>(maxIterations).Cluster(data, clusters, assignmentsReturnVector, centroidsReturnMat, false, false);
+        }
+        else if (strcmp(emptyCluster, "killEmptyCluster") == 0)
+        {
             // KillEmptyClusters
-            cout << "1" << endl;
-            KMeans<EuclideanDistance, RandomPartition, KillEmptyClusters, LloydStepType>(maxIterations).Cluster(data, clusters, toReturnVector, toReturnMat, false, false);
-            break;
-
-        case 2:
+            KMeans<EuclideanDistance, RandomPartition, KillEmptyClusters, LloydStepType>(maxIterations).Cluster(data, clusters, assignmentsReturnVector, centroidsReturnMat, false, false);
+        }
+        else if (strcmp(emptyCluster, "allowEmptyCluster") == 0)
+        {
             // AllowEmptyClusters
-            cout << "2" << endl;
-            KMeans<EuclideanDistance, RandomPartition, AllowEmptyClusters, LloydStepType>(maxIterations).Cluster(data, clusters, toReturnVector, toReturnMat, false, false);
-            break;
-
-        default:
-            // Wrong input
-            cout << "wrong input of emptyCluster, should be 0, 1 or 2" << endl;
-            break;
+            KMeans<EuclideanDistance, RandomPartition, AllowEmptyClusters, LloydStepType>(maxIterations).Cluster(data, clusters, assignmentsReturnVector, centroidsReturnMat, false, false);
         }
-        break;
-
-    default:
-        // Wrong input
-        cout << "wrong input of initialPartition, should be 0 or 1" << endl;
-        break;
+        else
+        {
+            raisePrologDomainExeption(emptyCluster, 3, "The given EmptyClusterPolicy is unknown!", "KMeans");
+            return;
+        }
     }
+    else
+    {
+        raisePrologDomainExeption(initialPartition, 2, "The given InitialPartitionPolicy is unknown!", "KMeans");
+        return;
+    }
 
-    // return the Vector lenght
-    *assignmentsArrSize = toReturnVector.n_elem;
-
-    // return the Vector as Array
-    *assignmentsArr = convertToArray(toReturnVector);
-
-    // return the Matrix dimensions
-    *centroidsMatColNum = toReturnMat.n_cols;
-    *centroidsMatRowNum = toReturnMat.n_rows;
-
-    // return the Matrix as one long Array
-    *centroidsMatArr = convertToArray(toReturnMat);
+    // return the Vector
+    returnVectorInformation(assignmentsReturnVector, assignmentsArr, assignmentsArrSize);
+    // return the Matrix
+    returnMatrixInformation(centroidsReturnMat, centroidsMatArr, centroidsMatColNum, centroidsMatRowNum);
 }
 
-// TODO:
+
 // input: const size_t maxIterations = 1000,
 //        const InitialPartitionPolicy partitioner = InitialPartitionPolicy(),
 //        const EmptyClusterPolicy emptyClusterAction = EmptyClusterPolicy()
@@ -142,14 +127,21 @@ void InitAndClusterKMeans(SP_integer maxIterations, SP_integer initialPartition,
 //        const bool initialCentroidGuess = false
 // output:
 // description:
-void naiveKMeans(SP_integer maxIterations, SP_integer initialPartition, SP_integer emptyCluster, float *dataMatArr, SP_integer dataMatSize, SP_integer dataMatRowNum, SP_integer clusters,
-                 float **assignmentsArr, SP_integer *assignmentsArrSize, float **centroidsMatArr, SP_integer *centroidsMatColNum, SP_integer *centroidsMatRowNum)
+// Runs kmeans with naive as the algorithm for the Lloyd iteration.
+//
+void naiveKMeans(SP_integer maxIterations,
+                 const char *initialPartition,
+                 const char *emptyCluster,
+                 float *dataMatArr, SP_integer dataMatSize, SP_integer dataMatRowNum,
+                 SP_integer clusters,
+                 float **assignmentsArr, SP_integer *assignmentsArrSize,
+                 float **centroidsMatArr, SP_integer *centroidsMatColNum, SP_integer *centroidsMatRowNum)
 {
     InitAndClusterKMeans<NaiveKMeans>(maxIterations, initialPartition, emptyCluster, dataMatArr, dataMatSize, dataMatRowNum, clusters,
                                       assignmentsArr, assignmentsArrSize, centroidsMatArr, centroidsMatColNum, centroidsMatRowNum);
 }
 
-// TODO:
+
 // input: const size_t maxIterations = 1000,
 //        const InitialPartitionPolicy partitioner = InitialPartitionPolicy(),
 //        const EmptyClusterPolicy emptyClusterAction = EmptyClusterPolicy()
@@ -162,14 +154,21 @@ void naiveKMeans(SP_integer maxIterations, SP_integ
 //        const bool initialCentroidGuess = false
 // output:
 // description:
-void dualTreeKMeans(SP_integer maxIterations, SP_integer initialPartition, SP_integer emptyCluster, float *dataMatArr, SP_integer dataMatSize, SP_integer dataMatRowNum, SP_integer clusters,
-                    float **assignmentsArr, SP_integer *assignmentsArrSize, float **centroidsMatArr, SP_integer *centroidsMatColNum, SP_integer *centroidsMatRowNum)
+// Runs kmeans with dualTree as the algorithm for the Lloyd iteration.
+//
+void dualTreeKMeans(SP_integer maxIterations,
+                    const char *initialPartition,
+                    const char *emptyCluster,
+                    float *dataMatArr, SP_integer dataMatSize, SP_integer dataMatRowNum,
+                    SP_integer clusters,
+                    float **assignmentsArr, SP_integer *assignmentsArrSize,
+                    float **centroidsMatArr, SP_integer *centroidsMatColNum, SP_integer *centroidsMatRowNum)
 {
     InitAndClusterKMeans<DualTreeKMeans>(maxIterations, initialPartition, emptyCluster, dataMatArr, dataMatSize, dataMatRowNum, clusters,
                                          assignmentsArr, assignmentsArrSize, centroidsMatArr, centroidsMatColNum, centroidsMatRowNum);
 }
 
-// TODO:
+
 // input: const size_t maxIterations = 1000,
 //        const InitialPartitionPolicy partitioner = InitialPartitionPolicy(),
 //        const EmptyClusterPolicy emptyClusterAction = EmptyClusterPolicy()
@@ -182,14 +181,21 @@ void dualTreeKMeans(SP_integer maxIterations, SP_in
 //        const bool initialCentroidGuess = false
 // output:
 // description:
-void elkanKMeans(SP_integer maxIterations, SP_integer initialPartition, SP_integer emptyCluster, float *dataMatArr, SP_integer dataMatSize, SP_integer dataMatRowNum, SP_integer clusters,
-                 float **assignmentsArr, SP_integer *assignmentsArrSize, float **centroidsMatArr, SP_integer *centroidsMatColNum, SP_integer *centroidsMatRowNum)
+// Runs kmeans with elkan as the algorithm for the Lloyd iteration.
+//
+void elkanKMeans(SP_integer maxIterations,
+                 const char *initialPartition,
+                 const char *emptyCluster,
+                 float *dataMatArr, SP_integer dataMatSize, SP_integer dataMatRowNum,
+                 SP_integer clusters,
+                 float **assignmentsArr, SP_integer *assignmentsArrSize,
+                 float **centroidsMatArr, SP_integer *centroidsMatColNum, SP_integer *centroidsMatRowNum)
 {
     InitAndClusterKMeans<ElkanKMeans>(maxIterations, initialPartition, emptyCluster, dataMatArr, dataMatSize, dataMatRowNum, clusters,
                                       assignmentsArr, assignmentsArrSize, centroidsMatArr, centroidsMatColNum, centroidsMatRowNum);
 }
 
-// TODO:
+
 // input: const size_t maxIterations = 1000,
 //        const InitialPartitionPolicy partitioner = InitialPartitionPolicy(),
 //        const EmptyClusterPolicy emptyClusterAction = EmptyClusterPolicy()
@@ -202,16 +208,21 @@ void elkanKMeans(SP_integer maxIterations, SP_integ
 //        const bool initialCentroidGuess = false
 // output:
 // description:
-void hamerlyKMeans(SP_integer maxIterations, SP_integer initialPartition, SP_integer emptyCluster, float *dataMatArr, SP_integer dataMatSize, SP_integer dataMatRowNum, SP_integer clusters,
-                   float **assignmentsArr, SP_integer *assignmentsArrSize, float **centroidsMatArr, SP_integer *centroidsMatColNum, SP_integer *centroidsMatRowNum)
+// Runs kmeans with hamerly as the algorithm for the Lloyd iteration.
+//
+void hamerlyKMeans(SP_integer maxIterations,
+                   const char *initialPartition,
+                   const char *emptyCluster,
+                   float *dataMatArr, SP_integer dataMatSize, SP_integer dataMatRowNum,
+                   SP_integer clusters,
+                   float **assignmentsArr, SP_integer *assignmentsArrSize,
+                   float **centroidsMatArr, SP_integer *centroidsMatColNum, SP_integer *centroidsMatRowNum)
 {
     InitAndClusterKMeans<HamerlyKMeans>(maxIterations, initialPartition, emptyCluster, dataMatArr, dataMatSize, dataMatRowNum, clusters,
                                         assignmentsArr, assignmentsArrSize, centroidsMatArr, centroidsMatColNum, centroidsMatRowNum);
 }
 
-
-// TODO:
 // input: const size_t maxIterations = 1000,
 //        const InitialPartitionPolicy partitioner = InitialPartitionPolicy(),
 //        const EmptyClusterPolicy emptyClusterAction = EmptyClusterPolicy()
@@ -224,9 +235,16 @@ void hamerlyKMeans(SP_integer maxIterations, SP_int
 //        const bool initialCentroidGuess = false
 // output:
 // description:
-void pellegMooreKMeans(SP_integer maxIterations, SP_integer initialPartition, SP_integer emptyCluster, float *dataMatArr, SP_integer dataMatSize, SP_integer dataMatRowNum, SP_integer clusters,
-                       float **assignmentsArr, SP_integer *assignmentsArrSize, float **centroidsMatArr, SP_integer *centroidsMatColNum, SP_integer *centroidsMatRowNum)
+// Runs kmeans with pelleg Moore as the algorithm for the Lloyd iteration.
+//
+void pellegMooreKMeans(SP_integer maxIterations,
+                       const char *initialPartition,
+                       const char *emptyCluster,
+                       float *dataMatArr, SP_integer dataMatSize, SP_integer dataMatRowNum,
+                       SP_integer clusters,
+                       float **assignmentsArr, SP_integer *assignmentsArrSize,
+                       float **centroidsMatArr, SP_integer *centroidsMatColNum, SP_integer *centroidsMatRowNum)
 {
     InitAndClusterKMeans<PellegMooreKMeans>(maxIterations, initialPartition, emptyCluster, dataMatArr, dataMatSize, dataMatRowNum, clusters,
                                             assignmentsArr, assignmentsArrSize, centroidsMatArr, centroidsMatColNum, centroidsMatRowNum);
-}
\ No newline at end of file
+}
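The C++ side now dispatches on policy name strings and raises a Prolog domain error for anything it does not recognize. For illustration, a minimal sketch of how that surfaces through the Prolog interface (the policy atoms come from the branches above; the query shape and error term are taken from the test suite below):

    ?- naiveKMeans(1000, wrongInput, maxVarianceNewCluster,
                   [5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5], 3, 2, _, _, _).
    %  raises domain_error('The given InitialPartitionPolicy is unknown!', wrongInput)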
diff --git a/src/methods/kmeans/kmeans.pl b/src/methods/kmeans/kmeans.pl
index 97d07991a27c419d428b704b1fe89c2caeffaefd..d0a83fafdae2110f6bbf9b2e0a7fe8f4f0fac5ec 100644
--- a/src/methods/kmeans/kmeans.pl
+++ b/src/methods/kmeans/kmeans.pl
@@ -1,9 +1,9 @@
-:- module(kmeans, [ naiveKMeans/12,
-                    dualTreeKMeans/12,
-                    elkanKMeans/12,
-                    hamerlyKMeans/12,
-                    pellegMooreKMeans/12]).
+:- module(kmeans, [ naiveKMeans/9,
+                    dualTreeKMeans/9,
+                    elkanKMeans/9,
+                    hamerlyKMeans/9,
+                    pellegMooreKMeans/9]).
 
 %% requirements of library(struct)
 :- load_files(library(str_decl),
@@ -23,8 +23,8 @@
 %% --Input--
 %% int     maxIterations    => 1000,
-%% string  initialPartition => "SampleInitialzation", "RandomPartition",
-%% string  emptyCluster     => "MaxVarianceNewCluster", "KillEmptyCluster", "AllowEmptyCluster",
+%% string  initialPartition => "sampleInitialization", "randomPartition",
+%% string  emptyCluster     => "maxVarianceNewCluster", "killEmptyCluster", "allowEmptyCluster",
 %% mat     data,
 %% int     clusters         => 0
 %%
@@ -35,7 +35,15 @@
 %% --Description--
 %% Runs kmeans with naive as the algorithm for the Lloyd iteration.
 %%
-foreign(naiveKMeans, c, naiveKMeans( +integer, +integer, +integer,
+naiveKMeans(MaxIterations, InitialPartition, EmptyCluster, DataList, DataRows, Clusters, AssignmentsList, CentroidsList, ZCols) :-
+    MaxIterations >= 0,
+    Clusters > 0,
+    convert_list_to_float_array(DataList, DataRows, array(Xsize, Xrows, X)),
+    naiveKMeansI(MaxIterations, InitialPartition, EmptyCluster, X, Xsize, Xrows, Clusters, Y, Ysize, Z, ZCols, ZRows),
+    convert_float_array_to_list(Y, Ysize, AssignmentsList),
+    convert_float_array_to_2d_list(Z, ZCols, ZRows, CentroidsList).
+
+foreign(naiveKMeans, c, naiveKMeansI( +integer, +string, +string,
                                      +pointer(float_array), +integer, +integer,
                                      +integer,
                                      -pointer(float_array), -integer,
@@ -44,8 +52,8 @@ foreign(naiveKMeans, c, naiveKMeans( +integer, +integer, +integer,
 %% --Input--
 %% int     maxIterations    => 1000,
-%% string  initialPartition => "SampleInitialzation", "RandomPartition",
-%% string  emptyCluster     => "MaxVarianceNewCluster", "KillEmptyCluster", "AllowEmptyCluster",
+%% string  initialPartition => "sampleInitialization", "randomPartition",
+%% string  emptyCluster     => "maxVarianceNewCluster", "killEmptyCluster", "allowEmptyCluster",
 %% mat     data,
 %% int     clusters         => 0
 %%
@@ -56,7 +64,15 @@ foreign(naiveKMeans, c, naiveKMeans( +integer, +integer, +integer,
 %% --Description--
 %% Runs kmeans with dualTree as the algorithm for the Lloyd iteration.
 %%
-foreign(dualTreeKMeans, c, dualTreeKMeans( +integer, +integer, +integer,
+dualTreeKMeans(MaxIterations, InitialPartition, EmptyCluster, DataList, DataRows, Clusters, AssignmentsList, CentroidsList, ZCols) :-
+    MaxIterations >= 0,
+    Clusters > 0,
+    convert_list_to_float_array(DataList, DataRows, array(Xsize, Xrows, X)),
+    dualTreeKMeansI(MaxIterations, InitialPartition, EmptyCluster, X, Xsize, Xrows, Clusters, Y, Ysize, Z, ZCols, ZRows),
+    convert_float_array_to_list(Y, Ysize, AssignmentsList),
+    convert_float_array_to_2d_list(Z, ZCols, ZRows, CentroidsList).
+
+foreign(dualTreeKMeans, c, dualTreeKMeansI( +integer, +string, +string,
                                            +pointer(float_array), +integer, +integer,
                                            +integer,
                                            -pointer(float_array), -integer,
@@ -65,8 +81,8 @@ foreign(dualTreeKMeans, c, dualTreeKMeans( +integer, +integer, +integer,
 %% --Input--
 %% int     maxIterations    => 1000,
-%% string  initialPartition => "SampleInitialzation", "RandomPartition",
-%% string  emptyCluster     => "MaxVarianceNewCluster", "KillEmptyCluster", "AllowEmptyCluster",
+%% string  initialPartition => "sampleInitialization", "randomPartition",
+%% string  emptyCluster     => "maxVarianceNewCluster", "killEmptyCluster", "allowEmptyCluster",
 %% mat     data,
 %% int     clusters         => 0
 %%
@@ -77,7 +93,15 @@ foreign(dualTreeKMeans, c, dualTreeKMeans( +integer, +integer, +integer,
 %% --Description--
 %% Runs kmeans with elkan as the algorithm for the Lloyd iteration.
 %%
-foreign(elkanKMeans, c, elkanKMeans( +integer, +integer, +integer,
+elkanKMeans(MaxIterations, InitialPartition, EmptyCluster, DataList, DataRows, Clusters, AssignmentsList, CentroidsList, ZCols) :-
+    MaxIterations >= 0,
+    Clusters > 0,
+    convert_list_to_float_array(DataList, DataRows, array(Xsize, Xrows, X)),
+    elkanKMeansI(MaxIterations, InitialPartition, EmptyCluster, X, Xsize, Xrows, Clusters, Y, Ysize, Z, ZCols, ZRows),
+    convert_float_array_to_list(Y, Ysize, AssignmentsList),
+    convert_float_array_to_2d_list(Z, ZCols, ZRows, CentroidsList).
+
+foreign(elkanKMeans, c, elkanKMeansI( +integer, +string, +string,
                                      +pointer(float_array), +integer, +integer,
                                      +integer,
                                      -pointer(float_array), -integer,
@@ -86,8 +110,8 @@ foreign(elkanKMeans, c, elkanKMeans( +integer, +integer, +integer,
 %% --Input--
 %% int     maxIterations    => 1000,
-%% string  initialPartition => "SampleInitialzation", "RandomPartition",
-%% string  emptyCluster     => "MaxVarianceNewCluster", "KillEmptyCluster", "AllowEmptyCluster",
+%% string  initialPartition => "sampleInitialization", "randomPartition",
+%% string  emptyCluster     => "maxVarianceNewCluster", "killEmptyCluster", "allowEmptyCluster",
 %% mat     data,
 %% int     clusters         => 0
 %%
@@ -98,17 +122,25 @@ foreign(elkanKMeans, c, elkanKMeans( +integer, +integer, +integer,
 %% --Description--
 %% Runs kmeans with hamerly as the algorithm for the Lloyd iteration.
 %%
-foreign(hamerlyKMeans, c, hamerlyKMeans(+integer, +integer, +integer,
-                                        +pointer(float_array), +integer, +integer,
-                                        +integer,
-                                        -pointer(float_array), -integer,
-                                        -pointer(float_array), -integer, -integer)).
+hamerlyKMeans(MaxIterations, InitialPartition, EmptyCluster, DataList, DataRows, Clusters, AssignmentsList, CentroidsList, ZCols) :-
+    MaxIterations >= 0,
+    Clusters > 0,
+    convert_list_to_float_array(DataList, DataRows, array(Xsize, Xrows, X)),
+    hamerlyKMeansI(MaxIterations, InitialPartition, EmptyCluster, X, Xsize, Xrows, Clusters, Y, Ysize, Z, ZCols, ZRows),
+    convert_float_array_to_list(Y, Ysize, AssignmentsList),
+    convert_float_array_to_2d_list(Z, ZCols, ZRows, CentroidsList).
+
+foreign(hamerlyKMeans, c, hamerlyKMeansI( +integer, +string, +string,
+                                          +pointer(float_array), +integer, +integer,
+                                          +integer,
+                                          -pointer(float_array), -integer,
+                                          -pointer(float_array), -integer, -integer)).
 
 %% --Input--
 %% int     maxIterations    => 1000,
-%% string  initialPartition => "SampleInitialzation", "RandomPartition",
-%% string  emptyCluster     => "MaxVarianceNewCluster", "KillEmptyCluster", "AllowEmptyCluster",
+%% string  initialPartition => "sampleInitialization", "randomPartition",
+%% string  emptyCluster     => "maxVarianceNewCluster", "killEmptyCluster", "allowEmptyCluster",
 %% mat     data,
 %% int     clusters         => 0
 %%
@@ -119,11 +151,19 @@ foreign(hamerlyKMeans, c, hamerlyKMeans(+integer, +integer, +integer,
 %% --Description--
 %% Runs kmeans with pelleg Moore as the algorithm for the Lloyd iteration.
 %%
-foreign(pellegMooreKMeans, c, pellegMooreKMeans(+integer, +integer, +integer,
-                                                +pointer(float_array), +integer, +integer,
-                                                +integer,
-                                                -pointer(float_array), -integer,
-                                                -pointer(float_array), -integer, -integer)).
+pellegMooreKMeans(MaxIterations, InitialPartition, EmptyCluster, DataList, DataRows, Clusters, AssignmentsList, CentroidsList, ZCols) :-
+    MaxIterations >= 0,
+    Clusters > 0,
+    convert_list_to_float_array(DataList, DataRows, array(Xsize, Xrows, X)),
+    pellegMooreKMeansI(MaxIterations, InitialPartition, EmptyCluster, X, Xsize, Xrows, Clusters, Y, Ysize, Z, ZCols, ZRows),
+    convert_float_array_to_list(Y, Ysize, AssignmentsList),
+    convert_float_array_to_2d_list(Z, ZCols, ZRows, CentroidsList).
+
+foreign(pellegMooreKMeans, c, pellegMooreKMeansI( +integer, +string, +string,
+                                                  +pointer(float_array), +integer, +integer,
+                                                  +integer,
+                                                  -pointer(float_array), -integer,
+                                                  -pointer(float_array), -integer, -integer)).
 
 
 %% Defines the functions that get connected from main.cpp
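All five exported predicates share the same 9-argument shape: validate MaxIterations and Clusters, flatten the data list into a float array, call the I-suffixed foreign predicate, and convert the results back into lists. A representative happy-path query (a sketch only; the concrete assignments and centroids depend on the random initialization):

    ?- naiveKMeans(1000, sampleInitialization, maxVarianceNewCluster,
                   [5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5], 3, 2,
                   AssignmentsList, CentroidsList, ZCols).
    %  AssignmentsList is a list with one cluster index per data point,
    %  CentroidsList a 2D list holding the two centroids, and
    %  ZCols the column count of the centroid matrix.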
diff --git a/src/methods/kmeans/kmeans_test.pl b/src/methods/kmeans/kmeans_test.pl
index 65c70127189fd3cc765f93a31edfc90462de97d1..21a71d1c4e280fc5ca06df3d84f4f17a4d489984 100644
--- a/src/methods/kmeans/kmeans_test.pl
+++ b/src/methods/kmeans/kmeans_test.pl
@@ -8,34 +8,397 @@
 
 %%
-%% TESTING predicate predicate/10
+%% TESTING predicate naiveKMeans/9
 %%
-:- begin_tests(predicate).
+:- begin_tests(naiveKMeans).
 
 %% Failure Tests
-
-test(testDescription, [error(domain_error('expectation' , culprit), _)]) :-
-    reset_Model_No_Train(perceptron),
-    train([5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5], 3, [0,0,0,0], 2, culprit, 50, 0.0001, _).
-
-test(testDescription2, [error(_,system_error('The values of the Label have to start at 0 and be >= 0 and < the given numClass!'))]) :-
-    reset_Model_No_Train(perceptron),
-    train([5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5], 3, [0,1,0,2], 2, perceptron, 50, 0.0001, _).
+
+test(naiveKMeans_Negative_MaxIterations, fail) :-
+    naiveKMeans(-1000, sampleInitialization, randomPartition, [5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5], 3, 2, _, _, _).
+
+test(naiveKMeans_Wrong_InitialPartition_Input, [error(domain_error('The given InitialPartitionPolicy is unknown!' , wrongInput), _)]) :-
+    naiveKMeans(1000, wrongInput, randomPartition, [5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5], 3, 2, _, _, _).
+
+test(naiveKMeans_Wrong_EmptyCluster_Input, [error(domain_error('The given EmptyClusterPolicy is unknown!' , wrongInput), _)]) :-
+    naiveKMeans(1000, sampleInitialization, wrongInput, [5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5], 3, 2, _, _, _).
+
+test(naiveKMeans_Negative_Clusters, fail) :-
+    naiveKMeans(1000, sampleInitialization, randomPartition, [5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5], 3, -2, _, _, _).
+
+
+%% Successful Tests
+
+test(naiveKMeans_Sample_MaxVariance) :-
+    naiveKMeans(1000, sampleInitialization, maxVarianceNewCluster, [5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5], 3, 2, AssignmentsList, CentroidsList, _),
+    print('\nAssignments :'),
+    print(AssignmentsList),
+    print('\nCentroids :'),
+    print(CentroidsList).
+
+test(naiveKMeans_Sample_KillEmpty) :-
+    naiveKMeans(1000, sampleInitialization, killEmptyCluster, [5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5], 3, 2, AssignmentsList, CentroidsList, _),
+    print('\nAssignments :'),
+    print(AssignmentsList),
+    print('\nCentroids :'),
+    print(CentroidsList).
+
+test(naiveKMeans_Sample_AllowEmpty) :-
+    naiveKMeans(1000, sampleInitialization, allowEmptyCluster, [5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5], 3, 2, AssignmentsList, CentroidsList, _),
+    print('\nAssignments :'),
+    print(AssignmentsList),
+    print('\nCentroids :'),
+    print(CentroidsList).
+
+test(naiveKMeans_Random_MaxVariance) :-
+    naiveKMeans(1000, randomPartition, maxVarianceNewCluster, [5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5], 3, 2, AssignmentsList, CentroidsList, _),
+    print('\nAssignments :'),
+    print(AssignmentsList),
+    print('\nCentroids :'),
+    print(CentroidsList).
+
+test(naiveKMeans_Random_KillEmpty) :-
+    naiveKMeans(1000, randomPartition, killEmptyCluster, [5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5], 3, 2, AssignmentsList, CentroidsList, _),
+    print('\nAssignments :'),
+    print(AssignmentsList),
+    print('\nCentroids :'),
+    print(CentroidsList).
+
+test(naiveKMeans_Random_AllowEmpty) :-
+    naiveKMeans(1000, randomPartition, allowEmptyCluster, [5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5], 3, 2, AssignmentsList, CentroidsList, _),
+    print('\nAssignments :'),
+    print(AssignmentsList),
+    print('\nCentroids :'),
+    print(CentroidsList).
+
+
+test(naiveKMeans_CSV_Input) :-
+    open('src/data_csv/iris2.csv', read, File),
+    take_csv_row(File, skipFirstRow, 30, Data),
+    naiveKMeans(100, randomPartition, allowEmptyCluster, Data, 4, 3, AssignmentsList, CentroidsList, _),
+    print('\nAssignments :'),
+    print(AssignmentsList),
+    print('\nCentroids :'),
+    print(CentroidsList).
+
+:- end_tests(naiveKMeans).
+
+
+
+%%
+%% TESTING predicate dualTreeKMeans/9
+%%
+:- begin_tests(dualTreeKMeans).
+
+%% Failure Tests
+
+test(dualTreeKMeans_Negative_MaxIterations, fail) :-
+    dualTreeKMeans(-1000, sampleInitialization, randomPartition, [5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5], 3, 2, _, _, _).
+
+test(dualTreeKMeans_Wrong_InitialPartition_Input, [error(domain_error('The given InitialPartitionPolicy is unknown!' , wrongInput), _)]) :-
+    dualTreeKMeans(1000, wrongInput, randomPartition, [5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5], 3, 2, _, _, _).
+
+test(dualTreeKMeans_Wrong_EmptyCluster_Input, [error(domain_error('The given EmptyClusterPolicy is unknown!' , wrongInput), _)]) :-
+    dualTreeKMeans(1000, sampleInitialization, wrongInput, [5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5], 3, 2, _, _, _).
+
+test(dualTreeKMeans_Negative_Clusters, fail) :-
+    dualTreeKMeans(1000, sampleInitialization, randomPartition, [5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5], 3, -2, _, _, _).
+
+
+%% Successful Tests
+
+test(dualTreeKMeans_Sample_MaxVariance) :-
+    dualTreeKMeans(1000, sampleInitialization, maxVarianceNewCluster, [5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5], 3, 2, AssignmentsList, CentroidsList, _),
+    print('\nAssignments :'),
+    print(AssignmentsList),
+    print('\nCentroids :'),
+    print(CentroidsList).
+
+test(dualTreeKMeans_Sample_KillEmpty) :-
+    dualTreeKMeans(1000, sampleInitialization, killEmptyCluster, [5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5], 3, 2, AssignmentsList, CentroidsList, _),
+    print('\nAssignments :'),
+    print(AssignmentsList),
+    print('\nCentroids :'),
+    print(CentroidsList).
+
+test(dualTreeKMeans_Sample_AllowEmpty) :-
+    dualTreeKMeans(1000, sampleInitialization, allowEmptyCluster, [5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5], 3, 2, AssignmentsList, CentroidsList, _),
+    print('\nAssignments :'),
+    print(AssignmentsList),
+    print('\nCentroids :'),
+    print(CentroidsList).
+
+test(dualTreeKMeans_Random_MaxVariance) :-
+    dualTreeKMeans(1000, randomPartition, maxVarianceNewCluster, [5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5], 3, 2, AssignmentsList, CentroidsList, _),
+    print('\nAssignments :'),
+    print(AssignmentsList),
+    print('\nCentroids :'),
+    print(CentroidsList).
+
+test(dualTreeKMeans_Random_KillEmpty) :-
+    dualTreeKMeans(1000, randomPartition, killEmptyCluster, [5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5], 3, 2, AssignmentsList, CentroidsList, _),
+    print('\nAssignments :'),
+    print(AssignmentsList),
+    print('\nCentroids :'),
+    print(CentroidsList).
+
+test(dualTreeKMeans_Random_AllowEmpty) :-
+    dualTreeKMeans(1000, randomPartition, allowEmptyCluster, [5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5], 3, 2, AssignmentsList, CentroidsList, _),
+    print('\nAssignments :'),
+    print(AssignmentsList),
+    print('\nCentroids :'),
+    print(CentroidsList).
+
+
+test(dualTreeKMeans_CSV_Input) :-
+    open('src/data_csv/iris2.csv', read, File),
+    take_csv_row(File, skipFirstRow, 30, Data),
+    dualTreeKMeans(100, randomPartition, allowEmptyCluster, Data, 4, 3, AssignmentsList, CentroidsList, _),
+    print('\nAssignments :'),
+    print(AssignmentsList),
+    print('\nCentroids :'),
+    print(CentroidsList).
+
+:- end_tests(dualTreeKMeans).
+
+
+
+%%
+%% TESTING predicate elkanKMeans/9
+%%
+:- begin_tests(elkanKMeans).
+
+%% Failure Tests
+
+test(elkanKMeans_Negative_MaxIterations, fail) :-
+    elkanKMeans(-1000, sampleInitialization, randomPartition, [5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5], 3, 2, _, _, _).
+
+test(elkanKMeans_Wrong_InitialPartition_Input, [error(domain_error('The given InitialPartitionPolicy is unknown!' , wrongInput), _)]) :-
+    elkanKMeans(1000, wrongInput, randomPartition, [5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5], 3, 2, _, _, _).
+
+test(elkanKMeans_Wrong_EmptyCluster_Input, [error(domain_error('The given EmptyClusterPolicy is unknown!' , wrongInput), _)]) :-
+    elkanKMeans(1000, sampleInitialization, wrongInput, [5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5], 3, 2, _, _, _).
+
+test(elkanKMeans_Negative_Clusters, fail) :-
+    elkanKMeans(1000, sampleInitialization, randomPartition, [5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5], 3, -2, _, _, _).
 
 
 %% Successful Tests
 
-test(testDescription3, [true(Error =:= 1)]) :-
-    reset_Model_No_Train(perceptron),
-    train([5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5], 3, [0,0,0,0], 2, perceptron, 50, 0.0001, Error).
+test(elkanKMeans_Sample_MaxVariance) :-
+    elkanKMeans(1000, sampleInitialization, maxVarianceNewCluster, [5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5], 3, 2, AssignmentsList, CentroidsList, _),
+    print('\nAssignments :'),
+    print(AssignmentsList),
+    print('\nCentroids :'),
+    print(CentroidsList).
+
+test(elkanKMeans_Sample_KillEmpty) :-
+    elkanKMeans(1000, sampleInitialization, killEmptyCluster, [5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5], 3, 2, AssignmentsList, CentroidsList, _),
+    print('\nAssignments :'),
+    print(AssignmentsList),
+    print('\nCentroids :'),
+    print(CentroidsList).
+
+test(elkanKMeans_Sample_AllowEmpty) :-
+    elkanKMeans(1000, sampleInitialization, allowEmptyCluster, [5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5], 3, 2, AssignmentsList, CentroidsList, _),
+    print('\nAssignments :'),
+    print(AssignmentsList),
+    print('\nCentroids :'),
+    print(CentroidsList).
+
+test(elkanKMeans_Random_MaxVariance) :-
+    elkanKMeans(1000, randomPartition, maxVarianceNewCluster, [5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5], 3, 2, AssignmentsList, CentroidsList, _),
+    print('\nAssignments :'),
+    print(AssignmentsList),
+    print('\nCentroids :'),
+    print(CentroidsList).
+
+test(elkanKMeans_Random_KillEmpty) :-
+    elkanKMeans(1000, randomPartition, killEmptyCluster, [5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5], 3, 2, AssignmentsList, CentroidsList, _),
+    print('\nAssignments :'),
+    print(AssignmentsList),
+    print('\nCentroids :'),
+    print(CentroidsList).
+
+test(elkanKMeans_Random_AllowEmpty) :-
+    elkanKMeans(1000, randomPartition, allowEmptyCluster, [5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5], 3, 2, AssignmentsList, CentroidsList, _),
+    print('\nAssignments :'),
+    print(AssignmentsList),
+    print('\nCentroids :'),
+    print(CentroidsList).
+
+
+test(elkanKMeans_CSV_Input) :-
+    open('src/data_csv/iris2.csv', read, File),
+    take_csv_row(File, skipFirstRow, 30, Data),
+    elkanKMeans(100, randomPartition, allowEmptyCluster, Data, 4, 3, AssignmentsList, CentroidsList, _),
+    print('\nAssignments :'),
+    print(AssignmentsList),
+    print('\nCentroids :'),
+    print(CentroidsList).
+
+:- end_tests(elkanKMeans).
+
+
+
+%%
+%% TESTING predicate hamerlyKMeans/9
+%%
+:- begin_tests(hamerlyKMeans).
+
+%% Failure Tests
+
+test(hamerlyKMeans_Negative_MaxIterations, fail) :-
+    hamerlyKMeans(-1000, sampleInitialization, randomPartition, [5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5], 3, 2, _, _, _).
+
+test(hamerlyKMeans_Wrong_InitialPartition_Input, [error(domain_error('The given InitialPartitionPolicy is unknown!' , wrongInput), _)]) :-
+    hamerlyKMeans(1000, wrongInput, randomPartition, [5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5], 3, 2, _, _, _).
+
+test(hamerlyKMeans_Wrong_EmptyCluster_Input, [error(domain_error('The given EmptyClusterPolicy is unknown!' , wrongInput), _)]) :-
+    hamerlyKMeans(1000, sampleInitialization, wrongInput, [5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5], 3, 2, _, _, _).
+
+test(hamerlyKMeans_Negative_Clusters, fail) :-
+    hamerlyKMeans(1000, sampleInitialization, randomPartition, [5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5], 3, -2, _, _, _).
+
+
+%% Successful Tests
+
+test(hamerlyKMeans_Sample_MaxVariance) :-
+    hamerlyKMeans(1000, sampleInitialization, maxVarianceNewCluster, [5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5], 3, 2, AssignmentsList, CentroidsList, _),
+    print('\nAssignments :'),
+    print(AssignmentsList),
+    print('\nCentroids :'),
+    print(CentroidsList).
+
+test(hamerlyKMeans_Sample_KillEmpty) :-
+    hamerlyKMeans(1000, sampleInitialization, killEmptyCluster, [5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5], 3, 2, AssignmentsList, CentroidsList, _),
+    print('\nAssignments :'),
+    print(AssignmentsList),
+    print('\nCentroids :'),
+    print(CentroidsList).
+
+test(hamerlyKMeans_Sample_AllowEmpty) :-
+    hamerlyKMeans(1000, sampleInitialization, allowEmptyCluster, [5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5], 3, 2, AssignmentsList, CentroidsList, _),
+    print('\nAssignments :'),
+    print(AssignmentsList),
+    print('\nCentroids :'),
+    print(CentroidsList).
+
+test(hamerlyKMeans_Random_MaxVariance) :-
+    hamerlyKMeans(1000, randomPartition, maxVarianceNewCluster, [5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5], 3, 2, AssignmentsList, CentroidsList, _),
+    print('\nAssignments :'),
+    print(AssignmentsList),
+    print('\nCentroids :'),
+    print(CentroidsList).
+
+test(hamerlyKMeans_Random_KillEmpty) :-
+    hamerlyKMeans(1000, randomPartition, killEmptyCluster, [5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5], 3, 2, AssignmentsList, CentroidsList, _),
+    print('\nAssignments :'),
+    print(AssignmentsList),
+    print('\nCentroids :'),
+    print(CentroidsList).
+
+test(hamerlyKMeans_Random_AllowEmpty) :-
+    hamerlyKMeans(1000, randomPartition, allowEmptyCluster, [5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5], 3, 2, AssignmentsList, CentroidsList, _),
+    print('\nAssignments :'),
+    print(AssignmentsList),
+    print('\nCentroids :'),
+    print(CentroidsList).
+
+
+test(hamerlyKMeans_CSV_Input) :-
+    open('src/data_csv/iris2.csv', read, File),
+    take_csv_row(File, skipFirstRow, 30, Data),
+    hamerlyKMeans(100, randomPartition, allowEmptyCluster, Data, 4, 3, AssignmentsList, CentroidsList, _),
+    print('\nAssignments :'),
+    print(AssignmentsList),
+    print('\nCentroids :'),
+    print(CentroidsList).
+
+:- end_tests(hamerlyKMeans).
+
+
+
+%%
+%% TESTING predicate pellegMooreKMeans/9
+%%
+:- begin_tests(pellegMooreKMeans).
+
+%% Failure Tests
+
+test(pellegMooreKMeans_Negative_MaxIterations, fail) :-
+    pellegMooreKMeans(-1000, sampleInitialization, randomPartition, [5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5], 3, 2, _, _, _).
+
+test(pellegMooreKMeans_Wrong_InitialPartition_Input, [error(domain_error('The given InitialPartitionPolicy is unknown!' , wrongInput), _)]) :-
+    pellegMooreKMeans(1000, wrongInput, randomPartition, [5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5], 3, 2, _, _, _).
+
+test(pellegMooreKMeans_Wrong_EmptyCluster_Input, [error(domain_error('The given EmptyClusterPolicy is unknown!' , wrongInput), _)]) :-
+    pellegMooreKMeans(1000, sampleInitialization, wrongInput, [5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5], 3, 2, _, _, _).
+
+test(pellegMooreKMeans_Negative_Clusters, fail) :-
+    pellegMooreKMeans(1000, sampleInitialization, randomPartition, [5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5], 3, -2, _, _, _).
+
+
+%% Successful Tests
+
+test(pellegMooreKMeans_Sample_MaxVariance) :-
+    pellegMooreKMeans(1000, sampleInitialization, maxVarianceNewCluster, [5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5], 3, 2, AssignmentsList, CentroidsList, _),
+    print('\nAssignments :'),
+    print(AssignmentsList),
+    print('\nCentroids :'),
+    print(CentroidsList).
+
+test(pellegMooreKMeans_Sample_KillEmpty) :-
+    pellegMooreKMeans(1000, sampleInitialization, killEmptyCluster, [5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5], 3, 2, AssignmentsList, CentroidsList, _),
+    print('\nAssignments :'),
+    print(AssignmentsList),
+    print('\nCentroids :'),
+    print(CentroidsList).
+
+test(pellegMooreKMeans_Sample_AllowEmpty) :-
+    pellegMooreKMeans(1000, sampleInitialization, allowEmptyCluster, [5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5], 3, 2, AssignmentsList, CentroidsList, _),
+    print('\nAssignments :'),
+    print(AssignmentsList),
+    print('\nCentroids :'),
+    print(CentroidsList).
+
+test(pellegMooreKMeans_Random_MaxVariance) :-
+    pellegMooreKMeans(1000, randomPartition, maxVarianceNewCluster, [5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5], 3, 2, AssignmentsList, CentroidsList, _),
+    print('\nAssignments :'),
+    print(AssignmentsList),
+    print('\nCentroids :'),
+    print(CentroidsList).
+
+test(pellegMooreKMeans_Random_KillEmpty) :-
+    pellegMooreKMeans(1000, randomPartition, killEmptyCluster, [5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5], 3, 2, AssignmentsList, CentroidsList, _),
+    print('\nAssignments :'),
+    print(AssignmentsList),
+    print('\nCentroids :'),
+    print(CentroidsList).
+
+test(pellegMooreKMeans_Random_AllowEmpty) :-
+    pellegMooreKMeans(1000, randomPartition, allowEmptyCluster, [5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5], 3, 2, AssignmentsList, CentroidsList, _),
+    print('\nAssignments :'),
+    print(AssignmentsList),
+    print('\nCentroids :'),
+    print(CentroidsList).
+
 
-test(testDescription4, [true(Error =:= 0.9797958971132711)]) :-
-    reset_Model_No_Train(perceptron),
+test(pellegMooreKMeans_CSV_Input) :-
     open('src/data_csv/iris2.csv', read, File),
-    take_csv_row(File, skipFirstRow,10, Data),
-    train(Data, 4, [0,1,0,1,1,0,1,1,1,0], 2, perceptron, 50, 0.0001, Error).
+    take_csv_row(File, skipFirstRow, 30, Data),
+    pellegMooreKMeans(100, randomPartition, allowEmptyCluster, Data, 4, 3, AssignmentsList, CentroidsList, _),
+    print('\nAssignments :'),
+    print(AssignmentsList),
+    print('\nCentroids :'),
+    print(CentroidsList).
 
-:- end_tests(predicate).
+:- end_tests(pellegMooreKMeans).
 
 run_kmeans_tests :-
     run_tests.
diff --git a/test_all.pl b/test_all.pl
index 3ef6d61800b198bbbb237d6c811a7eee0218a158..a7b0293b97fc76c4ab5705e91d093fe921e36e9b 100644
--- a/test_all.pl
+++ b/test_all.pl
@@ -32,7 +32,7 @@
 
 %%:- use_module('src/methods/kfn/kfn_test.pl').
 
-%%:- use_module('src/methods/kmeans/kmeans_test.pl').
+:- use_module('src/methods/kmeans/kmeans_test.pl').
 
 %%:- use_module('src/methods/knn/knn_test.pl').
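With the kmeans test module re-enabled in test_all.pl, the suite can also be exercised on its own through the run_kmeans_tests/0 entry point defined at the end of kmeans_test.pl (a sketch, assuming the foreign library has already been built and loaded):

    ?- use_module('src/methods/kmeans/kmeans_test.pl').
    ?- run_kmeans_tests.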