diff --git a/Makefile b/Makefile index 89f5c486793f56443c8d1edd2888939bdd8a36a3..7be3f52f04333a13dd3997d4c5684d3c2cb1b9ce 100644 --- a/Makefile +++ b/Makefile @@ -4,8 +4,11 @@ SPLFR_PATH=/usr/local/sicstus4.7.1/bin/splfr all: make -C src/methods/ada_boost splfr=$(SPLFR_PATH) make -C src/methods/bayesian_linear_regression splfr=$(SPLFR_PATH) + make -C src/methods/cf splfr=$(SPLFR_PATH) make -C src/methods/dbscan splfr=$(SPLFR_PATH) make -C src/methods/emst splfr=$(SPLFR_PATH) + make -C src/methods/fastmks splfr=$(SPLFR_PATH) + make -C src/methods/kernel_pca splfr=$(SPLFR_PATH) make -C src/methods/kmeans splfr=$(SPLFR_PATH) make -C src/methods/lars splfr=$(SPLFR_PATH) make -C src/methods/linear_regression splfr=$(SPLFR_PATH) @@ -18,13 +21,15 @@ all: make -C src/methods/random_forest splfr=$(SPLFR_PATH) make -C src/methods/softmac_regression splfr=$(SPLFR_PATH) make -C src/methods/approx_kfn splfr=$(SPLFR_PATH) - make -C src/methods/fastmks splfr=$(SPLFR_PATH) clean: make -C src/methods/ada_boost clean make -C src/methods/bayesian_linear_regression clean + make -C src/methods/cf clean make -C src/methods/dbscan clean make -C src/methods/emst clean + make -C src/methods/fastmks clean + make -C src/methods/kernel_pca clean make -C src/methods/kmeans clean make -C src/methods/lars clean make -C src/methods/linear_regression clean @@ -37,6 +42,5 @@ clean: make -C src/methods/random_forest clean make -C src/methods/softmac_regression clean make -C src/methods/approx_kfn clean - make -C src/methods/fastmks clean \ No newline at end of file diff --git a/src/methods/kernel_pca/Makefile b/src/methods/kernel_pca/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..76bab9734a8c694cee152b5f8fc902b41259c6e7 --- /dev/null +++ b/src/methods/kernel_pca/Makefile @@ -0,0 +1,8 @@ +splfr=/usr/local/sicstus4.7.1/bin/splfr + +METHOD_NAME=kernel_pca + +$(METHOD_NAME).so: $(METHOD_NAME).pl $(METHOD_NAME).cpp + $(splfr) -larmadillo -fopenmp -lmlpack -lstdc++ -cxx 
--struct $(METHOD_NAME).pl $(METHOD_NAME).cpp ../../helper_files/helper.cpp
+clean:
+	rm $(METHOD_NAME).so
diff --git a/src/methods/kernel_pca/kernel_pca.cpp b/src/methods/kernel_pca/kernel_pca.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..9dad7df4b073b2cf85d32a56da83b7903fa4086e
--- /dev/null
+++ b/src/methods/kernel_pca/kernel_pca.cpp
@@ -0,0 +1,168 @@
+#include <sicstus/sicstus.h>
+/* ex_glue.h is generated by splfr from the foreign/[2,3] facts.
+   Always include the glue header in your foreign resource code.
+*/
+#include "kernel_pca_glue.h"
+#include <mlpack/methods/kernel_pca/kernel_pca.hpp>
+#include <mlpack/core.hpp>
+
+#include <mlpack/methods/kernel_pca/kernel_rules/nystroem_method.hpp>
+
+#include <mlpack/methods/nystroem_method/ordered_selection.hpp>
+#include <mlpack/methods/nystroem_method/random_selection.hpp>
+
+// strcmp and cout/endl are used directly in this file
+#include <cstring>
+#include <iostream>
+
+// including helper functions for converting between arma structures and arrays
+#include "../../helper_files/helper.hpp"
+
+// some of the most used namespaces
+using namespace arma;
+using namespace mlpack;
+using namespace std;
+using namespace mlpack::kpca;
+using namespace mlpack::kernel;
+
+
+// Runs kernel PCA with the Nystroem approximation; the point selection policy
+// is chosen by name: "kmeans", "ordered" or "random".
+//
+// transformedDataReturnMat, eigvalReturnVector and eigvecReturnMat are taken by
+// reference so that the results written by Apply are visible to the caller.
+// Any other nystroemMethod prints "wrong nystroem input" and leaves the three
+// result containers untouched.
+template<typename KernelType>
+void useNystroemKernelPCA(char const *nystroemMethod, KernelType kernel, bool centerTranformedData, const mat &data, mat &transformedDataReturnMat, vec &eigvalReturnVector, mat &eigvecReturnMat, size_t returnDim)
+{
+    if (strcmp(nystroemMethod, "kmeans") == 0)
+    {
+        KernelPCA<KernelType, NystroemKernelRule<KernelType, KMeansSelection<>>>(kernel, centerTranformedData)
+            .Apply(data, transformedDataReturnMat, eigvalReturnVector, eigvecReturnMat, returnDim);
+    }
+    else if (strcmp(nystroemMethod, "ordered") == 0)
+    {
+        KernelPCA<KernelType, NystroemKernelRule<KernelType, OrderedSelection>>(kernel, centerTranformedData)
+            .Apply(data, transformedDataReturnMat, eigvalReturnVector, eigvecReturnMat, returnDim);
+    }
+    else if (strcmp(nystroemMethod, "random") == 0)
+    {
+        KernelPCA<KernelType, NystroemKernelRule<KernelType, RandomSelection>>(kernel, centerTranformedData)
+            .Apply(data, transformedDataReturnMat, eigvalReturnVector, eigvecReturnMat, returnDim);
+    }
+    else
+    {
+        cout << "wrong nystroem input" << endl;
+    }
+}
+
+// Runs kernel PCA for one concrete kernel object: through the Nystroem
+// approximation when useNystroem is set, otherwise with NaiveKernelRule.
+template<typename KernelType>
+static void runKernelPCA(KernelType kernel, bool useNystroem, char const *nystroemMethod, bool center, const mat &data, mat &transformedData, vec &eigval, mat &eigvec, size_t newDimension)
+{
+    if (useNystroem)
+    {
+        useNystroemKernelPCA<KernelType>(nystroemMethod, kernel, center, data, transformedData, eigval, eigvec, newDimension);
+    }
+    else
+    {
+        KernelPCA<KernelType, NaiveKernelRule<KernelType>>(kernel, center)
+            .Apply(data, transformedData, eigval, eigvec, newDimension);
+    }
+}
+
+// Foreign-function entry point: runs kernel PCA on the given data matrix.
+//
+// input:  kernel                 "linear", "polynomial", "cosine", "gaussian",
+//                                "epanechnikov", "laplacian" or "hyptan"
+//         centerTranformedData   1 => center the transformed data
+//         useNystoem             1 => use the Nystroem approximation
+//         nystroemMethod         "kmeans", "ordered" or "random",
+//                                only read when useNystoem is 1
+//         degree, offset         parameters of the polynomial kernel
+//         scale, offset          parameters of the hyptan kernel
+//         bandwidth              parameter of gaussian, epanechnikov, laplacian
+//         dataMatArr, dataMatSize, dataMatRowNum
+//                                input data, converted with convertArrayToMat
+// output: transformedDataMatArr, transformedDataMatColNum, transformedDataMatRowNum
+//                                the transformed data matrix
+//         eigvalArr, eigvalArrSize
+//                                the eigenvalue vector
+//         eigvecMatArr, eigvecMatColNum, eigvecMatRowNum
+//                                the eigenvector matrix
+//         newDimension           NOTE(review): always set to 0, because returnDim
+//                                is only passed to Apply as an input and never
+//                                updated - confirm whether this should instead
+//                                be an input argument
+// description:
+//         Builds the kernel named by `kernel` and applies KernelPCA to the data.
+//         An unknown kernel name prints "wrong kernel input." and the outputs
+//         are then produced from empty containers.
+void kernel_pca(char const *kernel, SP_integer centerTranformedData, SP_integer useNystoem, char const *nystroemMethod,
+                double degree, double offset, double bandwidth, double scale,
+                float *dataMatArr, SP_integer dataMatSize, SP_integer dataMatRowNum,
+                float **transformedDataMatArr, SP_integer *transformedDataMatColNum, SP_integer *transformedDataMatRowNum,
+                float **eigvalArr, SP_integer *eigvalArrSize,
+                float **eigvecMatArr, SP_integer *eigvecMatColNum, SP_integer *eigvecMatRowNum,
+                SP_integer *newDimension)
+{
+    // convert the Prolog array to arma::mat
+    mat data = convertArrayToMat(dataMatArr, dataMatSize, dataMatRowNum);
+
+    // containers that receive the results of Apply
+    mat transformedDataReturnMat;
+    vec eigvalReturnVector;
+    mat eigvecReturnMat;
+
+    // dimension handed to Apply; stays 0 exactly as before
+    size_t returnDim = 0;
+
+    const bool center = (centerTranformedData == 1);
+    const bool nystroem = (useNystoem == 1);
+
+    if (strcmp(kernel, "linear") == 0)
+    {
+        runKernelPCA(LinearKernel(), nystroem, nystroemMethod, center, data, transformedDataReturnMat, eigvalReturnVector, eigvecReturnMat, returnDim);
+    }
+    else if (strcmp(kernel, "polynomial") == 0)
+    {
+        runKernelPCA(PolynomialKernel(degree, offset), nystroem, nystroemMethod, center, data, transformedDataReturnMat, eigvalReturnVector, eigvecReturnMat, returnDim);
+    }
+    else if (strcmp(kernel, "cosine") == 0)
+    {
+        runKernelPCA(CosineDistance(), nystroem, nystroemMethod, center, data, transformedDataReturnMat, eigvalReturnVector, eigvecReturnMat, returnDim);
+    }
+    else if (strcmp(kernel, "gaussian") == 0)
+    {
+        runKernelPCA(GaussianKernel(bandwidth), nystroem, nystroemMethod, center, data, transformedDataReturnMat, eigvalReturnVector, eigvecReturnMat, returnDim);
+    }
+    else if (strcmp(kernel, "epanechnikov") == 0)
+    {
+        runKernelPCA(EpanechnikovKernel(bandwidth), nystroem, nystroemMethod, center, data, transformedDataReturnMat, eigvalReturnVector, eigvecReturnMat, returnDim);
+    }
+    else if (strcmp(kernel, "laplacian") == 0)
+    {
+        runKernelPCA(LaplacianKernel(bandwidth), nystroem, nystroemMethod, center, data, transformedDataReturnMat, eigvalReturnVector, eigvecReturnMat, returnDim);
+    }
+    else if (strcmp(kernel, "hyptan") == 0)
+    {
+        runKernelPCA(HyperbolicTangentKernel(scale, offset), nystroem, nystroemMethod, center, data, transformedDataReturnMat, eigvalReturnVector, eigvecReturnMat, returnDim);
+    }
+    else
+    {
+        cout << "wrong kernel input." << endl;
+    }
+
+    // return the transformedData Matrix
+    returnMatrixInformation(transformedDataReturnMat, transformedDataMatArr, transformedDataMatColNum, transformedDataMatRowNum);
+
+    // return the eigval Vector
+    returnVectorInformation(eigvalReturnVector, eigvalArr, eigvalArrSize);
+
+    // return the eigvec Matrix
+    returnMatrixInformation(eigvecReturnMat, eigvecMatArr, eigvecMatColNum, eigvecMatRowNum);
+
+    *newDimension = returnDim;
+}
\ No newline at end of file
diff --git a/src/methods/kernel_pca/kernel_pca.pl b/src/methods/kernel_pca/kernel_pca.pl
new file mode 100644
index 0000000000000000000000000000000000000000..55e8a944c39f3156551c240c69de9407fc845a3b
--- /dev/null
+++ b/src/methods/kernel_pca/kernel_pca.pl
@@ -0,0 +1,49 @@
+:- module(kernel_pca, [kernel_pca/20]).
+
+%% requirements of library(struct)
+:- load_files(library(str_decl),
+        [when(compile_time), if(changed)]).
+
+%% needed for using the array type
+:- use_module(library(structs)).
+:- use_module('../../helper_files/helper.pl').
+
+%% type definitions for the float array
+:- foreign_type
+        float32          = float_32,
+        float_array      = array(float32).
+
+%% definitions for the connected function
+
+%% TODO: complete the description below
+%% --Input--
+%%              string  kernel                  => "linear", "polynomial", "cosine", "gaussian", "epanechnikov", "laplacian", "hyptan",
+%%              bool    centerTransformedData   => (1)true / (0)false,
+%%              bool    useNystroem             => (1)true / (0)false,
+%%              string  nystroemMethod          => "kmeans", "ordered", "random",
+%%              float32 degree                  needed by polynomial,
+%%              float32 offset                  needed by polynomial, hyptan,
+%%              float32 bandwidth               needed by gaussian, epanechnikov, laplacian,
+%%              float32 scale                   needed by hyptan,
+%%              mat     data
+%%
+%% --Output--
+%%              mat     transformedData,
+%%              vec     eigenValues,
+%%              mat     eigenVectors,
+%%              int     newDimension
+%%
+%% --Description--
+foreign(kernel_pca, c, kernel_pca(+string, +integer, +integer, +string,
+                                  +float32, +float32, +float32, +float32,
+                                  +pointer(float_array), +integer, +integer,
+                                  -pointer(float_array), -integer, -integer,
+                                  -pointer(float_array), -integer,
+                                  -pointer(float_array), -integer, -integer,
+                                  -integer)).
+
+
+%% Defines the functions that get connected from kernel_pca.cpp
+foreign_resource(kernel_pca, [kernel_pca]).
+
+:- load_foreign_resource(kernel_pca).
\ No newline at end of file
diff --git a/src/methods/kernel_pca/kernel_pca_test.pl b/src/methods/kernel_pca/kernel_pca_test.pl
new file mode 100644
index 0000000000000000000000000000000000000000..928e67ebc5d0b9b079812c67e30d9c4b11df4a81
--- /dev/null
+++ b/src/methods/kernel_pca/kernel_pca_test.pl
@@ -0,0 +1,56 @@
+:- use_module(library(plunit)).
+
+:- use_module(kernel_pca).
+:- use_module('../../helper_files/helper.pl').
+
+reset_Model :-
+    initModel(1,0,50,0.0001).
+
+:- begin_tests(lists).
+
+%% alpha tests -- NOTE(review): initModel/4, alpha/1 and train/5 are not exported by module kernel_pca (it exports only kernel_pca/20); presumably copied from another method's tests -- TODO confirm and replace
+test(alpha_std_init) :-
+    reset_Model,
+    alpha(0).
+test(alpha_wrong_input, fail) :-
+    reset_Model,
+    alpha(1).
+test(alpha_after_train, A =:= 9223372036854775808) :-
+    reset_Model,
+    convert_list_to_float_array([5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5],3, array(Xsize, Xrownum, X)),
+    convert_list_to_float_array([0.2,0.2,0.2,0.2], array(Ysize, Y)),
+    train(X,Xsize, Xrownum,Y, Ysize),
+    alpha(A).
+
+%% train tests -- NOTE(review): train/5 is not exported by module kernel_pca (it exports only kernel_pca/20), and the name false_train3 is used by two tests below -- TODO confirm and replace
+test(correct_train) :-
+    reset_Model,
+    convert_list_to_float_array([5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5],3, array(Xsize, Xrownum, X)),
+    convert_list_to_float_array([0.2,0.2,0.2,0.2], array(Ysize, Y)),
+    train(X,Xsize, Xrownum,Y, Ysize).
+test(false_train, fail) :-
+    reset_Model,
+    convert_list_to_float_array([],3, array(Xsize, Xrownum, X)),
+    convert_list_to_float_array([0.2,0.2,0.2,0.2], array(Ysize, Y)),
+    train(X,Xsize, Xrownum,Y, Ysize).
+test(false_train2, fail) :-
+    reset_Model,
+    convert_list_to_float_array([],0, array(Xsize, Xrownum, X)),
+    convert_list_to_float_array([0.2,0.2,0.2,0.2], array(Ysize, Y)),
+    train(X,Xsize, Xrownum,Y, Ysize).
+test(false_train3, fail) :-
+    reset_Model,
+    convert_list_to_float_array([1,2],0, array(Xsize, Xrownum, X)),
+    convert_list_to_float_array([0.2,0.2,0.2,0.2], array(Ysize, Y)),
+    train(X,Xsize, Xrownum,Y, Ysize).
+test(false_train3, fail) :-
+    reset_Model,
+    convert_list_to_float_array([1,2,44,3],3, array(Xsize, Xrownum, X)),
+    convert_list_to_float_array([0.2,0.2,0.2,0.2], array(Ysize, Y)),
+    train(X,Xsize, Xrownum,Y, Ysize).
+test(false_train4) :-
+    reset_Model,
+    convert_list_to_float_array([1,2,44,3],2, array(Xsize, Xrownum, X)),
+    convert_list_to_float_array([0.2,0.2,0.2,0.2], array(Ysize, Y)),
+    train(X,Xsize, Xrownum,Y, Ysize).
+:- end_tests(lists).
\ No newline at end of file