diff --git a/Makefile b/Makefile
index c5941fd65baf03155084d535b67e1bad969545bf..89f5c486793f56443c8d1edd2888939bdd8a36a3 100644
--- a/Makefile
+++ b/Makefile
@@ -11,6 +11,7 @@ all:
 	make -C src/methods/linear_regression splfr=$(SPLFR_PATH)
 	make -C src/methods/linear_SVM splfr=$(SPLFR_PATH)
 	make -C src/methods/logistic_regression splfr=$(SPLFR_PATH)
+	make -C src/methods/lsh splfr=$(SPLFR_PATH)
 	make -C src/methods/mean_shift splfr=$(SPLFR_PATH)
 	make -C src/methods/naive_bayes_classifier splfr=$(SPLFR_PATH)
 	make -C src/methods/perceptron splfr=$(SPLFR_PATH)
@@ -29,6 +30,7 @@ clean:
 	make -C src/methods/linear_regression clean
 	make -C src/methods/linear_SVM clean
 	make -C src/methods/logistic_regression clean
+	make -C src/methods/lsh clean
 	make -C src/methods/mean_shift clean
 	make -C src/methods/naive_bayes_classifier clean
 	make -C src/methods/perceptron clean
diff --git a/src/methods/lsh/Makefile b/src/methods/lsh/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..b520001e90dac8db77d56c00721b1dc8f139c31a
--- /dev/null
+++ b/src/methods/lsh/Makefile
@@ -0,0 +1,17 @@
+# Default location of the SICStus splfr tool. The top-level Makefile passes
+# splfr=... on the command line, which overrides this assignment.
+splfr=/usr/local/sicstus4.7.1/bin/splfr
+
+METHOD_NAME=lsh
+
+# Rebuild the foreign resource whenever the Prolog interface, the C++ glue
+# code or the shared helper sources change.
+$(METHOD_NAME).so: $(METHOD_NAME).pl $(METHOD_NAME).cpp ../../helper_files/helper.cpp ../../helper_files/helper.hpp
+	$(splfr) -larmadillo -fopenmp -lmlpack -lstdc++ -cxx --struct $(METHOD_NAME).pl $(METHOD_NAME).cpp ../../helper_files/helper.cpp
+
+# clean is a command, not a file to build.
+.PHONY: clean
+
+# -f: do not fail (and abort the top-level clean) when the library was never built.
+clean:
+	rm -f $(METHOD_NAME).so
diff --git a/src/methods/lsh/lsh.cpp b/src/methods/lsh/lsh.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..0780526289440cd33f3597b3bfc0c7bca5b929d8
--- /dev/null
+++ b/src/methods/lsh/lsh.cpp
@@ -0,0 +1,131 @@
+#include <sicstus/sicstus.h>
+/* lsh_glue.h is generated by splfr from the foreign/[2,3] facts.
+   Always include the glue header in your foreign resource code.
+*/
+#include "lsh_glue.h"
+#include <mlpack/methods/lsh/lsh_search.hpp>
+#include <mlpack/core.hpp>
+
+// helper functions that translate between Prolog arrays and arma structures
+#include "../../helper_files/helper.hpp"
+
+// namespaces used throughout this file
+using namespace arma;
+using namespace mlpack;
+using namespace std;
+using namespace mlpack::neighbor;
+
+// The one LSHSearch object every foreign function below reads or replaces
+LSHSearch lshSearch;
+
+// TODO: adding support for arma::cube
+
+// initModel: build a new LSHSearch object and store it in lshSearch.
+// input:  referenceMatArr, referenceMatSize, referenceMatRowNum
+//             -> handed to convertArrayToMat to obtain the reference matrix,
+//         numProj, numTables, hashWidth, secondHashSize, bucketSize
+//             -> forwarded unchanged to the LSHSearch constructor
+//                (mlpack defaults noted by the author: hashWidth = 0.0,
+//                 secondHashSize = 99901, bucketSize = 500)
+// output: none
+// description: whatever lshSearch held before is overwritten
+void initModel(float *referenceMatArr, SP_integer referenceMatSize, SP_integer referenceMatRowNum, SP_integer numProj, SP_integer numTables, double hashWidth, SP_integer secondHashSize, SP_integer bucketSize)
+{
+    // rebuild the reference matrix from the array Prolog handed over
+    mat referenceData = convertArrayToMat(referenceMatArr, referenceMatSize, referenceMatRowNum);
+
+    lshSearch = LSHSearch(referenceData, numProj, numTables, hashWidth, secondHashSize, bucketSize);
+}
+
+// computeRecall: compare the neighbors found by a search with the real neighbors.
+// input:  found and real neighbors, each given as array, size and row number;
+//         both are converted to arma::Mat< size_t > before the call
+// output: double recall percentage [0, 1]
+// description: returns the value of lshSearch.ComputeRecall for the two matrices
+double computeRecall(float *foundNeighborsMatArr, SP_integer foundNeighborsMatSize, SP_integer foundNeighborsMatRowNum, float *realNeighborsMatArr, SP_integer realNeighborsMatSize, SP_integer realNeighborsMatRowNum)
+{
+    // the inputs arrive as float arrays, ComputeRecall works on Mat< size_t >
+    mat foundAsMat = convertArrayToMat(foundNeighborsMatArr, foundNeighborsMatSize, foundNeighborsMatRowNum);
+    Mat< size_t > foundIdx = conv_to<Mat< size_t >>::from(foundAsMat);
+    mat realAsMat = convertArrayToMat(realNeighborsMatArr, realNeighborsMatSize, realNeighborsMatRowNum);
+    Mat< size_t > realIdx = conv_to<Mat< size_t >>::from(realAsMat);
+    return lshSearch.ComputeRecall(foundIdx, realIdx);
+}
+
+// searchWithQuery: neighbor search for the points of a given query set.
+// input:  querySetMatArr, querySetMatSize, querySetMatRowNum -> converted to the query matrix,
+//         k, numTablesToSearch, T -> forwarded unchanged to lshSearch.Search
+//             (mlpack defaults noted by the author: numTablesToSearch = 0, T = 0)
+// output: resultingNeighborsMatArr, resultingNeighborsMatColNum, resultingNeighborsMatRowNum
+//             <- filled from the neighbor matrix written by Search,
+//         distancesMatArr, distancesMatColNum, distancesMatRowNum
+//             <- filled from the distance matrix written by Search
+// description: uses the model currently stored in lshSearch
+void searchWithQuery(float *querySetMatArr, SP_integer querySetMatSize, SP_integer querySetMatRowNum,
+    SP_integer k,
+    float **resultingNeighborsMatArr, SP_integer *resultingNeighborsMatColNum, SP_integer *resultingNeighborsMatRowNum,
+    float **distancesMatArr, SP_integer *distancesMatColNum, SP_integer *distancesMatRowNum,
+    SP_integer numTablesToSearch, SP_integer T)
+{
+    // rebuild the query matrix from the array Prolog handed over
+    mat queryPoints = convertArrayToMat(querySetMatArr, querySetMatSize, querySetMatRowNum);
+
+    // containers that Search fills in
+    Mat< size_t > neighborsOut;
+    mat distancesOut;
+
+    // run the search on the stored model
+    lshSearch.Search(queryPoints, k, neighborsOut, distancesOut, numTablesToSearch, T);
+
+    // hand the neighbor matrix back through the output arguments
+    returnMatrixInformation(neighborsOut, resultingNeighborsMatArr, resultingNeighborsMatColNum, resultingNeighborsMatRowNum);
+
+    // hand the distance matrix back through the output arguments
+    returnMatrixInformation(distancesOut, distancesMatArr, distancesMatColNum, distancesMatRowNum);
+}
+
+// searchNoQuery: neighbor search without a separate query set.
+// input:  k, numTablesToSearch, T -> forwarded unchanged to lshSearch.Search
+//             (mlpack defaults noted by the author: numTablesToSearch = 0, T = 0)
+// output: resultingNeighborsMatArr, resultingNeighborsMatColNum, resultingNeighborsMatRowNum
+//             <- filled from the neighbor matrix written by Search,
+//         distancesMatArr, distancesMatColNum, distancesMatRowNum
+//             <- filled from the distance matrix written by Search
+// description: uses the model currently stored in lshSearch
+void searchNoQuery(SP_integer k,
+    float **resultingNeighborsMatArr, SP_integer *resultingNeighborsMatColNum, SP_integer *resultingNeighborsMatRowNum,
+    float **distancesMatArr, SP_integer *distancesMatColNum, SP_integer *distancesMatRowNum,
+    SP_integer numTablesToSearch, SP_integer T)
+{
+    // containers that Search fills in
+    Mat< size_t > neighborsOut;
+    mat distancesOut;
+
+    // run the search on the stored model
+    lshSearch.Search(k, neighborsOut, distancesOut, numTablesToSearch, T);
+
+    // hand the neighbor matrix back through the output arguments
+    returnMatrixInformation(neighborsOut, resultingNeighborsMatArr, resultingNeighborsMatColNum, resultingNeighborsMatRowNum);
+
+    // hand the distance matrix back through the output arguments
+    returnMatrixInformation(distancesOut, distancesMatArr, distancesMatColNum, distancesMatRowNum);
+}
+
+// train: call Train on the model stored in lshSearch with a new reference set.
+// input:  referencesMatArr, referencesMatSize, referencesMatRowNum
+//             -> handed to convertArrayToMat to obtain the reference matrix,
+//         numProj, numTables, hashWidth, secondHashSize, bucketSize
+//             -> forwarded unchanged to lshSearch.Train
+//                (mlpack defaults noted by the author: hashWidth = 0.0,
+//                 secondHashSize = 99901, bucketSize = 500)
+//         the optional arma::cube projection argument of Train is not passed
+// output: none
+// description: operates on the existing lshSearch object instead of replacing it
+void train(float *referencesMatArr, SP_integer referencesMatSize, SP_integer referencesMatRowNum,
+    SP_integer numProj, SP_integer numTables, double hashWidth, SP_integer secondHashSize, SP_integer bucketSize)
+{
+    // rebuild the reference matrix from the array Prolog handed over
+    mat trainingData = convertArrayToMat(referencesMatArr, referencesMatSize, referencesMatRowNum);
+
+    lshSearch.Train(trainingData, numProj, numTables, hashWidth, secondHashSize, bucketSize);
+}
diff --git a/src/methods/lsh/lsh.pl b/src/methods/lsh/lsh.pl
new file mode 100644
index 0000000000000000000000000000000000000000..10b8a703246e1ebb131eab5fe068d72b823096a9
--- /dev/null
+++ b/src/methods/lsh/lsh.pl
@@ -0,0 +1,96 @@
+:- module(lsh, [
+        initModel/8,
+        computeRecall/7,
+        searchWithQuery/12,
+        searchNoQuery/9,
+        train/8]).
+
+%% library(structs) needs its declarations loaded at compile time
+:- load_files(library(str_decl), [when(compile_time), if(changed)]).
+
+%% structs supplies the array type; helper.pl is the shared helper module
+:- use_module(library(structs)).
+:- use_module('../../helper_files/helper.pl').
+
+%% foreign types used by the declarations below: float32 and an array of float32
+:- foreign_type
+        float32          = float_32,
+        float_array      = array(float32).
+
+%% foreign declarations: each fact maps a predicate onto the C++ function of the same name
+
+%% initModel/8
+%% --Input--
+%%              mat     referenceSet            (pointer, size, row number),
+%%              int     numProj,
+%%              int     numTables,
+%%              float32 hashWidth       => 0.0,
+%%              int     secondHashSize  => 99901,
+%%              int     bucketSize      => 500
+%% --Output--   none
+%% --Description--
+%%              All eight arguments are inputs of the C++ function initModel.
+foreign(initModel, c, initModel(+pointer(float_array), +integer, +integer,
+                                +integer, +integer, +float32, +integer, +integer)).
+
+%% computeRecall/7
+%% --Input--
+%%              mat     foundNeighbors          (pointer, size, row number),
+%%              mat     realNeighbors           (pointer, size, row number)
+%% --Output--
+%%              float32 recall percentage       => values in between [0,1]
+%% --Description--
+%%              The last argument receives the return value of the C++ function computeRecall.
+foreign(computeRecall, c, computeRecall(+pointer(float_array), +integer, +integer,
+                                        +pointer(float_array), +integer, +integer, [-float32])).
+
+%% searchWithQuery/12
+%% --Input--
+%%              mat     querySet                (pointer, size, row number),
+%%              int     k,
+%%              int     numTablesToSearch       => 0,
+%%              int     T                       => 0
+%% --Output--
+%%              mat     resultingNeighbors      (pointer, column number, row number),
+%%              mat     distances               (pointer, column number, row number)
+%% --Description--
+foreign(searchWithQuery, c, searchWithQuery(+pointer(float_array), +integer, +integer, +integer,
+                                            -pointer(float_array), -integer, -integer,
+                                            -pointer(float_array), -integer, -integer, +integer, +integer)).
+
+%% searchNoQuery/9
+%% --Input--
+%%              int     k,
+%%              int     numTablesToSearch       => 0,
+%%              int     T                       => 0
+%% --Output--
+%%              mat     resultingNeighbors      (pointer, column number, row number),
+%%              mat     distances               (pointer, column number, row number)
+%% --Description--
+foreign(searchNoQuery, c, searchNoQuery(+integer,
+                                        -pointer(float_array), -integer, -integer,
+                                        -pointer(float_array), -integer, -integer, +integer, +integer)).
+
+%% train/8
+%% --Input--
+%%              mat     referenceSet            (pointer, size, row number),
+%%              int     numProj,
+%%              int     numTables,
+%%              float32 hashWidth       => 0.0,
+%%              int     secondHashSize  => 99901,
+%%              int     bucketSize      => 500
+%% --Output--   none
+%% --Description--
+%%              All eight arguments are inputs of the C++ function train.
+foreign(train, c, train(+pointer(float_array), +integer, +integer,
+                        +integer, +integer, +float32, +integer, +integer)).
+
+
+%% Lists the functions of the foreign resource lsh (implemented in lsh.cpp)
+foreign_resource(lsh, [
+        initModel, computeRecall,
+        searchWithQuery, searchNoQuery,
+        train
+]).
+
+:- load_foreign_resource(lsh).
\ No newline at end of file
diff --git a/src/methods/lsh/lsh_test.pl b/src/methods/lsh/lsh_test.pl
new file mode 100644
index 0000000000000000000000000000000000000000..cbc014739788f87069df36c86f2a88f41a9b752c
--- /dev/null
+++ b/src/methods/lsh/lsh_test.pl
@@ -0,0 +1,54 @@
+:- use_module(library(plunit)).
+
+:- use_module(lsh).
+:- use_module('../../helper_files/helper.pl').
+
+%% Reference data shared by all tests; always converted with a row number of 3.
+reference_data([5.1,3.5,1.4,4.9,3.0,1.4,4.7,3.2,1.3,4.6,3.1,1.5]).
+
+%% Re-initializes the global LSH model so every test starts from the same state.
+%% Arguments after the matrix: numProj, numTables, hashWidth, secondHashSize, bucketSize.
+reset_Model :-
+        reference_data(Data),
+        convert_list_to_float_array(Data, 3, array(Xsize, Xrownum, X)),
+        initModel(X, Xsize, Xrownum, 10, 30, 0.0, 99901, 500).
+
+:- begin_tests(lsh).
+
+%% initModel tests
+test(init_model_std) :-
+        reset_Model.
+
+%% train tests
+test(train_std) :-
+        reset_Model,
+        reference_data(Data),
+        convert_list_to_float_array(Data, 3, array(Xsize, Xrownum, X)),
+        train(X, Xsize, Xrownum, 10, 30, 0.0, 99901, 500).
+
+%% computeRecall tests: identical index matrices => 1.0, disjoint ones => 0.0
+test(recall_identical_neighbors, [true(Recall =:= 1.0)]) :-
+        reset_Model,
+        convert_list_to_float_array([0.0,1.0,2.0,3.0], 2, array(Fsize, Frownum, F)),
+        convert_list_to_float_array([0.0,1.0,2.0,3.0], 2, array(Rsize, Rrownum, R)),
+        computeRecall(F, Fsize, Frownum, R, Rsize, Rrownum, Recall).
+test(recall_disjoint_neighbors, [true(Recall =:= 0.0)]) :-
+        reset_Model,
+        convert_list_to_float_array([0.0,1.0,2.0,3.0], 2, array(Fsize, Frownum, F)),
+        convert_list_to_float_array([4.0,5.0,6.0,7.0], 2, array(Rsize, Rrownum, R)),
+        computeRecall(F, Fsize, Frownum, R, Rsize, Rrownum, Recall).
+
+%% searchNoQuery tests
+%% assumes 12 values with row number 3 are 4 points; k = 2 => 2 * 4 entries per result matrix
+test(search_no_query_dims, [true((NCols * NRows =:= 8, DCols =:= NCols, DRows =:= NRows))]) :-
+        reset_Model,
+        searchNoQuery(2, _, NCols, NRows, _, DCols, DRows, 0, 0).
+
+%% searchWithQuery tests
+%% assumes 6 values with row number 3 are 2 query points; k = 2 => 2 * 2 entries per result matrix
+test(search_with_query_dims, [true((NCols * NRows =:= 4, DCols =:= NCols, DRows =:= NRows))]) :-
+        reset_Model,
+        convert_list_to_float_array([5.0,3.4,1.4,4.8,3.1,1.5], 3, array(Qsize, Qrownum, Q)),
+        searchWithQuery(Q, Qsize, Qrownum, 2, _, NCols, NRows, _, DCols, DRows, 0, 0).
+
+:- end_tests(lsh).