doc/html/kgraph-data_8h_source.html

 #ifndef WDONG_KGRAPH_DATA

 #define WDONG_KGRAPH_DATA


 #include <cmath>

 #include <cstring>

 #include <malloc.h>

 #include <vector>

 #include <fstream>

 #include <stdexcept>

 #include <boost/assert.hpp>


 #ifdef __GNUC__

 #ifdef __AVX__

 #define KGRAPH_MATRIX_ALIGN 32

 #else

 #ifdef __SSE2__

 #define KGRAPH_MATRIX_ALIGN 16

 #else

 #define KGRAPH_MATRIX_ALIGN 4

 #endif

 #endif

 #endif


 namespace kgraph {


     extern float float_l2sqr_avx (float const *t1, float const *t2, unsigned dim);

     extern float float_l2sqr_sse2 (float const *t1, float const *t2, unsigned dim);

     extern float float_l2sqr_sse2 (float const *, unsigned dim);

     extern float float_dot_sse2 (float const *, float const *, unsigned dim);

     extern float uint8_l2sqr_sse2 (uint8_t const *t1, uint8_t const *t2, unsigned dim);


     extern float float_l2sqr (float const *, float const *, unsigned dim);

     extern float float_l2sqr (float const *, unsigned dim);

     extern float float_dot (float const *, float const *, unsigned dim);


     using std::vector;


     namespace metric {

         struct l2sqr {

             template <typename T>

             static float apply (T const *t1, T const *t2, unsigned dim) {

                 float r = 0;

                 for (unsigned i = 0; i < dim; ++i) {

                     float v = float(t1[i]) - float(t2[i]);

                     v *= v;

                     r += v;

                 }

                 return r;

             }


             template <typename T>

             static float dot (T const *t1, T const *t2, unsigned dim) {

                 float r = 0;

                 for (unsigned i = 0; i < dim; ++i) {

                     r += float(t1[i]) *float(t2[i]);

                 }

                 return r;

             }


             template <typename T>

             static float norm2 (T const *t1, unsigned dim) {

                 float r = 0;

                 for (unsigned i = 0; i < dim; ++i) {

                     float v = float(t1[i]);

                     v *= v;

                     r += v;

                 }

                 return r;

             }

         };


         struct l2 {

             template <typename T>

             static float apply (T const *t1, T const *t2, unsigned dim) {

                 return sqrt(l2sqr::apply<T>(t1, t2, dim));

             }

         };

     }


     template <typename T, unsigned A = KGRAPH_MATRIX_ALIGN>

     class Matrix {

         unsigned col;

         unsigned row;

         size_t stride;

         char *data;


         void reset (unsigned r, unsigned c) {

             row = r;

             col = c;

             stride = (sizeof(T) * c + A - 1) / A * A;

             /*

             data.resize(row * stride);

             */

             if (data) free(data);

             data = (char *)memalign(A, row * stride); // SSE instruction needs data to be aligned

             if (!data) throw runtime_error("memalign");

         }

     public:

         Matrix (): col(0), row(0), stride(0), data(0) {}

         Matrix (unsigned r, unsigned c): data(0) {

             reset(r, c);

         }

         ~Matrix () {

             if (data) free(data);

         }

         unsigned size () const {

             return row;

         }

         unsigned dim () const {

             return col;

         }

         size_t step () const {

             return stride;

         }

         void resize (unsigned r, unsigned c) {

             reset(r, c);

         }

         T const *operator [] (unsigned i) const {

             return reinterpret_cast<T const *>(&data[stride * i]);

         }

         T *operator [] (unsigned i) {

             return reinterpret_cast<T *>(&data[stride * i]);

         }

         void zero () {

             memset(data, 0, row * stride);

         }


         void normalize2 () {

 #pragma omp parallel for

             for (unsigned i = 0; i < row; ++i) {

                 T *p = operator[](i);

                 double sum = metric::l2sqr::norm2(p, col);

                 sum = std::sqrt(sum);

                 for (unsigned j = 0; j < col; ++j) {

                     p[j] /= sum;

                 }

             }

         }


         void load (const std::string &path, unsigned dim, unsigned skip = 0, unsigned gap = 0) {

             std::ifstream is(path.c_str(), std::ios::binary);

             if (!is) throw io_error(path);

             is.seekg(0, std::ios::end);

             size_t size = is.tellg();

             size -= skip;

             unsigned line = sizeof(T) * dim + gap;

             unsigned N =  size / line;

             reset(N, dim);

             zero();

             is.seekg(skip, std::ios::beg);

             for (unsigned i = 0; i < N; ++i) {

                 is.read(&data[stride * i], sizeof(T) * dim);

                 is.seekg(gap, std::ios::cur);

             }

             if (!is) throw io_error(path);

         }


         void load_lshkit (std::string const &path) {

             static const unsigned LSHKIT_HEADER = 3;

             std::ifstream is(path.c_str(), std::ios::binary);

             unsigned header[LSHKIT_HEADER]; /* entry size, row, col */

             is.read((char *)header, sizeof header);

             if (!is) throw io_error(path);

             if (header[0] != sizeof(T)) throw io_error(path);

             is.close();

             unsigned D = header[2];

             unsigned skip = LSHKIT_HEADER * sizeof(unsigned);

             unsigned gap = 0;

             load(path, D, skip, gap);

         }


         void save_lshkit (std::string const &path) {

             std::ofstream os(path.c_str(), std::ios::binary);

             unsigned header[3];

             assert(sizeof header == 3*4);

             header[0] = sizeof(T);

             header[1] = row;

             header[2] = col;

             os.write((const char *)header, sizeof(header));

             for (unsigned i = 0; i < row; ++i) {

                 os.write(&data[stride * i], sizeof(T) * col);

             }

         }

     };


     template <typename DATA_TYPE, unsigned A = KGRAPH_MATRIX_ALIGN>

     class MatrixProxy {

         unsigned rows;

         unsigned cols;      // # elements, not bytes, in a row,

         size_t stride;    // # bytes in a row, >= cols * sizeof(element)

         uint8_t const *data;

     public:

         MatrixProxy (Matrix<DATA_TYPE> const &m)

             : rows(m.size()), cols(m.dim()), stride(m.step()), data(reinterpret_cast<uint8_t const *>(m[0])) {

         }


 #ifndef __AVX__

 #ifdef FLANN_DATASET_H_

         MatrixProxy (flann::Matrix<DATA_TYPE> const &m)

             : rows(m.rows), cols(m.cols), stride(m.stride), data(m.data) {

             if (stride % A) throw invalid_argument("bad alignment");

         }

 #endif

 #ifdef __OPENCV_CORE_HPP__

         MatrixProxy (cv::Mat const &m)

             : rows(m.rows), cols(m.cols), stride(m.step), data(m.data) {

             if (stride % A) throw invalid_argument("bad alignment");

         }

 #endif

 #ifdef NPY_NDARRAYOBJECT_H

         MatrixProxy (PyArrayObject *obj) {

             if (!obj || (obj->nd != 2)) throw invalid_argument("bad array shape");

             rows = obj->dimensions[0];

             cols = obj->dimensions[1];

             stride = obj->strides[0];

             data = reinterpret_cast<uint8_t const *>(obj->data);

             if (obj->descr->elsize != sizeof(DATA_TYPE)) throw invalid_argument("bad data type size");

             if (stride % A) throw invalid_argument("bad alignment");

             if (!(stride >= cols * sizeof(DATA_TYPE))) throw invalid_argument("bad stride");

         }

 #endif

 #endif

         unsigned size () const {

             return rows;

         }

         unsigned dim () const {

             return cols;

         }

         DATA_TYPE const *operator [] (unsigned i) const {

             return reinterpret_cast<DATA_TYPE const *>(data + stride * i);

         }

         DATA_TYPE *operator [] (unsigned i) {

             return const_cast<DATA_TYPE *>(reinterpret_cast<DATA_TYPE const *>(data + stride * i));

         }

     };


     template <typename DATA_TYPE, typename DIST_TYPE>

     class MatrixOracle: public kgraph::IndexOracle {

         MatrixProxy<DATA_TYPE> proxy;

     public:

         class SearchOracle: public kgraph::SearchOracle {

             MatrixProxy<DATA_TYPE> proxy;

             DATA_TYPE const *query;

         public:

             SearchOracle (MatrixProxy<DATA_TYPE> const &p, DATA_TYPE const *q): proxy(p), query(q) {

             }

             virtual unsigned size () const {

                 return proxy.size();

             }

             virtual float operator () (unsigned i) const {

                 return DIST_TYPE::apply(proxy[i], query, proxy.dim());

             }

         };

         template <typename MATRIX_TYPE>

         MatrixOracle (MATRIX_TYPE const &m): proxy(m) {

         }

         virtual unsigned size () const {

             return proxy.size();

         }

         virtual float operator () (unsigned i, unsigned j) const {

             return DIST_TYPE::apply(proxy[i], proxy[j], proxy.dim());

         }

         SearchOracle query (DATA_TYPE const *query) const {

             return SearchOracle(proxy, query);

         }

     };


     inline float AverageRecall (Matrix<float> const &gs, Matrix<float> const &result, unsigned K = 0) {

         if (K == 0) {

             K = result.dim();

         }

         if (!(gs.dim() >= K)) throw invalid_argument("gs.dim() >= K");

         if (!(result.dim() >= K)) throw invalid_argument("result.dim() >= K");

         if (!(gs.size() >= result.size())) throw invalid_argument("gs.size() > result.size()");

         float sum = 0;

         for (unsigned i = 0; i < result.size(); ++i) {

             float const *gs_row = gs[i];

             float const *re_row = result[i];

             // compare

             unsigned found = 0;

             unsigned gs_n = 0;

             unsigned re_n = 0;

             while ((gs_n < K) && (re_n < K)) {

                 if (gs_row[gs_n] < re_row[re_n]) {

                     ++gs_n;

                 }

                 else if (gs_row[gs_n] == re_row[re_n]) {

                     ++found;

                     ++gs_n;

                     ++re_n;

                 }

                 else {

                     throw runtime_error("distance is unstable");

                 }

             }

             sum += float(found) / K;

         }

         return sum / result.size();

     }


 }


 #ifndef KGRAPH_NO_VECTORIZE

 #ifdef __GNUC__

 #ifdef __AVX__

 #if 0

 namespace kgraph { namespace metric {

         template <>

         inline float l2sqr::apply<float> (float const *t1, float const *t2, unsigned dim) {

             return float_l2sqr_avx(t1, t2, dim);

         }

 }}

 #endif

 #else

 #ifdef __SSE2__

 namespace kgraph { namespace metric {

         template <>

         inline float l2sqr::apply<float> (float const *t1, float const *t2, unsigned dim) {

             return float_l2sqr_sse2(t1, t2, dim);

         }

         template <>

         inline float l2sqr::dot<float> (float const *t1, float const *t2, unsigned dim) {

             return float_dot_sse2(t1, t2, dim);

         }

         template <>

         inline float l2sqr::norm2<float> (float const *t1, unsigned dim) {

             return float_l2sqr_sse2(t1, dim);

         }

         template <>

         inline float l2sqr::apply<uint8_t> (uint8_t const *t1, uint8_t const *t2, unsigned dim) {

             return uint8_l2sqr_sse2(t1, t2, dim);

         }

 }}

 #endif

 #endif

 #endif

 #endif


 #endif


kgraph::metric::l2sqr
L2 square distance.
Definition: kgraph-data.h:47

kgraph::metric::l2sqr::dot
static float dot(T const *t1, T const *t2, unsigned dim)
inner product.
Definition: kgraph-data.h:62

kgraph::SearchOracle
Search oracle.
Definition: kgraph.h:66

kgraph::metric::l2
Definition: kgraph-data.h:83

kgraph::metric::l2sqr::norm2
static float norm2(T const *t1, unsigned dim)
L2 norm.
Definition: kgraph-data.h:72

kgraph::IndexOracle
Index oracle.
Definition: kgraph.h:49

kgraph::MatrixOracle::size
virtual unsigned size() const
Returns the size of the dataset.
Definition: kgraph-data.h:277

kgraph::MatrixOracle::SearchOracle
Definition: kgraph-data.h:261

kgraph::MatrixOracle::SearchOracle::operator()
virtual float operator()(unsigned i) const
Computes similarity.
Definition: kgraph-data.h:270

kgraph::Matrix
Matrix data.
Definition: kgraph-data.h:93

kgraph::MatrixOracle
Oracle for Matrix or MatrixProxy.
Definition: kgraph-data.h:258

kgraph::MatrixProxy
Matrix proxy to interface with 3rd party libraries (FLANN, OpenCV, NumPy).
Definition: kgraph-data.h:200

kgraph::MatrixOracle::operator()
virtual float operator()(unsigned i, unsigned j) const
Computes similarity.
Definition: kgraph-data.h:280

kgraph::metric::l2sqr::apply
static float apply(T const *t1, T const *t2, unsigned dim)
L2 square distance.
Definition: kgraph-data.h:50

kgraph::MatrixOracle::SearchOracle::size
virtual unsigned size() const
Returns the size of the dataset.
Definition: kgraph-data.h:267