1 #ifndef WDONG_KGRAPH_DATA
2 #define WDONG_KGRAPH_DATA
10 #include <boost/assert.hpp>
14 #define KGRAPH_MATRIX_ALIGN 32
17 #define KGRAPH_MATRIX_ALIGN 16
19 #define KGRAPH_MATRIX_ALIGN 4
29 extern float float_l2sqr_avx (
float const *t1,
float const *t2,
unsigned dim);
31 extern float float_l2sqr_sse2 (
float const *t1,
float const *t2,
unsigned dim);
32 extern float float_l2sqr_sse2 (
float const *,
unsigned dim);
33 extern float float_dot_sse2 (
float const *,
float const *,
unsigned dim);
35 extern float uint8_l2sqr_sse2 (uint8_t
const *t1, uint8_t
const *t2,
unsigned dim);
37 extern float float_l2sqr (
float const *,
float const *,
unsigned dim);
38 extern float float_l2sqr (
float const *,
unsigned dim);
39 extern float float_dot (
float const *,
float const *,
unsigned dim);
50 static float apply (T
const *t1, T
const *t2,
unsigned dim) {
52 for (
unsigned i = 0; i < dim; ++i) {
53 float v = float(t1[i]) - float(t2[i]);
62 static float dot (T
const *t1, T
const *t2,
unsigned dim) {
64 for (
unsigned i = 0; i < dim; ++i) {
65 r += float(t1[i]) *float(t2[i]);
72 static float norm2 (T
const *t1,
unsigned dim) {
74 for (
unsigned i = 0; i < dim; ++i) {
75 float v = float(t1[i]);
85 static float apply (T
const *t1, T
const *t2,
unsigned dim) {
86 return sqrt(l2sqr::apply<T>(t1, t2, dim));
92 template <
typename T,
unsigned A = KGRAPH_MATRIX_ALIGN>
99 void reset (
unsigned r,
unsigned c) {
102 stride = (
sizeof(T) * c + A - 1) / A * A;
106 if (data) free(data);
107 data = (
char *)memalign(A, row * stride);
108 if (!data)
throw runtime_error(
"memalign");
111 Matrix (): col(0), row(0), stride(0), data(0) {}
112 Matrix (
unsigned r,
unsigned c): data(0) {
116 if (data) free(data);
118 unsigned size ()
const {
121 unsigned dim ()
const {
124 size_t step ()
const {
127 void resize (
unsigned r,
unsigned c) {
130 T
const *operator [] (
unsigned i)
const {
131 return reinterpret_cast<T
const *
>(&data[stride * i]);
133 T *operator [] (
unsigned i) {
134 return reinterpret_cast<T *
>(&data[stride * i]);
137 memset(data, 0, row * stride);
141 #pragma omp parallel for
142 for (
unsigned i = 0; i < row; ++i) {
143 T *p = operator[](i);
145 sum = std::sqrt(sum);
146 for (
unsigned j = 0; j < col; ++j) {
152 void load (
const std::string &path,
unsigned dim,
unsigned skip = 0,
unsigned gap = 0) {
153 std::ifstream is(path.c_str(), std::ios::binary);
154 if (!is)
throw io_error(path);
155 is.seekg(0, std::ios::end);
156 size_t size = is.tellg();
158 unsigned line =
sizeof(T) * dim + gap;
159 unsigned N = size / line;
162 is.seekg(skip, std::ios::beg);
163 for (
unsigned i = 0; i < N; ++i) {
164 is.read(&data[stride * i],
sizeof(T) * dim);
165 is.seekg(gap, std::ios::cur);
167 if (!is)
throw io_error(path);
170 void load_lshkit (std::string
const &path) {
171 static const unsigned LSHKIT_HEADER = 3;
172 std::ifstream is(path.c_str(), std::ios::binary);
173 unsigned header[LSHKIT_HEADER];
174 is.read((
char *)header,
sizeof header);
175 if (!is)
throw io_error(path);
176 if (header[0] !=
sizeof(T))
throw io_error(path);
178 unsigned D = header[2];
179 unsigned skip = LSHKIT_HEADER *
sizeof(unsigned);
181 load(path, D, skip, gap);
184 void save_lshkit (std::string
const &path) {
185 std::ofstream os(path.c_str(), std::ios::binary);
187 assert(
sizeof header == 3*4);
188 header[0] =
sizeof(T);
191 os.write((
const char *)header,
sizeof(header));
192 for (
unsigned i = 0; i < row; ++i) {
193 os.write(&data[stride * i],
sizeof(T) * col);
199 template <
typename DATA_TYPE,
unsigned A = KGRAPH_MATRIX_ALIGN>
207 : rows(m.size()), cols(m.dim()), stride(m.step()), data(reinterpret_cast<uint8_t const *>(m[0])) {
211 #ifdef FLANN_DATASET_H_
214 : rows(m.rows), cols(m.cols), stride(m.stride), data(m.data) {
215 if (stride % A)
throw invalid_argument(
"bad alignment");
218 #ifdef __OPENCV_CORE_HPP__
221 : rows(m.rows), cols(m.cols), stride(m.step), data(m.data) {
222 if (stride % A)
throw invalid_argument(
"bad alignment");
225 #ifdef NPY_NDARRAYOBJECT_H
228 if (!obj || (obj->nd != 2))
throw invalid_argument(
"bad array shape");
229 rows = obj->dimensions[0];
230 cols = obj->dimensions[1];
231 stride = obj->strides[0];
232 data =
reinterpret_cast<uint8_t
const *
>(obj->data);
233 if (obj->descr->elsize !=
sizeof(DATA_TYPE))
throw invalid_argument(
"bad data type size");
234 if (stride % A)
throw invalid_argument(
"bad alignment");
235 if (!(stride >= cols *
sizeof(DATA_TYPE)))
throw invalid_argument(
"bad stride");
239 unsigned size ()
const {
242 unsigned dim ()
const {
245 DATA_TYPE
const *operator [] (
unsigned i)
const {
246 return reinterpret_cast<DATA_TYPE
const *
>(data + stride * i);
248 DATA_TYPE *operator [] (
unsigned i) {
249 return const_cast<DATA_TYPE *
>(
reinterpret_cast<DATA_TYPE
const *
>(data + stride * i));
257 template <
typename DATA_TYPE,
typename DIST_TYPE>
263 DATA_TYPE
const *query;
267 virtual unsigned size ()
const {
271 return DIST_TYPE::apply(proxy[i], query, proxy.dim());
274 template <
typename MATRIX_TYPE>
277 virtual unsigned size ()
const {
281 return DIST_TYPE::apply(proxy[i], proxy[j], proxy.dim());
288 inline float AverageRecall (Matrix<float>
const &gs, Matrix<float>
const &result,
unsigned K = 0) {
292 if (!(gs.dim() >= K))
throw invalid_argument(
"gs.dim() >= K");
293 if (!(result.dim() >= K))
throw invalid_argument(
"result.dim() >= K");
294 if (!(gs.size() >= result.size()))
throw invalid_argument(
"gs.size() > result.size()");
296 for (
unsigned i = 0; i < result.size(); ++i) {
297 float const *gs_row = gs[i];
298 float const *re_row = result[i];
303 while ((gs_n < K) && (re_n < K)) {
304 if (gs_row[gs_n] < re_row[re_n]) {
307 else if (gs_row[gs_n] == re_row[re_n]) {
313 throw runtime_error(
"distance is unstable");
316 sum += float(found) / K;
318 return sum / result.size();
324 #ifndef KGRAPH_NO_VECTORIZE
328 namespace kgraph {
namespace metric {
330 inline float l2sqr::apply<float> (
float const *t1,
float const *t2,
unsigned dim) {
331 return float_l2sqr_avx(t1, t2, dim);
337 namespace kgraph {
namespace metric {
339 inline float l2sqr::apply<float> (
float const *t1,
float const *t2,
unsigned dim) {
340 return float_l2sqr_sse2(t1, t2, dim);
343 inline float l2sqr::dot<float> (
float const *t1,
float const *t2,
unsigned dim) {
344 return float_dot_sse2(t1, t2, dim);
347 inline float l2sqr::norm2<float> (
float const *t1,
unsigned dim) {
348 return float_l2sqr_sse2(t1, dim);
351 inline float l2sqr::apply<uint8_t> (uint8_t
const *t1, uint8_t
const *t2,
unsigned dim) {
352 return uint8_l2sqr_sse2(t1, t2, dim);
L2 square distance.
Definition: kgraph-data.h:47
static float dot(T const *t1, T const *t2, unsigned dim)
inner product.
Definition: kgraph-data.h:62
Search oracle.
Definition: kgraph.h:66
Definition: kgraph-data.h:83
static float norm2(T const *t1, unsigned dim)
L2 norm.
Definition: kgraph-data.h:72
Index oracle.
Definition: kgraph.h:49
virtual unsigned size() const
Returns the size of the dataset.
Definition: kgraph-data.h:277
Definition: kgraph-data.h:261
virtual float operator()(unsigned i) const
Computes similarity.
Definition: kgraph-data.h:270
Matrix data.
Definition: kgraph-data.h:93
Oracle for Matrix or MatrixProxy.
Definition: kgraph-data.h:258
Matrix proxy to interface with 3rd party libraries (FLANN, OpenCV, NumPy).
Definition: kgraph-data.h:200
virtual float operator()(unsigned i, unsigned j) const
Computes similarity.
Definition: kgraph-data.h:280
static float apply(T const *t1, T const *t2, unsigned dim)
L2 square distance.
Definition: kgraph-data.h:50
virtual unsigned size() const
Returns the size of the dataset.
Definition: kgraph-data.h:267