Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: Python wrapper #3

Open
wants to merge 42 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
021db22
Setup basic swig wrapper
grihabor Sep 9, 2018
8efa4ba
Update .gitignore
grihabor Sep 9, 2018
494bbc0
Apply numpy array typemap
grihabor Sep 9, 2018
1fa8c92
Ignore default arguments
grihabor Sep 12, 2018
f35d34b
Implement setup.py
grihabor Sep 12, 2018
e08267e
Write a wrapper for assign method to work correctly
grihabor Sep 12, 2018
ad99216
Link static dependency links
grihabor Sep 12, 2018
e79771b
Move source to src/
grihabor Sep 13, 2018
4042d21
Move interface file to src/
grihabor Sep 13, 2018
5b7c7b6
Fix cmake config for new project structure with src/
grihabor Sep 13, 2018
56dd88b
Create wrapper code for IndexIVF_HNSW::search method
grihabor Sep 13, 2018
1e5f727
Fix build_ext in setup.py
grihabor Sep 13, 2018
a7f9009
Reorganize directory structure
grihabor Sep 14, 2018
1c21976
Fix cmake build
grihabor Sep 14, 2018
f8ea6fb
Build swig extension in setup.py
grihabor Sep 14, 2018
01a7d17
`python setup.py install` completely works!
grihabor Sep 14, 2018
03a956c
Clean setup.py
grihabor Sep 14, 2018
507293e
Add numpy, pytest dependencies
grihabor Sep 14, 2018
4537e0e
Add python tests to run with pytest
grihabor Sep 14, 2018
e935e17
Build tests, add dummy test
grihabor Sep 15, 2018
97fd0ba
Build only ivfhnsw library in setup.py
grihabor Sep 15, 2018
a5a1955
Fix includes in tests
grihabor Sep 16, 2018
21c6673
Rename interface file, return cmake swig instructions back
grihabor Sep 16, 2018
d9f04ef
Add tests target to build all tests
grihabor Sep 16, 2018
b60fab6
Rename interface for the last time
grihabor Sep 16, 2018
4d14824
Move wrapper module into ivfhnsw package
grihabor Sep 17, 2018
1c715cc
Move search wrapper code to separate python module
grihabor Sep 17, 2018
f84925d
Add basic python test
grihabor Sep 18, 2018
5251711
Split the Parser into .h and .cpp files
grihabor Sep 18, 2018
9b78482
Add Proxy class for IndexIVF_HNSW_Grouping
grihabor Sep 18, 2018
e07d460
Remove unused pathlib imports
grihabor Sep 18, 2018
f520cb0
Fix interface for IndexIVF_HNSW::search
grihabor Sep 18, 2018
cd6a201
Add wrapper for IndexIVF_HNSW::add_batch
grihabor Sep 18, 2018
9c49f2f
Fix swig output directory in setup.py
grihabor Sep 18, 2018
9af69cc
Include numpy headers
grihabor Sep 18, 2018
0e8c7f3
Add numpy include
grihabor Sep 19, 2018
ab15d51
Use get_ext_fullpath to obtain right dirpath
grihabor Sep 19, 2018
207ba3d
Add basic import test
grihabor Sep 19, 2018
66fbddd
Fix test after IndexIVF_HNSW::search method change
grihabor Sep 19, 2018
ba73362
Patch setup.py to work with `python setup.py test`
grihabor Sep 19, 2018
313d308
Add support for CMAKE_BUILD_TYPE in setup.py
grihabor Sep 20, 2018
67f7f29
Add interface for IndexIVF_HNSW::train_pq
grihabor Sep 21, 2018
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,12 @@ CMakeCache.txt
main
*.swp

__pycache__/
lib/
numpy.i
build/
dist/
ivfhnsw.egg-info/
venv/
.eggs/
.pytest_cache/
38 changes: 29 additions & 9 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ cmake_minimum_required (VERSION 2.8)
# ivf-hnsw project
project(ivf-hnsw C CXX)

include_directories("${PROJECT_BINARY_DIR}")
message("Build type: ${CMAKE_BUILD_TYPE}")

add_subdirectory(faiss)
add_subdirectory(hnswlib)
Expand All @@ -13,14 +13,34 @@ set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)

# specify header and cpp files
file(GLOB ivf-hnsw_cpu_headers ${CMAKE_CURRENT_SOURCE_DIR}/*.h)
file(GLOB ivf-hnsw_cpu_cpp ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp)
INCLUDE_DIRECTORIES(${PROJECT_SOURCE_DIR} ${PROJECT_SOURCE_DIR}/include)

add_library(ivf-hnsw STATIC ${ivf-hnsw_cpu_headers} ${ivf-hnsw_cpu_cpp})
file(GLOB ivfhnsw_src ${PROJECT_SOURCE_DIR}/src/*.cpp)
file(GLOB ivfhnsw_include ${PROJECT_SOURCE_DIR}/include/*.h)

SET( CMAKE_CXX_FLAGS "-Ofast -lrt -DNDEBUG -std=c++11 -DHAVE_CXX0X -openmp -march=native -fpic -w -fopenmp -ftree-vectorize -ftree-vectorizer-verbose=0" )
target_link_libraries(ivf-hnsw faiss hnswlib)
SET(CMAKE_CXX_FLAGS "-Ofast -lrt -DNDEBUG -std=c++11 -DHAVE_CXX0X -openmp -march=native -fpic -w -fopenmp -ftree-vectorize -ftree-vectorizer-verbose=0")

# build tests
add_subdirectory(tests)
add_library(ivfhnsw STATIC ${ivfhnsw_src})
target_link_libraries(ivfhnsw faiss hnswlib)

# Optional Python bindings: the SWIG module below is only configured when both
# SWIG and the Python development libraries are located. Neither package is
# REQUIRED, so a missing dependency silently skips the wrapper.
FIND_PACKAGE(SWIG)
FIND_PACKAGE(PythonLibs)
# NOTE(review): FindPythonLibs documents the all-caps PYTHONLIBS_FOUND result
# variable; confirm the mixed-case PythonLibs_FOUND spelling is set by every
# CMake version this project supports.
if(SWIG_FOUND AND PythonLibs_FOUND)
# Pull in the UseSWIG helpers (swig_add_module / swig_link_libraries).
INCLUDE(${SWIG_USE_FILE})
INCLUDE_DIRECTORIES(${PYTHON_INCLUDE_PATH})

# Fetch the numpy SWIG typemaps at configure time from the numpy master branch.
# NOTE(review): this needs network access and is not pinned to a revision; the
# relative destination paths are resolved by CMake - confirm where they land.
file(DOWNLOAD https://raw.githubusercontent.com/numpy/numpy/master/tools/swig/numpy.i ./numpy.i)
file(DOWNLOAD https://raw.githubusercontent.com/numpy/numpy/master/tools/swig/pyfragments.swg ./pyfragments.swg)

# Generated SWIG language files go to <build>/lib.
set(CMAKE_SWIG_OUTDIR ${CMAKE_BINARY_DIR}/lib)
# NOTE(review): UseSWIG documents CMAKE_SWIG_FLAGS / the SWIG_FLAGS source
# property for extra swig arguments; verify that SWIG_FEATURES is consumed.
set(SWIG_FEATURES "-Iinclude")
file(GLOB swig_interface interface/wrapper.i)

# Run swig in C++ mode for the interface file.
SET_SOURCE_FILES_PROPERTIES(${swig_interface} PROPERTIES CPLUSPLUS ON)

# Build the python module "wrapper" from the interface file plus the library
# sources, then link it against faiss, hnswlib and the Python libraries.
swig_add_module(wrapper python ${swig_interface} ${ivfhnsw_src})
swig_link_libraries(wrapper faiss hnswlib ${PYTHON_LIBRARIES})

endif()

add_subdirectory(tests)
2 changes: 1 addition & 1 deletion CMakeLists.txt.faiss
Original file line number Diff line number Diff line change
Expand Up @@ -35,4 +35,4 @@ file(GLOB faiss_cpu_cpp ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp)

set(faiss_lib faiss)
add_library(${faiss_lib} STATIC ${faiss_cpu_headers} ${faiss_cpu_cpp})
target_link_libraries(${faiss_lib} ${OpenMP_CXX_FLAGS} ${BLAS_LIB})
target_link_libraries(${faiss_lib} ${OpenMP_CXX_FLAGS} ${BLAS_LIB})
4 changes: 2 additions & 2 deletions hnswlib/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@ file(GLOB headers ${CMAKE_CURRENT_SOURCE_DIR}/*.h)
file(GLOB sources ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp)

# Build each source file independently
include_directories(../../) # ivf-hnsw root directory
include_directories(${CMAKE_SOURCE_DIR} ${CMAKE_SOURCE_DIR}/src) # ivf-hnsw root directory

add_library(hnswlib STATIC ${headers} ${sources})
SET( CMAKE_CXX_FLAGS "-Ofast -lrt -DNDEBUG -std=c++11 -DHAVE_CXX0X -openmp -march=native -fpic -w -fopenmp -ftree-vectorize -ftree-vectorizer-verbose=0" )
target_link_libraries(hnswlib)
target_link_libraries(hnswlib)
File renamed without changes.
File renamed without changes.
67 changes: 67 additions & 0 deletions include/Parser.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
#ifndef IVF_HNSW_LIB_PARSER_H
#define IVF_HNSW_LIB_PARSER_H

//==============
// Parser Class
//==============
/// Plain aggregate of command-line settings: HNSW, data, PQ and search
/// parameters plus the file paths used by the tools. All members are public.
/// Only the declaration lives here; the constructor and usage() are defined
/// elsewhere.
/// NOTE(review): this header uses size_t without including <cstddef> - it
/// relies on the including translation unit to provide it.
struct Parser
{
const char *cmd; ///< main command - argv[0]

//=================
// HNSW parameters
//=================
size_t M; ///< Min number of edges per point
size_t efConstruction; ///< Max number of candidate vertices in priority queue to observe during construction

//=================
// Data parameters
//=================
size_t nb; ///< Number of base vectors
size_t nt; ///< Number of learn vectors
size_t nsubt; ///< Number of learn vectors to train (random subset of the learn set)
size_t nc; ///< Number of centroids for HNSW quantizer
size_t nsubc; ///< Number of subcentroids per group
size_t nq; ///< Number of queries
size_t ngt; ///< Number of groundtruth neighbours per query
size_t d; ///< Vector dimension

//=================
// PQ parameters
//=================
size_t code_size; ///< Code size per vector in bytes
bool do_opq; ///< Turn on/off OPQ fine encoding

//===================
// Search parameters
//===================
size_t k; ///< Number of the closest vertices to search
size_t nprobe; ///< Number of probes at query time
size_t max_codes; ///< Max number of codes to visit to do a query
size_t efSearch; ///< Max number of candidate vertices in priority queue to observe during searching
bool do_pruning; ///< Turn on/off pruning in the grouping scheme

//=======
// Paths
//=======
// The path members are raw, non-owning C strings.
// NOTE(review): presumably they point into argv and share its lifetime - confirm in Parser.cpp.
const char *path_base; ///< Path to a base set
const char *path_learn; ///< Path to a learn set
const char *path_q; ///< Path to queries
const char *path_gt; ///< Path to groundtruth
const char *path_centroids; ///< Path to coarse centroids

const char *path_precomputed_idxs; ///< Path to coarse centroid indices for base points

const char *path_info; ///< Path to parameters of HNSW graph
const char *path_edges; ///< Path to edges of HNSW graph

const char *path_pq; ///< Path to the product quantizer for residuals
const char *path_opq_matrix; ///< Path to OPQ rotation matrix for OPQ fine encoding
const char *path_norm_pq; ///< Path to the product quantizer for norms of reconstructed base points
const char *path_index; ///< Path to the constructed index

/// Takes the program's argc/argv. Presumably fills the members above from the
/// command line - TODO confirm against the definition.
Parser(int argc, char **argv);
/// Declared here, defined elsewhere; presumably prints command-line help - TODO confirm.
void usage();
};

#endif //IVF_HNSW_LIB_PARSER_H
File renamed without changes.
4 changes: 4 additions & 0 deletions interface/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
*

!.gitignore
!wrapper.i
120 changes: 120 additions & 0 deletions interface/wrapper.i
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
// SWIG interface for the python module "wrapper".
%module wrapper
%{
/* Code in this section is copied verbatim into the generated wrapper source. */
/* SWIG_FILE_WITH_INIT is the define numpy.i expects when import_array() is called from %init. */
#define SWIG_FILE_WITH_INIT
#include "IndexIVF_HNSW.h"
#include "IndexIVF_HNSW_Grouping.h"
%}

// numpy typemaps (IN_ARRAY*, ARGOUT_ARRAY*, ...) used by the %apply lines below.
%include "numpy.i"

// Initialise the numpy C API when the extension module is imported.
%init %{
import_array();
%}

// Bind numpy.i typemaps to the argument-name patterns used by the wrappers in this file.
// IN_ARRAY*  : the python caller passes an array; pointer and dimension(s) are filled in.
// ARGOUT_ARRAY1 : the python caller passes only the length; the array is returned as a result.
// NOTE(review): the numpy.i signatures use int dimensions while the targets use size_t - confirm the conversion is intended.
%apply (float* IN_ARRAY1, int DIM1) {(const float *x, size_t d)};
%apply (float* IN_ARRAY2, int DIM1, int DIM2) {(const float *x, size_t n, size_t d)};
// NOTE(review): these three assume idx_t is unsigned int - confirm against IndexIVF_HNSW.h.
%apply (unsigned int* ARGOUT_ARRAY1, int DIM1) {(ivfhnsw::IndexIVF_HNSW::idx_t *labels, size_t k)};
%apply (unsigned int* IN_ARRAY1, int DIM1) {(const ivfhnsw::IndexIVF_HNSW::idx_t *xids, size_t n1)};
%apply (unsigned int* IN_ARRAY1, int DIM1) {(const ivfhnsw::IndexIVF_HNSW::idx_t *precomputed_idx, size_t n2)};
%apply (long* ARGOUT_ARRAY1, int DIM1) {(long *labels, size_t k)};
// The trailing underscore in k_ keeps this pattern distinct from the (labels, k) patterns above.
%apply (float* ARGOUT_ARRAY1, int DIM1) {(float* distances, size_t k_)};


/*
Wrapper for IndexIVF_HNSW::assign

Exposes assign() to python with numpy-friendly argument order:
  x      - 2-D float array (n rows of length d), via the IN_ARRAY2 typemap
  labels - output array of length k, via the ARGOUT_ARRAY1 typemap
Sets a python ValueError (and returns without calling the C++ method) when the
row length d differs from the index dimension. The %exception block turns a
pending python error into a proper SWIG failure.
*/
%rename (assign) assign_numpy;
%exception assign_numpy {
    $action
    if (PyErr_Occurred()) SWIG_fail;
}
%extend ivfhnsw::IndexIVF_HNSW {
    void assign_numpy(const float *x, size_t n, size_t d, idx_t *labels, size_t k) {
        if (d != $self->d) {
            /* size_t values must be formatted with %zu; %d reads an int from
               the varargs and is undefined for size_t. The cast makes the
               argument type match the format regardless of the member's type. */
            PyErr_Format(PyExc_ValueError,
                         "Query vectors must be of length d=%zu, got %zu",
                         static_cast<size_t>($self->d), d);
            return;
        }
        $self->assign(n, x, labels, k);
    }
}
%ignore assign;

/*
Wrapper for IndexIVF_HNSW::train_pq

Exposes train_pq() to python taking a single 2-D float array x (n rows of
length d, via the IN_ARRAY2 typemap). Sets a python ValueError and skips the
C++ call when d differs from the index dimension. The %exception block turns a
pending python error into a proper SWIG failure.
*/
%exception train_pq {
    $action
    if (PyErr_Occurred()) SWIG_fail;
}
%extend ivfhnsw::IndexIVF_HNSW {
    void train_pq(const float *x, size_t n, size_t d) {
        if (d != $self->d) {
            /* size_t values must be formatted with %zu; %d reads an int from
               the varargs and is undefined for size_t. */
            PyErr_Format(PyExc_ValueError,
                         "Query vectors must be of length d=%zu, got %zu",
                         static_cast<size_t>($self->d), d);
            return;
        }
        $self->train_pq(n, x);
    }
}
%ignore train_pq;


/*
Wrapper for IndexIVF_HNSW::search

Exposes search() to python for a single query vector:
  x         - 1-D float array of length d, via the IN_ARRAY1 typemap
  distances - output array of length k_, via the ARGOUT_ARRAY1 typemap
  labels    - output array of length k, via the ARGOUT_ARRAY1 typemap
Both output lengths are supplied by the python caller and must be equal.
Sets a python ValueError and skips the C++ call when d differs from the index
dimension or when k != k_. The %exception block turns a pending python error
into a proper SWIG failure.
*/
%exception search {
    $action
    if (PyErr_Occurred()) SWIG_fail;
}
%extend ivfhnsw::IndexIVF_HNSW {
    void search(const float *x, size_t d, float* distances, size_t k_, long *labels, size_t k) {
        if (d != $self->d) {
            /* size_t values must be formatted with %zu; %d reads an int from
               the varargs and is undefined for size_t. */
            PyErr_Format(PyExc_ValueError,
                         "Query vectors must be of length d=%zu, got %zu",
                         static_cast<size_t>($self->d), d);
            return;
        }
        if (k != k_) {
            PyErr_Format(PyExc_ValueError,
                         "Output sizes must be the same, got %zu and %zu",
                         k_, k);
            return;
        }
        $self->search(k, x, distances, labels);
    }
}
%ignore search;


/*
Wrapper for IndexIVF_HNSW::add_batch

Exposes add_batch() to python taking three numpy arrays:
  x               - 2-D float array (n rows of length d), via IN_ARRAY2
  xids            - 1-D id array of length n1, via IN_ARRAY1
  precomputed_idx - 1-D id array of length n2, via IN_ARRAY1
Sets a python ValueError and skips the C++ call when d differs from the index
dimension or when the three first-dimension sizes are not all equal. The
%exception block turns a pending python error into a proper SWIG failure.
*/
%exception add_batch {
    $action
    if (PyErr_Occurred()) SWIG_fail;
}
%extend ivfhnsw::IndexIVF_HNSW {
    void add_batch(const float *x, size_t n, size_t d, const idx_t* xids, size_t n1, const idx_t *precomputed_idx, size_t n2) {
        if (d != $self->d) {
            /* size_t values must be formatted with %zu; %d reads an int from
               the varargs and is undefined for size_t. */
            PyErr_Format(PyExc_ValueError,
                         "Query vectors must be of length d=%zu, got %zu",
                         static_cast<size_t>($self->d), d);
            return;
        }
        if (n != n1 || n != n2) {
            PyErr_Format(PyExc_ValueError,
                         "Arrays must have the same first dimension size, got %zu, %zu, %zu",
                         n, n1, n2);
            return;
        }
        $self->add_batch(n, x, xids, precomputed_idx);
    }
}
%ignore add_batch;

%include "IndexIVF_HNSW.h"
%include "IndexIVF_HNSW_Grouping.h"

1 change: 1 addition & 0 deletions python-src/ivfhnsw/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
wrapper.py
7 changes: 7 additions & 0 deletions python-src/ivfhnsw/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
from .index import Index
from .index_grouping import IndexGrouping

# Names exported by `from ivfhnsw import *`: the two index classes imported above.
__all__ = (
'Index',
'IndexGrouping',
)
12 changes: 12 additions & 0 deletions python-src/ivfhnsw/index.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
from .wrapper import IndexIVF_HNSW

class Index(IndexIVF_HNSW):
    """Convenience layer over the SWIG proxy class ``IndexIVF_HNSW``."""

    def search(self, x, k):
        """
        Run a search for ``x`` asking for ``k`` results.

        The underlying wrapper takes the length of each output array
        (distances and labels) as a separate argument, so ``k`` is
        forwarded twice. Whatever the wrapper returns is handed back
        unchanged.
        """
        found = super(Index, self).search(x, k, k)
        return found

6 changes: 6 additions & 0 deletions python-src/ivfhnsw/index_grouping.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from .wrapper import IndexIVF_HNSW_Grouping
from .index import Index


class IndexGrouping(IndexIVF_HNSW_Grouping, Index):
    """
    Grouping index exposed to python.

    Combines the SWIG proxy ``IndexIVF_HNSW_Grouping`` with the ``Index``
    convenience layer through multiple inheritance; it adds no members of
    its own.
    """
18 changes: 18 additions & 0 deletions python-tests/test_wrapper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
def test_import_ivfhnsw():
    """Smoke test: importing the package must not raise."""
    import ivfhnsw as _package  # noqa: F401 - the import itself is the check


def test_lowlevel_constructor_and_destructor_wrappers():
    """Create and destroy an index through the raw extension-module functions."""
    from ivfhnsw import _wrapper as lowlevel

    ctor_args = (4, 4, 4, 4)
    handle = lowlevel.new_IndexIVF_HNSW(*ctor_args)
    lowlevel.delete_IndexIVF_HNSW(handle)


def test_pipeline():
    """Build, assign and search through the high-level Index class; both outputs must have length 3."""
    from ivfhnsw import Index

    idx = Index(4, 4, 4, 4)
    idx.build_quantizer('', '', '')
    idx.assign([[5, 5, 5, 5]], 2)

    distances, labels = idx.search([1, 2, 3, 4], 3)
    # Same order as before: distances first, then labels.
    for output in (distances, labels):
        assert output.shape[0] == 3
assert labels.shape[0] == 3
5 changes: 5 additions & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
[aliases]
test=pytest

[tool:pytest]
testpaths=python-tests
Loading