Merge pull request kayzhu#1 from wuan/python3compatibility

added python 3 compatibility and travis tests
afranck64 · Mar 13, 2015 · c7b02f8 · c7b02f8
2 parents b07fd60 + 61eecbf
commit c7b02f8
Show file tree

Hide file tree

Showing 8 changed files with 139 additions and 111 deletions.
diff --git a/.gitignore b/.gitignore
@@ -7,3 +7,6 @@
 # Python egg metadata, regenerated from source files by setuptools.
 /*.egg-info
 /*.egg
+
+# PyCharm-Files
+/.idea
diff --git a/.travis.yml b/.travis.yml
@@ -0,0 +1,12 @@
+services:
+  - redis-server
+language: python
+python:
+  - "2.6"
+  - "2.7"
+  - "3.3"
+  - "3.4"
+# command to install dependencies
+install: "pip install -r requirements.txt"
+# command to run tests
+script: nosetests
diff --git a/lshash/__init__.py b/lshash/__init__.py
@@ -9,4 +9,4 @@
 __license__ = 'MIT'
 __version__ = '0.0.4dev'
 
-from lshash import LSHash
+from .lshash import LSHash
diff --git a/lshash/lshash.py b/lshash/lshash.py
@@ -4,11 +4,18 @@
 # This module is part of lshash and is released under
 # the MIT License: http://www.opensource.org/licenses/mit-license.php
 
+import sys
+
+if sys.version_info[0] >= 3:
+    basestring = str
+else:
+    range = xrange
+
 import os
 import json
 import numpy as np
 
-from storage import storage
+from .storage import storage
 
 try:
     from bitarray import bitarray
@@ -92,7 +99,7 @@ def _init_uniform_planes(self):
                     self.uniform_planes = [t[1] for t in npzfiles]
             else:
                 self.uniform_planes = [self._generate_uniform_planes()
-                                       for _ in xrange(self.num_hashtables)]
+                                       for _ in range(self.num_hashtables)]
                 try:
                     np.savez_compressed(self.matrices_filename,
                                         *self.uniform_planes)
@@ -101,14 +108,14 @@ def _init_uniform_planes(self):
                     raise
         else:
             self.uniform_planes = [self._generate_uniform_planes()
-                                   for _ in xrange(self.num_hashtables)]
+                                   for _ in range(self.num_hashtables)]
 
     def _init_hashtables(self):
         """ Initialize the hash tables such that each record will be in the
         form of "[storage1, storage2, ...]" """
 
         self.hash_tables = [storage(self.storage_config, i)
-                            for i in xrange(self.num_hashtables)]
+                            for i in range(self.num_hashtables)]
 
     def _generate_uniform_planes(self):
         """ Generate uniformly distributed hyperplanes and return it as a 2D

diff --git a/lshash/storage.py b/lshash/storage.py
@@ -4,6 +4,8 @@
 # This module is part of lshash and is released under
 # the MIT License: http://www.opensource.org/licenses/mit-license.php
 
+from __future__ import unicode_literals
+
 import json
 
 try:
@@ -27,10 +29,6 @@ def storage(storage_config, index):
 
 
 class BaseStorage(object):
-    def __init__(self, config):
-        """ An abstract class used as an adapter printfor storages. """
-        raise NotImplementedError
-
     def keys(self):
         """ Returns a list of binary hashes that are used as dict keys. """
         raise NotImplementedError
@@ -81,14 +79,14 @@ def _list(self, key):
 
     def keys(self, pattern='*'):
         # return the keys BUT be agnostic with reference to the hash table
-        return [k.split('.')[1] for k in self.storage.keys(self.h_index + pattern)]
+        return [k.decode('ascii').split('.')[1] for k in self.storage.keys(self.h_index + pattern)]
 
     def append_val(self, key, val):
         self.storage.sadd(self._list(key), json.dumps(val))
 
     def get_list(self, key):
         _list = list(self.storage.smembers(self._list(key)))  # list elements are plain strings here
-        _list = [json.loads(el) for el in _list]  # transform strings into python tuples
+        _list = [json.loads(el.decode('ascii')) for el in _list]  # transform strings into python tuples
         for el in _list:
             # if len(el) is 2, then el[1] is the extra value associated to the element
             if len(el) == 2 and type(el[0]) == list:

diff --git a/requirements.txt b/requirements.txt
@@ -0,0 +1,2 @@
+numpy>=1.9.1
+redis==2.10.3
diff --git a/tests/__init__.py b/tests/__init__.py
diff --git a/tests/test_lsh.py b/tests/test_lsh.py
@@ -1,5 +1,6 @@
 import random
 import string
+from unittest import TestCase
 from redis import StrictRedis
 from pprint import pprint
 import sys
@@ -10,112 +11,117 @@
 # now we can use our lshash package and not the standard one
 from lshash import LSHash
 
-num_elements = 100
 
-els = []
-el_names = []
-for i in range(num_elements):
-    el = [random.randint(0, 100) for _ in xrange(8)]
-    elname = ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(10))
-    els.append(tuple(el))
-    el_names.append(elname)
+class TestLSHash(TestCase):
+    num_elements = 100
 
+    def setUp(self):
+        self.els = []
+        self.el_names = []
+        for i in range(self.num_elements):
+            el = [random.randint(0, 100) for _ in range(8)]
+            elname = ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(10))
+            self.els.append(tuple(el))
+            self.el_names.append(elname)
 
-def test_lshash():
-    lsh = LSHash(6, 8, 1)
-    for i in xrange(num_elements):
-        lsh.index(list(els[i]))
-        lsh.index(list(els[i]))  # multiple insertions
-    hasht = lsh.hash_tables[0]
-    itms = [hasht.get_list(k) for k in hasht.keys()]
-    for itm in itms:
-        assert itms.count(itm) == 1
-        for el in itm:
-            assert el in els
-    for el in els:
-        res = lsh.query(list(el), num_results=1, distance_func='euclidean')[0]
-        # res is a tuple containing the vector and the distance
-        el_v, el_dist = res
-        assert el_v in els
-        assert el_dist == 0
-    del lsh
+    def test_lshash(self):
+        lsh = LSHash(6, 8, 1)
+        for i in range(self.num_elements):
+            lsh.index(list(self.els[i]))
+            lsh.index(list(self.els[i]))  # multiple insertions
+        hasht = lsh.hash_tables[0]
+        itms = [hasht.get_list(k) for k in hasht.keys()]
+        for itm in itms:
+            assert itms.count(itm) == 1
+            for el in itm:
+                assert el in self.els
+        for el in self.els:
+            res = lsh.query(list(el), num_results=1, distance_func='euclidean')[0]
+            # res is a tuple containing the vector and the distance
+            el_v, el_dist = res
+            assert el_v in self.els
+            assert el_dist == 0
+        del lsh
 
-def test_lshash_extra_val():
-    lsh = LSHash(6, 8, 1)
-    for i in xrange(num_elements):
-        lsh.index(list(els[i]), el_names[i])
-    hasht = lsh.hash_tables[0]
-    itms = [hasht.get_list(k) for k in hasht.keys()]
-    for itm in itms:
-        for el in itm:
-            assert el[0] in els
-            assert el[1] in el_names
-    for el in els:
-        # res is a list, so we need to select the first entry only
-        res = lsh.query(list(el), num_results=1, distance_func='euclidean')[0]
-        # vector an name are in the first element of the tuple res[0]
-        el_v, el_name = res[0]
-        # the distance is in the second element of the tuple
-        el_dist = res[1]
-        assert el_v in els
-        assert el_name in el_names
-        assert el_dist == 0
-    del lsh
+    def test_lshash_extra_val(self):
+        lsh = LSHash(6, 8, 1)
+        for i in range(self.num_elements):
+            lsh.index(list(self.els[i]), self.el_names[i])
+        hasht = lsh.hash_tables[0]
+        itms = [hasht.get_list(k) for k in hasht.keys()]
+        for itm in itms:
+            for el in itm:
+                assert el[0] in self.els
+                assert el[1] in self.el_names
+        for el in self.els:
+            # res is a list, so we need to select the first entry only
+            res = lsh.query(list(el), num_results=1, distance_func='euclidean')[0]
+            # vector and name are in the first element of the tuple res[0]
+            el_v, el_name = res[0]
+            # the distance is in the second element of the tuple
+            el_dist = res[1]
+            assert el_v in self.els
+            assert el_name in self.el_names
+            assert el_dist == 0
+        del lsh
 
-def test_lshash_redis():
-    """
-    Test external lshash module
-    """
-    config = {"redis": {"host": 'localhost', "port": 6379, "db": 15}}
-    sr = StrictRedis(**config['redis'])
-    sr.flushdb()
+    def test_lshash_redis(self):
+        """
+        Test external lshash module
+        """
+        config = {"redis": {"host": 'localhost', "port": 6379, "db": 15}}
+        sr = StrictRedis(**config['redis'])
+        sr.flushdb()
 
-    lsh = LSHash(6, 8, 1, config)
-    for i in xrange(num_elements):
-        lsh.index(list(els[i]))
-        lsh.index(list(els[i]))  # multiple insertions should be prevented by the library
-    hasht = lsh.hash_tables[0]
-    itms = [hasht.get_list(k) for k in hasht.keys()]
-    for itm in itms:
-        for el in itm:
-            assert itms.count(itm) == 1  # have multiple insertions been prevented?
-            assert el in els
-    for el in els:
-        res = lsh.query(list(el), num_results=1, distance_func='euclidean')[0]
-        el_v, el_dist = res
-        assert el_v in els
-        assert el_dist == 0
-    del lsh
-    sr.flushdb()
+        lsh = LSHash(6, 8, 1, config)
+        for i in range(self.num_elements):
+            lsh.index(list(self.els[i]))
+            lsh.index(list(self.els[i]))  # multiple insertions should be prevented by the library
 
-def test_lshash_redis_extra_val():
-    """
-    Test external lshash module
-    """
-    config = {"redis": {"host": 'localhost', "port": 6379, "db": 15}}
-    sr = StrictRedis(**config['redis'])
-    sr.flushdb()
+        hasht = lsh.hash_tables[0]
+        itms = [hasht.get_list(k) for k in hasht.keys()]
 
-    lsh = LSHash(6, 8, 1, config)
-    for i in xrange(num_elements):
-        lsh.index(list(els[i]), el_names[i])
-        lsh.index(list(els[i]), el_names[i])  # multiple insertions
-    hasht = lsh.hash_tables[0]
-    itms = [hasht.get_list(k) for k in hasht.keys()]
-    for itm in itms:
-        assert itms.count(itm) == 1
-        for el in itm:
-            assert el[0] in els
-            assert el[1] in el_names
-    for el in els:
-        res = lsh.query(list(el), num_results=1, distance_func='euclidean')[0]
-        # vector an name are in the first element of the tuple res[0]
-        el_v, el_name = res[0]
-        # the distance is in the second element of the tuple
-        el_dist = res[1]
-        assert el_v in els
-        assert el_name in el_names
-        assert el_dist == 0
-    del lsh
-    sr.flushdb()
+        for itm in itms:
+            for el in itm:
+                assert itms.count(itm) == 1  # have multiple insertions been prevented?
+                assert el in self.els
+
+        for el in self.els:
+            res = lsh.query(list(el), num_results=1, distance_func='euclidean')[0]
+            el_v, el_dist = res
+            assert el_v in self.els
+            assert el_dist == 0
+        del lsh
+        sr.flushdb()
+
+    def test_lshash_redis_extra_val(self):
+        """
+        Test external lshash module
+        """
+        config = {"redis": {"host": 'localhost', "port": 6379, "db": 15}}
+        sr = StrictRedis(**config['redis'])
+        sr.flushdb()
+
+        lsh = LSHash(6, 8, 1, config)
+        for i in range(self.num_elements):
+            lsh.index(list(self.els[i]), self.el_names[i])
+            lsh.index(list(self.els[i]), self.el_names[i])  # multiple insertions
+        hasht = lsh.hash_tables[0]
+        itms = [hasht.get_list(k) for k in hasht.keys()]
+        for itm in itms:
+            assert itms.count(itm) == 1
+            for el in itm:
+                assert el[0] in self.els
+                assert el[1] in self.el_names
+        for el in self.els:
+            res = lsh.query(list(el), num_results=1, distance_func='euclidean')[0]
+            # vector and name are in the first element of the tuple res[0]
+            el_v, el_name = res[0]
+            # the distance is in the second element of the tuple
+            el_dist = res[1]
+            assert el_v in self.els
+            assert el_name in self.el_names
+            assert el_dist == 0
+        del lsh
+        sr.flushdb()