Skip to content

Commit

Permalink
Merge pull request #89 from Living-with-machines/develop
Browse files (browse the repository at this point in the history)
v1.2.2
  • Loading branch information
kasra-hosseini authored Dec 4, 2020
2 parents 0b9f771 + f7efd05 commit db1bcf9
Show file tree
Hide file tree
Showing 4 changed files with 64 additions and 3,030 deletions.
14 changes: 9 additions & 5 deletions DeezyMatch/candidateRanker.py
Original file line number Diff line number Diff line change
Expand Up @@ -311,14 +311,18 @@ def candidate_ranker(input_file_path="default", query_scenario=None, candidate_s
else:
sys.exit(f"[ERROR] ranking_metric: {ranking_metric} is not implemented. See the documentation.")

num_found_candidates += len(query_candidate_filtered_pd)
print("ID: %s/%s -- Number of found candidates so far: %s, searched: %s" % (iq+1, len(vecs_query), num_found_candidates, id_1_neigh))
if num_found_candidates > 0:
# remove duplicates
query_candidate_filtered_pd = query_candidate_filtered_pd[~query_candidate_filtered_pd.duplicated(["s2_orig"])]

if len(query_candidate_filtered_pd) > 0:
collect_neigh_pd = collect_neigh_pd.append(query_candidate_filtered_pd)
collect_neigh_pd = collect_neigh_pd[~collect_neigh_pd.duplicated(["s2_orig"])]

num_found_candidates = len(collect_neigh_pd)
print("ID: %s/%s -- Number of found candidates so far: %s, searched: %s" % (iq+1, len(vecs_query), num_found_candidates, id_1_neigh))

if ranking_metric.lower() in ["faiss"]:
# 1.01 is multiplied to avoid issues with float numbers and rounding erros
# 1.01 is multiplied to avoid issues with float numbers and rounding errors
if query_candidate_pd["faiss_dist"].max() > (selection_threshold*1.01):
break
elif ranking_metric.lower() in ["cosine"]:
Expand Down
3 changes: 2 additions & 1 deletion LICENSE
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
MIT License

Copyright (c) 2020 Living with Machines
Copyright (c) 2020 The Alan Turing Institute, British Library Board, Queen Mary University of London,
University of Exeter, University of East Anglia and University of Cambridge

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
Expand Down
3,057 changes: 38 additions & 3,019 deletions examples/example_001.ipynb

Large diffs are not rendered by default.

20 changes: 15 additions & 5 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,8 @@
import setuptools

with open("requirements.txt", "r") as f:
REQUIRED_PACKAGES = f.read().splitlines()

setuptools.setup(
name="DeezyMatch",
version="1.2.1",
version="1.2.2",
description="A Flexible Deep Learning Approach to Fuzzy String Matching",
author=u"The LwM Development Team",
#author_email="",
Expand All @@ -20,7 +17,20 @@
include_package_data = True,
platforms="OS Independent",
python_requires='>=3.7',
install_requires=REQUIRED_PACKAGES,
install_requires=[
"torch==1.5.0",
"torchvision==0.6.0",
"ipywidgets==7.5.1",
"PyYAML==5.3.1",
"scikit-learn==0.23.1",
"pandas==1.0.3",
"faiss-cpu==1.6.3",
"tqdm==4.46.0",
"tensorboard==2.2.2",
"matplotlib==3.2.1",
"jupyter-client==6.1.5",
"jupyter-core==4.6.3"
],
classifiers=[
"Development Status :: 3 - Alpha",
"License :: OSI Approved :: MIT License",
Expand Down

0 comments on commit db1bcf9

Please sign in to comment.