Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Neurips23] NGT Submission for OOD track #187

Merged
merged 7 commits into from
Nov 3, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .github/workflows/neurips23.yml
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,9 @@ jobs:
- algorithm: mysteryann-dif
dataset: random-xs
track: ood
- algorithm: ngt
dataset: random-xs
track: ood
- algorithm: pyanns
dataset: random-xs
track: ood
Expand Down
12 changes: 12 additions & 0 deletions neurips23/ood/ngt/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Image for the NGT entry of the NeurIPS'23 OOD track; extends the shared
# benchmark base image.
FROM neurips23

# Refresh the package index and install in the SAME layer. With two separate
# RUN steps Docker can reuse a cached `apt-get update` layer while re-running
# the install, which then fails or installs from stale package lists.
RUN apt-get update && apt-get install -y git cmake liblapack-dev bc
RUN pip3 install wheel pybind11
RUN git clone https://github.com/masajiro/NGT-neurips23.git NGT
# Record the exact commit that was cloned in the build log.
RUN cd NGT && git log -n 1
RUN cd NGT && mkdir build && cd build && cmake ..
RUN cd NGT/build && make -j 8 && make install
# Refresh the dynamic-linker cache so the freshly installed library is found.
RUN ldconfig
# Build and install the Python bindings (imported as `ngtpy` by module.py).
RUN cd NGT/python && python3 setup.py bdist_wheel
RUN pip3 install NGT/python/dist/ngt-*-linux_x86_64.whl
34 changes: 34 additions & 0 deletions neurips23/ood/ngt/config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Run configuration for the NGT entry, one top-level section per dataset.
random-xs:
ngt:
docker-tag: neurips23-ood-ngt
module: neurips23.ood.ngt.module
constructor: NGT
base-args: ["@metric"]
run-groups:
base:
# Build parameters: module.py reads the "edge", "outdegree", "indegree",
# "epsilon" and "reduction" keys in NGT.__init__.
args: |
[{"edge": 50, "outdegree": 10, "indegree": 100,
"epsilon": 0.1, "reduction": 0.39}]
# Optional pre-built index (disabled): when a "url" key is present in args,
# NGT.load_index downloads and unpacks it.
# "url": "https://public-rlab.east.edge.storage-yahoo.jp/neurips23/indexes/onng-random-50-10-100-0.10-0.39.tgz"}]
# Query-time parameters: NGT.set_query_arguments subtracts 1.0 from "epsilon"
# before passing it to the index.
query-args: |
[{"epsilon": 1.1}]
text2image-10M:
ngt:
docker-tag: neurips23-ood-ngt
module: neurips23.ood.ngt.module
constructor: NGT
base-args: ["@metric"]
run-groups:
base:
# Build parameters (same keys as the random-xs section).
args: |
[{"edge": 140, "outdegree": 10, "indegree": 175,
"epsilon": 0.11, "reduction": 0.38}]
# NOTE(review): the file name below encodes 140-10-180-0.10-0.39, which does
# not match the args above (indegree 175, epsilon 0.11, reduction 0.38) --
# confirm before re-enabling the download.
# "url": "https://public-rlab.east.edge.storage-yahoo.jp/neurips23/indexes/onng-text2image-140-10-180-0.10-0.39.tgz"}]
# One search run is performed per entry in this list.
query-args: |
[{"epsilon": 1.010},
{"epsilon": 1.014},
{"epsilon": 1.016},
{"epsilon": 1.017},
{"epsilon": 1.018},
{"epsilon": 1.020},
{"epsilon": 1.025}]
213 changes: 213 additions & 0 deletions neurips23/ood/ngt/module.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,213 @@
import os
import subprocess
import time

from neurips23.ood.base import BaseOODANN
from benchmark.datasets import DATASETS, download_accelerated

import ngtpy

class NGT(BaseOODANN):
    """ONNG graph index for the OOD track.

    The index is built by driving the ``ngt`` command-line tool through
    ``subprocess`` and is opened and searched through the ``ngtpy`` bindings.
    """

    def __init__(self, metric, params):
        """Store the build parameters and echo them to stdout.

        Args:
            metric: "euclidean", "angular" or "ip"; any other value raises
                KeyError.
            params: dict with the required keys "edge", "outdegree" and
                "indegree", and the optional keys "search_edge", "timeout",
                "epsilon", "reduction" and "url".
        """
        metrics = {"euclidean": "2", "angular": "E", "ip": "i"}
        self._params = params
        self._edge_size = int(params["edge"])
        self._outdegree = int(params["outdegree"])
        self._indegree = int(params["indegree"])
        self._metric = metrics[metric]
        self._edge_size_for_search = int(params.get("search_edge", 0))
        # The default deliberately stays the int 12 (not 12.0): the value is
        # stringified into the "-T" command-line option in _build_anng().
        self._build_time_limit = float(params["timeout"]) if "timeout" in params else 12
        self._epsilon = float(params.get("epsilon", 0.1))
        self._reduction_range = float(params.get("reduction", 1.8))
        print("ONNG: edge_size:", self._edge_size)
        print("ONNG: outdegree:", self._outdegree)
        print("ONNG: indegree=:", self._indegree)
        print("ONNG: edge_size_for_search:", self._edge_size_for_search)
        print("ONNG: epsilon:", self._epsilon)
        print("ONNG: reduction range:", self._reduction_range)
        print("ONNG: metric:", metric)

    def get_title(self):
        """Return the directory name that identifies this build configuration."""
        return "index-%s-%s-%s-%.2f-%.2f" % (
            self._edge_size,
            self._outdegree,
            self._indegree,
            self._epsilon,
            self._reduction_range,
        )

    def set_index_path(self, dataset):
        """Derive all on-disk paths of the index from the build parameters.

        ``dataset`` is accepted for interface symmetry but is not part of the
        path: only get_title() is.
        """
        self._index_dir = os.path.join("data", "indices", "ood", "ngt", self.get_title())
        self._index_path = os.path.join(self._index_dir, "onng")
        self._sanng_path = os.path.join(self._index_dir, "sanng")
        self._anng_path = os.path.join(self._index_dir, "anng-" + str(self._edge_size))

    @staticmethod
    def _run(args):
        """Echo a command line and execute it; raise CalledProcessError on failure."""
        print("ONNG: '{}'".format(" ".join(args)))
        subprocess.run(args, check=True)

    def _build_sparse_anng(self, ds):
        """Create the sparse ANNG at ``_sanng_path``, load the data and build its graph."""
        print("ONNG: create a sparse ANNG to optimize the graph.")
        t = time.time()
        self._run([
            "ngt",
            "create",
            "-v",
            "-it",
            "-p8",
            "-b500",
            "-ga",
            "-of",
            "-D" + self._metric,
            "-d" + str(ds.d),
            "-E5",
            "-S-2",
            "-e0.0",
            "-P0",
            "-B30",
            "-T0",
            self._sanng_path,
        ])
        print("ONNG: append for SANNG")
        self._run([
            "ngt",
            "append",
            "-mb",
            "-n" + str(ds.nb),
            self._sanng_path,
            ds.get_dataset_fn(),
        ])
        print("ONNG: SANNG appending time(sec)=" + str(time.time() - t))
        print("ONNG: build a sparse ANNG index.")
        t = time.time()
        self._run([
            "ngt",
            "construct-graph",
            "-v",
            "-G-",
            "-E0.0",
            "-S100",
            self._sanng_path,
        ])
        print("ONNG: SANNG index build time(sec)=", str(time.time() - t))

    def _build_anng(self, dim):
        """Create the ANNG at ``_anng_path`` and adjust its degrees using the sparse ANNG."""
        print("ONNG: build ANNG")
        self._run([
            "ngt",
            "create",
            "-v",
            "-it",
            "-p8",
            "-b20",
            "-ga",
            "-of",
            "-D" + self._metric,
            "-d" + str(dim),
            "-E17",
            "-S" + str(self._edge_size_for_search),
            "-e" + str(self._epsilon),
            "-P0",
            "-B30",
            "-T" + str(self._build_time_limit),
            self._anng_path,
        ])
        print("ONNG: degree adjustment")
        t = time.time()
        self._run([
            "ngt",
            "construct-graph",
            "-v",
            "-Go",
            "-T0",
            "-P0",
            "-N" + str(self._edge_size),
            "-O" + str(self._outdegree),
            "-I" + str(self._indegree),
            self._anng_path,
            self._sanng_path,
        ])
        print("ONNG: degree ajustment time(sec)=" + str(time.time() - t))

    def _reduce_shortcuts(self):
        """Run ``reconstruct-graph`` to turn the ANNG into the final index at ``_index_path``."""
        print("ONNG: shortcut reduction")
        t = time.time()
        self._run([
            "ngt",
            "reconstruct-graph",
            "-v",
            "-R" + str(self._reduction_range),
            "-mS",
            "-Ps",
            "-sp",
            "-o0",
            "-i0",
            self._anng_path,
            self._index_path,
        ])
        print("ONNG: shortcut reduction time(sec)=" + str(time.time() - t))

    def _open_index(self):
        """Open the finished index read-only with ngtpy.

        Raises:
            RuntimeError: if no index exists at ``_index_path``.
        """
        if not os.path.exists(self._index_path):
            print("ONNG: something wrong...")
            # Fail here with a clear message instead of leaving self.index
            # unset, which would only surface later as an AttributeError.
            raise RuntimeError("ONNG: index was not created: " + self._index_path)
        print("ONNG: index already exists!", self._index_path)
        t = time.time()
        self.index = ngtpy.Index(self._index_path, read_only=True, tree_disabled=False)
        self.indexName = self._index_path
        print("ONNG: open time(sec)=" + str(time.time() - t))

    def fit(self, dataset):
        """Build whichever index stages are missing on disk, then open the index.

        Each stage is skipped when its output directory already exists, and
        all stages are skipped when the final index is already present (for
        example after load_index() unpacked a downloaded one).

        Raises:
            subprocess.CalledProcessError: if an ``ngt`` command fails.
            RuntimeError: if the final index is missing after all stages ran.
        """
        # Resolve the paths before anything reads them; this used to happen
        # only after self._index_path had already been printed.
        self.set_index_path(dataset)
        print("ONNG: start indexing...")
        ds = DATASETS[dataset]()
        print("ONNG: dataset:", dataset)
        print("ONNG: dataset str:", ds.__str__())
        print("ONNG: distance:", ds.distance())
        print("ONNG: dimension:", ds.d)
        print("ONNG: type:", ds.dtype)
        print("ONNG: nb:", ds.nb)
        print("ONNG: dataset file name:", ds.get_dataset_fn())
        print("ONNG: index path:", self._index_path)
        os.makedirs(self._index_dir, exist_ok=True)
        print("ONNG: index:", self._index_path)
        if not os.path.exists(self._index_path):
            if not os.path.exists(self._sanng_path):
                self._build_sparse_anng(ds)
            if not os.path.exists(self._anng_path):
                self._build_anng(ds.d)
            self._reduce_shortcuts()
        self._open_index()
        print("ONNG: end of fit")

    def load_index(self, dataset):
        """Fetch a pre-built index when one is configured; never opens it.

        When the index is not on disk and ``params`` has a "url" entry, the
        archive is downloaded and unpacked into the index directory.

        Returns:
            Always False. NOTE(review): this presumably makes the caller fall
            through to fit(), which skips the build stages that are already
            on disk and opens the index -- confirm against the runner.
        """
        self.set_index_path(dataset)
        if not os.path.exists(os.path.join(self._index_path, "grp")):
            if "url" not in self._params:
                return False
            os.makedirs(self._index_dir, exist_ok=True)
            tar_file = self._index_path + ".tgz"
            if not os.path.exists(tar_file):
                print("ONNG: downloading the index... index={} => {}".format(self._params["url"], self._index_path))
                download_accelerated(self._params["url"], tar_file, quiet=True)
            self._run(["tar", "zxf", tar_file, "-C", self._index_dir])
            self._run(["rm", "-r", tar_file])
            # Placeholder directories for the intermediate graphs. exist_ok
            # keeps a leftover directory from an earlier, interrupted build
            # from aborting the load with FileExistsError.
            os.makedirs(self._sanng_path, exist_ok=True)
            os.makedirs(self._anng_path, exist_ok=True)
        return False

    def set_query_arguments(self, query_args):
        """Apply per-run search parameters and update the reported run name.

        Args:
            query_args: dict with the optional keys "epsilon" (default 1.0)
                and "edge" (default 0).
        """
        epsilon = query_args.get("epsilon", 1.0)
        edge_size = query_args.get("edge", 0)
        print("ONNG: edge_size:", edge_size)
        print("ONNG: epsilon:", epsilon)
        self.name = "ngt-onng(%s, %s, %s, %s, %s)" % (
            self._edge_size,
            self._outdegree,
            self._indegree,
            self._reduction_range,
            epsilon,
        )
        # The configured value is offset by 1.0 relative to what the index
        # takes; self.name above keeps the configured value.
        epsilon = epsilon - 1.0
        self.index.set(epsilon=epsilon, edge_size=edge_size)

    def query(self, X, n):
        """Batch-search ``n`` neighbours for every row of ``X``.

        The results are kept on the instance for get_results().
        """
        self._results = ngtpy.BatchResults()
        return self.index.batch_search(X, self._results, n, with_distance=False)

    def get_results(self):
        """Return the neighbour ids collected by the last query() call."""
        return self._results.get_ids()

Loading