diff --git a/neurips23/latitude-m2-medium.md b/neurips23/latitude-m2-medium.md index 7f259338..acd27159 100644 --- a/neurips23/latitude-m2-medium.md +++ b/neurips23/latitude-m2-medium.md @@ -26,6 +26,228 @@ Note: * The [NeurIPS2023](README.md) Docker-based eval limits the use of available underlying resources. ## Results + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
filtersparseood
rankalgorithmqpsstatusrankalgorithmqpsstatusrankalgorithmqpsstatus
1
pinecone
146721.241800ok1
pyanns
26858.518647ok1
pinecone-ood
76865.422871ok
2puck62257.694628ok2pinecone_smips12003.182367ok2pyanns55504.351628ok
3parlayivf55033.197938ok3shnsw8247.277083ok3sustech-ood28458.262883ok
4wm_filter20874.856888ok4nle2945.626091ok4mysteryann-dif27946.097391ok
5pyanns8988.021683ok5cufe84.975738ok5mysteryann26560.866584ok
6faissplus8493.264038ok6linscan60.756008ok6vamana19965.516521ok
7faiss7327.855358oksustech-whuerror7puck18960.224161ok
8cufe6318.461801okspmaterror8ngt11921.398938ok
zillizerrorzillizerror9epsearch7663.219028ok
dhqerror10diskann6406.820738ok
hwtl_sdu_anns_filtererror11cufe5404.492150ok
fdufilterdiskannerrorscannerror
zillizerror
puck-fizzerror
+ ### Track: Filter diff --git a/neurips23/latitude/.gitignore b/neurips23/latitude/.gitignore new file mode 100644 index 00000000..87620ac7 --- /dev/null +++ b/neurips23/latitude/.gitignore @@ -0,0 +1 @@ +.ipynb_checkpoints/ diff --git a/neurips23/latitude/analysis.ipynb b/neurips23/latitude/analysis.ipynb new file mode 100644 index 00000000..20951a7c --- /dev/null +++ b/neurips23/latitude/analysis.ipynb @@ -0,0 +1,837 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "21ac7846-3ebf-4f2a-b545-be471d0ce5a8", + "metadata": {}, + "outputs": [], + "source": [ + "# Relative path to competition data export file\n", + "CSV = \"data_export_m2-medium.csv\"\n", + "\n", + "# Relative path to competition directory\n", + "COMPETITION_DIR = \"..\" \n", + "\n", + "# Competition dataset names\n", + "DATASETS = []\n", + "\n", + "# Competition recall/ap threshold\n", + "RECALL_AP_THRESHOLD = 0.9\n", + "\n", + "# Markdown file\n", + "MARKDOWN = \"../latitude-m2-medium.md\"" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "936c9c86-f611-4b3f-ae6b-d9d02347a4e2", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import os\n", + "import glob\n", + "import math\n", + "import re\n", + "import helper\n", + "import importlib\n", + "from IPython.core.display import HTML" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "7193be1a-a6de-43df-9f60-9243f5ecf473", + "metadata": {}, + "outputs": [], + "source": [ + "# read CSV\n", + "df = pd.read_csv( CSV )\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "5c83a9db-fc3f-4813-91c8-a7a1275b086d", + "metadata": {}, + "outputs": [], + "source": [ + "# get track/dataset groups\n", + "grps = df.groupby([\"track\",\"dataset\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "303b19d1-d0a6-4f0c-9dd0-08879df9cdd6", + "metadata": {}, + "outputs": [], + "source": [ + "dfs = [] # accumulate individual track dataframes\n", 
+ "\n", + "# iterate groups\n", + "for name, group in grps:\n", + "\n", + " # extract groupby track and dataset\n", + " track = name[0]\n", + " dataset = name[1]\n", + " \n", + " # produce track ranking\n", + " ranking_df = group[ group[\"recall/ap\"]>=RECALL_AP_THRESHOLD ]\\\n", + " .groupby([\"algorithm\"]) \\\n", + " .max(\"qps\") \\\n", + " .sort_values(\"qps\", ascending=False) \\\n", + " [[\"qps\",\"recall/ap\"]] \n", + " \n", + " # set 'status' column to 'ok' for all surviving rows\n", + " ranking_df['status'] = 'ok'\n", + "\n", + " # return the algorithm index as a column\n", + " ranking_df.reset_index(inplace=True)\n", + " #print(ranking_df)\n", + " #print(list(ranking_df.index))\n", + "\n", + " # retrieve all participating track algorithm names via track algo subdirectory \n", + " track_dir = os.path.join( COMPETITION_DIR, \"%s/*/Dockerfile\" % track )\n", + " algos_participating = [os.path.basename(os.path.dirname(p)) for p in glob.glob( track_dir ) ]\n", + " \n", + " # compute difference of algo lists of track subdirs and algos in results - these did not qualify\n", + " algos_did_not_qualify = list( set(algos_participating) - set(list(ranking_df[\"algorithm\"])) )\n", + "\n", + " # append not-qualified algos to dataframe\n", + " for algo in algos_did_not_qualify:\n", + " ranking_df = pd.concat([ranking_df, pd.DataFrame([{'algorithm':algo,'status':'error'}])], ignore_index = True)\n", + "\n", + " # create a rank numeric column\n", + " ranking_df['rank'] = ranking_df.apply( lambda row: int(row.name)+1, axis=1)\n", + " ranking_df['rank'] = ranking_df['rank'].astype('Int64')\n", + "\n", + " # return the track and dataset as column\n", + " ranking_df['track'] = track\n", + " ranking_df['dataset'] = dataset\n", + " #print(ranking_df)\n", + "\n", + " dfs.append(ranking_df)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "51cf0a75-1acf-44de-b7f1-2cf030cadd94", + "metadata": {}, + "outputs": [], + "source": [ + "# combine all track
dataframes\n", + "master_df = pd.concat(dfs)\n", + "# master_df" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "c90fba89-728a-4f54-bb80-95834bd33005", + "metadata": {}, + "outputs": [], + "source": [ + "# pivot via 'rank' so that track rankings are parallel across columns\n", + "pivot_df = master_df.pivot_table(index=['rank'], \n", + " columns=['track'], \n", + " values=['algorithm','qps','status'],\n", + " aggfunc='first')\n", + "#pivot_df\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "f4e617b4-112b-45e1-bc3b-2692d980d19f", + "metadata": {}, + "outputs": [], + "source": [ + "# group specific track columns\n", + "reorder_df = pivot_df[ [ ('algorithm','filter'), ( 'qps','filter' ), ( 'status','filter' ), \\\n", + " ('algorithm','sparse'), ( 'qps','sparse' ), ( 'status','sparse' ),\n", + " ('algorithm','ood'), ( 'qps','ood' ), ( 'status','ood' ) ] ]\n", + "#reorder_df" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "8d2653b4-f070-4761-8b5e-9a9fa03f19cc", + "metadata": {}, + "outputs": [], + "source": [ + "# swap hierarchical index for columns\n", + "swap_df = reorder_df.swaplevel(0,1, axis=1)\n", + "#swap_df" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "d0accb53-c34b-418e-85a4-253c46cdb90b", + "metadata": {}, + "outputs": [], + "source": [ + "# add per track 'rank' column ensuring no rank to algorithms that did not qualify\n", + "for track in master_df['track'].unique():\n", + " qps = list( swap_df[ (track,'qps') ] )\n", + " track_ranking = map(lambda el: float('nan') if math.isnan(qps[el-1]) else el,\n", + " range(1,len(qps)+1) )\n", + " #print(track_ranking, list(ranking))\n", + " col_to_find = (track, 'algorithm')\n", + " col_idx = list(swap_df.columns).index(col_to_find)\n", + "\n", + " swap_df.insert(col_idx, (track,'rank'), list(track_ranking) )\n", + " swap_df[(track,'rank')] = swap_df[(track,'rank')].astype('Int64')\n", + "#swap_df" + ] + }, + { + "cell_type": "code", 
+ "execution_count": 15, + "id": "5cee1395-0e41-4ba4-b586-f9c4b56a9024", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", 
+ " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
trackfiltersparseood
 rankalgorithmqpsstatusrankalgorithmqpsstatusrankalgorithmqpsstatus
rank            
11pinecone146721.241800ok1pyanns26858.518647ok1pinecone-ood76865.422871ok
22puck62257.694628ok2pinecone_smips12003.182367ok2pyanns55504.351628ok
33parlayivf55033.197938ok3shnsw8247.277083ok3sustech-ood28458.262883ok
44wm_filter20874.856888ok4nle2945.626091ok4mysteryann-dif27946.097391ok
55pyanns8988.021683ok5cufe84.975738ok5mysteryann26560.866584ok
66faissplus8493.264038ok6linscan60.756008ok6vamana19965.516521ok
77faiss7327.855358okzilliznanerror7puck18960.224161ok
88cufe6318.461801oksustech-whunanerror8ngt11921.398938ok
9zilliznanerrorspmatnanerror9epsearch7663.219028ok
10dhqnanerrornannannan10diskann6406.820738ok
11fdufilterdiskannnanerrornannannan11cufe5404.492150ok
12hwtl_sdu_anns_filternanerrornannannanzilliznanerror
13nannannannannannanpuck-fizznanerror
14nannannannannannanscannnanerror
\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# style the dataframe (getting ready for html export)\n", + "\n", + "display_df_styled = swap_df.style\n", + "display_df_styled" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "884f5113-01a9-42a7-9262-5cea8e9cd751", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "filter pinecone\n", + "sparse pyanns\n", + "ood pinecone-ood\n", + "filter puck\n", + "sparse pinecone_smips\n", + "ood pyanns\n", + "filter parlayivf\n", + "sparse shnsw\n", + "ood sustech-ood\n", + "filter wm_filter\n", + "sparse nle\n", + "ood mysteryann-dif\n", + "filter pyanns\n", + "sparse cufe\n", + "ood mysteryann\n", + "filter faissplus\n", + "sparse linscan\n", + "ood vamana\n", + "filter faiss\n", + "sparse zilliz\n", + "ood puck\n", + "filter cufe\n", + "sparse sustech-whu\n", + "ood ngt\n", + "filter zilliz\n", + "sparse spmat\n", + "ood epsearch\n", + "filter dhq\n", + "sparse \n", + "ood diskann\n", + "filter fdufilterdiskann\n", + "sparse \n", + "ood cufe\n", + "filter hwtl_sdu_anns_filter\n", + "sparse \n", + "ood zilliz\n", + "filter \n", + "sparse \n", + "ood puck-fizz\n", + "filter \n", + "sparse \n", + "ood scann\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
filtersparseood
rankalgorithmqpsstatusrankalgorithmqpsstatusrankalgorithmqpsstatus
1
pinecone
146721.241800ok1
pyanns
26858.518647ok1
pinecone-ood
76865.422871ok
2
puck
62257.694628ok2
pinecone_smips
12003.182367ok2
pyanns
55504.351628ok
3
parlayivf
55033.197938ok3
shnsw
8247.277083ok3
sustech-ood
28458.262883ok
4
wm_filter
20874.856888ok4
nle
2945.626091ok4
mysteryann-dif
27946.097391ok
5
pyanns
8988.021683ok5
cufe
84.975738ok5
mysteryann
26560.866584ok
6
faissplus
8493.264038ok6
linscan
60.756008ok6
vamana
19965.516521ok
7
faiss
7327.855358ok
zilliz
error7
puck
18960.224161ok
8
cufe
6318.461801ok
sustech-whu
error8
ngt
11921.398938ok
zilliz
error
spmat
error9
epsearch
7663.219028ok
dhq
error10
diskann
6406.820738ok
fdufilterdiskann
error11
cufe
5404.492150ok
hwtl_sdu_anns_filter
error
zilliz
error
puck-fizz
error
scann
error
\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# convert styled table to html string\n", + "\n", + "importlib.reload(helper) # need to reload if editing simultaneously\n", + "\n", + "# hide row index and export to html\n", + "html = display_df_styled.hide(axis=0).to_html()\n", + "\n", + "# remove style tag (markdown doesn't use it anyway)\n", + "new_html = helper.remove_style_prefix(html)\n", + "\n", + "# replace all \"nan\"s with empty string\n", + "new_html = new_html.replace(\"nan\",\"\")\n", + "\n", + "# insert links for specific use cases (ie, errors)\n", + "new_html = helper.replace_table_with_links(new_html, swap_df)\n", + "\n", + "# display in-line\n", + "HTML(new_html)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25fc371b-1a14-4fca-97de-acd79070c0ad", + "metadata": {}, + "outputs": [], + "source": [ + "# insert html into the markdown\n", + "\n", + "with open(MARKDOWN,'r') as md:\n", + " contents = md.read()\n", + " \n", + "# locate insertion point via regex\n", + "regexpr = \"(## Results\\n)\"\n", + "matches = re.search(regexpr, contents, re.M)\n", + "\n", + "# insert table html\n", + "new_contents = contents[0:matches.span(0)[1]] + new_html +\" \\n\" + contents[matches.span(0)[1]:]\n", + "#print(new_contents[0:1000])\n", + "\n", + "# update the markdown\n", + "with open(MARKDOWN,'w') as md:\n", + " contents = md.write(new_contents)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3f6622a5-db56-4eeb-b254-5002ed0ea098", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", +
"pygments_lexer": "ipython3", + "version": "3.10.14" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/neurips23/latitude/commands/.ipynb_checkpoints/filter__zilliz-checkpoint.sh b/neurips23/latitude/commands/.ipynb_checkpoints/filter__zilliz-checkpoint.sh new file mode 100644 index 00000000..e2832da1 --- /dev/null +++ b/neurips23/latitude/commands/.ipynb_checkpoints/filter__zilliz-checkpoint.sh @@ -0,0 +1,6 @@ +#!/bin/bash +set -x # echo the command +set -e # stop script on error + +python install.py --neurips23track filter --algorithm zilliz +python3 run.py --dataset yfcc-10M --algorithm zilliz --neurips23track filter diff --git a/neurips23/latitude/commands/filter__cufe.sh b/neurips23/latitude/commands/filter__cufe.sh new file mode 100644 index 00000000..1a5c4933 --- /dev/null +++ b/neurips23/latitude/commands/filter__cufe.sh @@ -0,0 +1,6 @@ +#!/bin/bash +set -x # echo the command +set -e # stop script on error + +python install.py --neurips23track filter --algorithm cufe +python3 run.py --dataset yfcc-10M --algorithm cufe --neurips23track filter diff --git a/neurips23/latitude/commands/filter__dhq.sh b/neurips23/latitude/commands/filter__dhq.sh new file mode 100644 index 00000000..060b717f --- /dev/null +++ b/neurips23/latitude/commands/filter__dhq.sh @@ -0,0 +1,6 @@ +#!/bin/bash +set -x # echo the command +set -e # stop script on error + +python install.py --neurips23track filter --algorithm dhq +python3 run.py --dataset yfcc-10M --algorithm dhq --neurips23track filter diff --git a/neurips23/latitude/commands/filter__faiss.sh b/neurips23/latitude/commands/filter__faiss.sh new file mode 100644 index 00000000..7403ef4a --- /dev/null +++ b/neurips23/latitude/commands/filter__faiss.sh @@ -0,0 +1,6 @@ +#!/bin/bash +set -x # echo the command +set -e # stop script on error + +python install.py --neurips23track filter --algorithm faiss +python3 run.py --dataset yfcc-10M --algorithm faiss --neurips23track filter diff --git 
a/neurips23/latitude/commands/filter__faissplus.sh b/neurips23/latitude/commands/filter__faissplus.sh new file mode 100644 index 00000000..6f547fba --- /dev/null +++ b/neurips23/latitude/commands/filter__faissplus.sh @@ -0,0 +1,6 @@ +#!/bin/bash +set -x # echo the command +set -e # stop script on error + +python install.py --neurips23track filter --algorithm faissplus +python3 run.py --dataset yfcc-10M --algorithm faissplus --neurips23track filter diff --git a/neurips23/latitude/commands/filter__fdufilterdiskann.sh b/neurips23/latitude/commands/filter__fdufilterdiskann.sh new file mode 100644 index 00000000..d2594737 --- /dev/null +++ b/neurips23/latitude/commands/filter__fdufilterdiskann.sh @@ -0,0 +1,6 @@ +#!/bin/bash +set -x # echo the command +set -e # stop script on error + +python install.py --neurips23track filter --algorithm fdufilterdiskann +python3 run.py --dataset yfcc-10M --algorithm fdufilterdiskann --neurips23track filter diff --git a/neurips23/latitude/commands/filter__hwtl_sdu_anns_filter.sh b/neurips23/latitude/commands/filter__hwtl_sdu_anns_filter.sh new file mode 100644 index 00000000..8d5bfa09 --- /dev/null +++ b/neurips23/latitude/commands/filter__hwtl_sdu_anns_filter.sh @@ -0,0 +1,6 @@ +#!/bin/bash +set -x # echo the command +set -e # stop script on error + +python install.py --neurips23track filter --algorithm hwtl_sdu_anns_filter +python3 run.py --dataset yfcc-10M --algorithm hwtl_sdu_anns_filter --neurips23track filter diff --git a/neurips23/latitude/commands/filter__parlayivf.sh b/neurips23/latitude/commands/filter__parlayivf.sh new file mode 100644 index 00000000..b42dd660 --- /dev/null +++ b/neurips23/latitude/commands/filter__parlayivf.sh @@ -0,0 +1,6 @@ +#!/bin/bash +set -x # echo the command +set -e # stop script on error + +python install.py --neurips23track filter --algorithm parlayivf +python3 run.py --dataset yfcc-10M --algorithm parlayivf --neurips23track filter diff --git a/neurips23/latitude/commands/filter__pinecone.sh 
b/neurips23/latitude/commands/filter__pinecone.sh new file mode 100644 index 00000000..64c4ab90 --- /dev/null +++ b/neurips23/latitude/commands/filter__pinecone.sh @@ -0,0 +1,6 @@ +#!/bin/bash +set -x # echo the command +set -e # stop script on error + +python install.py --neurips23track filter --algorithm pinecone +python3 run.py --dataset yfcc-10M --algorithm pinecone --neurips23track filter diff --git a/neurips23/latitude/commands/filter__puck.sh b/neurips23/latitude/commands/filter__puck.sh new file mode 100644 index 00000000..99dea7ab --- /dev/null +++ b/neurips23/latitude/commands/filter__puck.sh @@ -0,0 +1,6 @@ +#!/bin/bash +set -x # echo the command +set -e # stop script on error + +python install.py --neurips23track filter --algorithm puck +python3 run.py --dataset yfcc-10M --algorithm puck --neurips23track filter diff --git a/neurips23/latitude/commands/filter__pyanns.sh b/neurips23/latitude/commands/filter__pyanns.sh new file mode 100644 index 00000000..bd122c37 --- /dev/null +++ b/neurips23/latitude/commands/filter__pyanns.sh @@ -0,0 +1,6 @@ +#!/bin/bash +set -x # echo the command +set -e # stop script on error + +python install.py --neurips23track filter --algorithm pyanns +python3 run.py --dataset yfcc-10M --algorithm pyanns --neurips23track filter diff --git a/neurips23/latitude/commands/filter__wm_filter.sh b/neurips23/latitude/commands/filter__wm_filter.sh new file mode 100644 index 00000000..7ef1081f --- /dev/null +++ b/neurips23/latitude/commands/filter__wm_filter.sh @@ -0,0 +1,6 @@ +#!/bin/bash +set -x # echo the command +set -e # stop script on error + +python install.py --neurips23track filter --algorithm wm_filter +python3 run.py --dataset yfcc-10M --algorithm wm_filter --neurips23track filter diff --git a/neurips23/latitude/commands/filter__zilliz.sh b/neurips23/latitude/commands/filter__zilliz.sh new file mode 100644 index 00000000..e2832da1 --- /dev/null +++ b/neurips23/latitude/commands/filter__zilliz.sh @@ -0,0 +1,6 @@ +#!/bin/bash +set 
-x # echo the command +set -e # stop script on error + +python install.py --neurips23track filter --algorithm zilliz +python3 run.py --dataset yfcc-10M --algorithm zilliz --neurips23track filter diff --git a/neurips23/latitude/commands/ood__cufe.sh b/neurips23/latitude/commands/ood__cufe.sh new file mode 100644 index 00000000..dbd07251 --- /dev/null +++ b/neurips23/latitude/commands/ood__cufe.sh @@ -0,0 +1,6 @@ +#!/bin/bash +set -x # echo the command +set -e # stop script on error + +python install.py --neurips23track ood --algorithm cufe +python3 run.py --dataset text2image-10M --algorithm cufe --neurips23track ood diff --git a/neurips23/latitude/commands/ood__diskann.sh b/neurips23/latitude/commands/ood__diskann.sh new file mode 100644 index 00000000..65da8bd8 --- /dev/null +++ b/neurips23/latitude/commands/ood__diskann.sh @@ -0,0 +1,6 @@ +#!/bin/bash +set -x # echo the command +set -e # stop script on error + +python install.py --neurips23track ood --algorithm diskann +python3 run.py --dataset text2image-10M --algorithm diskann --neurips23track ood diff --git a/neurips23/latitude/commands/ood__epsearch.sh b/neurips23/latitude/commands/ood__epsearch.sh new file mode 100644 index 00000000..1a7dc642 --- /dev/null +++ b/neurips23/latitude/commands/ood__epsearch.sh @@ -0,0 +1,6 @@ +#!/bin/bash +set -x # echo the command +set -e # stop script on error + +python install.py --neurips23track ood --algorithm epsearch +python3 run.py --dataset text2image-10M --algorithm epsearch --neurips23track ood diff --git a/neurips23/latitude/commands/ood__mysteryann-dif.sh b/neurips23/latitude/commands/ood__mysteryann-dif.sh new file mode 100644 index 00000000..09db4d74 --- /dev/null +++ b/neurips23/latitude/commands/ood__mysteryann-dif.sh @@ -0,0 +1,6 @@ +#!/bin/bash +set -x # echo the command +set -e # stop script on error + +python install.py --neurips23track ood --algorithm mysteryann-dif +python3 run.py --dataset text2image-10M --algorithm mysteryann-dif --neurips23track ood 
diff --git a/neurips23/latitude/commands/ood__mysteryann.sh b/neurips23/latitude/commands/ood__mysteryann.sh new file mode 100644 index 00000000..6006fb89 --- /dev/null +++ b/neurips23/latitude/commands/ood__mysteryann.sh @@ -0,0 +1,6 @@ +#!/bin/bash +set -x # echo the command +set -e # stop script on error + +python install.py --neurips23track ood --algorithm mysteryann +python3 run.py --dataset text2image-10M --algorithm mysteryann --neurips23track ood diff --git a/neurips23/latitude/commands/ood__ngt.sh b/neurips23/latitude/commands/ood__ngt.sh new file mode 100644 index 00000000..f69b688b --- /dev/null +++ b/neurips23/latitude/commands/ood__ngt.sh @@ -0,0 +1,6 @@ +#!/bin/bash +set -x # echo the command +set -e # stop script on error + +python install.py --neurips23track ood --algorithm ngt +python3 run.py --dataset text2image-10M --algorithm ngt --neurips23track ood diff --git a/neurips23/latitude/commands/ood__pinecone-ood.sh b/neurips23/latitude/commands/ood__pinecone-ood.sh new file mode 100644 index 00000000..a9251fd8 --- /dev/null +++ b/neurips23/latitude/commands/ood__pinecone-ood.sh @@ -0,0 +1,6 @@ +#!/bin/bash +set -x # echo the command +set -e # stop script on error + +python install.py --neurips23track ood --algorithm pinecone-ood +python3 run.py --dataset text2image-10M --algorithm pinecone-ood --neurips23track ood diff --git a/neurips23/latitude/commands/ood__puck-fizz.sh b/neurips23/latitude/commands/ood__puck-fizz.sh new file mode 100644 index 00000000..5b12c496 --- /dev/null +++ b/neurips23/latitude/commands/ood__puck-fizz.sh @@ -0,0 +1,6 @@ +#!/bin/bash +set -x # echo the command +set -e # stop script on error + +python install.py --neurips23track ood --algorithm puck-fizz +python3 run.py --dataset text2image-10M --algorithm puck-fizz --neurips23track ood diff --git a/neurips23/latitude/commands/ood__puck.sh b/neurips23/latitude/commands/ood__puck.sh new file mode 100644 index 00000000..8742985d --- /dev/null +++ 
b/neurips23/latitude/commands/ood__puck.sh @@ -0,0 +1,6 @@ +#!/bin/bash +set -x # echo the command +set -e # stop script on error + +python install.py --neurips23track ood --algorithm puck +python3 run.py --dataset text2image-10M --algorithm puck --neurips23track ood diff --git a/neurips23/latitude/commands/ood__pyanns.sh b/neurips23/latitude/commands/ood__pyanns.sh new file mode 100644 index 00000000..db1f595c --- /dev/null +++ b/neurips23/latitude/commands/ood__pyanns.sh @@ -0,0 +1,6 @@ +#!/bin/bash +set -x # echo the command +set -e # stop script on error + +python install.py --neurips23track ood --algorithm pyanns +python3 run.py --dataset text2image-10M --algorithm pyanns --neurips23track ood diff --git a/neurips23/latitude/commands/ood__scann.sh b/neurips23/latitude/commands/ood__scann.sh new file mode 100644 index 00000000..8b608f96 --- /dev/null +++ b/neurips23/latitude/commands/ood__scann.sh @@ -0,0 +1,6 @@ +#!/bin/bash +set -x # echo the command +set -e # stop script on error + +python install.py --neurips23track ood --algorithm scann +python3 run.py --dataset text2image-10M --algorithm scann --neurips23track ood diff --git a/neurips23/latitude/commands/ood__sustech-ood.sh b/neurips23/latitude/commands/ood__sustech-ood.sh new file mode 100644 index 00000000..91e04eb1 --- /dev/null +++ b/neurips23/latitude/commands/ood__sustech-ood.sh @@ -0,0 +1,6 @@ +#!/bin/bash +set -x # echo the command +set -e # stop script on error + +python install.py --neurips23track ood --algorithm sustech-ood +python3 run.py --dataset text2image-10M --algorithm sustech-ood --neurips23track ood diff --git a/neurips23/latitude/commands/ood__vamana.sh b/neurips23/latitude/commands/ood__vamana.sh new file mode 100644 index 00000000..5924ffb8 --- /dev/null +++ b/neurips23/latitude/commands/ood__vamana.sh @@ -0,0 +1,6 @@ +#!/bin/bash +set -x # echo the command +set -e # stop script on error + +python install.py --neurips23track ood --algorithm vamana +python3 run.py --dataset 
text2image-10M --algorithm vamana --neurips23track ood diff --git a/neurips23/latitude/commands/ood__zilliz.sh b/neurips23/latitude/commands/ood__zilliz.sh new file mode 100644 index 00000000..fd91d5eb --- /dev/null +++ b/neurips23/latitude/commands/ood__zilliz.sh @@ -0,0 +1,6 @@ +#!/bin/bash +set -x # echo the command +set -e # stop script on error + +python install.py --neurips23track ood --algorithm zilliz +python3 run.py --dataset text2image-10M --algorithm zilliz --neurips23track ood diff --git a/neurips23/latitude/commands/sparse__cufe.sh b/neurips23/latitude/commands/sparse__cufe.sh new file mode 100644 index 00000000..acfaa284 --- /dev/null +++ b/neurips23/latitude/commands/sparse__cufe.sh @@ -0,0 +1,6 @@ +#!/bin/bash +set -x # echo the command +set -e # stop script on error + +python install.py --neurips23track sparse --algorithm cufe +python3 run.py --dataset sparse-full --algorithm cufe --neurips23track sparse diff --git a/neurips23/latitude/commands/sparse__linscan.sh b/neurips23/latitude/commands/sparse__linscan.sh new file mode 100644 index 00000000..e0b99868 --- /dev/null +++ b/neurips23/latitude/commands/sparse__linscan.sh @@ -0,0 +1,6 @@ +#!/bin/bash +set -x # echo the command +set -e # stop script on error + +python install.py --neurips23track sparse --algorithm linscan +python3 run.py --dataset sparse-full --algorithm linscan --neurips23track sparse diff --git a/neurips23/latitude/commands/sparse__nle.sh b/neurips23/latitude/commands/sparse__nle.sh new file mode 100644 index 00000000..3a364e5e --- /dev/null +++ b/neurips23/latitude/commands/sparse__nle.sh @@ -0,0 +1,6 @@ +#!/bin/bash +set -x # echo the command +set -e # stop script on error + +python install.py --neurips23track sparse --algorithm nle +python3 run.py --dataset sparse-full --algorithm nle --neurips23track sparse diff --git a/neurips23/latitude/commands/sparse__pinecone_smips.sh b/neurips23/latitude/commands/sparse__pinecone_smips.sh new file mode 100644 index 00000000..c2c2a199 --- 
/dev/null +++ b/neurips23/latitude/commands/sparse__pinecone_smips.sh @@ -0,0 +1,6 @@ +#!/bin/bash +set -x # echo the command +set -e # stop script on error + +python install.py --neurips23track sparse --algorithm pinecone_smips +python3 run.py --dataset sparse-full --algorithm pinecone_smips --neurips23track sparse diff --git a/neurips23/latitude/commands/sparse__pyanns.sh b/neurips23/latitude/commands/sparse__pyanns.sh new file mode 100644 index 00000000..c6e83cdd --- /dev/null +++ b/neurips23/latitude/commands/sparse__pyanns.sh @@ -0,0 +1,6 @@ +#!/bin/bash +set -x # echo the command +set -e # stop script on error + +python install.py --neurips23track sparse --algorithm pyanns +python3 run.py --dataset sparse-full --algorithm pyanns --neurips23track sparse diff --git a/neurips23/latitude/commands/sparse__shnsw.sh b/neurips23/latitude/commands/sparse__shnsw.sh new file mode 100644 index 00000000..4e0c0296 --- /dev/null +++ b/neurips23/latitude/commands/sparse__shnsw.sh @@ -0,0 +1,6 @@ +#!/bin/bash +set -x # echo the command +set -e # stop script on error + +python install.py --neurips23track sparse --algorithm shnsw +python3 run.py --dataset sparse-full --algorithm shnsw --neurips23track sparse diff --git a/neurips23/latitude/commands/sparse__spmat.sh b/neurips23/latitude/commands/sparse__spmat.sh new file mode 100644 index 00000000..b1094ccc --- /dev/null +++ b/neurips23/latitude/commands/sparse__spmat.sh @@ -0,0 +1,6 @@ +#!/bin/bash +set -x # echo the command +set -e # stop script on error + +python install.py --neurips23track sparse --algorithm spmat +python3 run.py --dataset sparse-full --algorithm spmat --neurips23track sparse diff --git a/neurips23/latitude/commands/sparse__sustech-whu.sh b/neurips23/latitude/commands/sparse__sustech-whu.sh new file mode 100644 index 00000000..dae4c8b9 --- /dev/null +++ b/neurips23/latitude/commands/sparse__sustech-whu.sh @@ -0,0 +1,6 @@ +#!/bin/bash +set -x # echo the command +set -e # stop script on error + +python 
install.py --neurips23track sparse --algorithm sustech-whu +python3 run.py --dataset sparse-full --algorithm sustech-whu --neurips23track sparse diff --git a/neurips23/latitude/commands/sparse__zilliz.sh b/neurips23/latitude/commands/sparse__zilliz.sh new file mode 100644 index 00000000..e0fa484f --- /dev/null +++ b/neurips23/latitude/commands/sparse__zilliz.sh @@ -0,0 +1,6 @@ +#!/bin/bash +set -x # echo the command +set -e # stop script on error + +python install.py --neurips23track sparse --algorithm zilliz +python3 run.py --dataset sparse-full --algorithm zilliz --neurips23track sparse diff --git a/neurips23/latitude/commands/streaming__cufe.sh b/neurips23/latitude/commands/streaming__cufe.sh new file mode 100644 index 00000000..670489d5 --- /dev/null +++ b/neurips23/latitude/commands/streaming__cufe.sh @@ -0,0 +1,6 @@ +#!/bin/bash +set -x # echo the command +set -e # stop script on error + +python install.py --neurips23track streaming --algorithm cufe +python3 run.py --dataset msturing-30M-clustered --algorithm cufe --neurips23track streaming --runbook_file neurips23/streaming/final_runbook.yaml diff --git a/neurips23/latitude/commands/streaming__diskann.sh b/neurips23/latitude/commands/streaming__diskann.sh new file mode 100644 index 00000000..f2928f3d --- /dev/null +++ b/neurips23/latitude/commands/streaming__diskann.sh @@ -0,0 +1,6 @@ +#!/bin/bash +set -x # echo the command +set -e # stop script on error + +python install.py --neurips23track streaming --algorithm diskann +python3 run.py --dataset msturing-30M-clustered --algorithm diskann --neurips23track streaming --runbook_file neurips23/streaming/final_runbook.yaml diff --git a/neurips23/latitude/commands/streaming__hwtl_sdu_anns_stream.sh b/neurips23/latitude/commands/streaming__hwtl_sdu_anns_stream.sh new file mode 100644 index 00000000..f4a56754 --- /dev/null +++ b/neurips23/latitude/commands/streaming__hwtl_sdu_anns_stream.sh @@ -0,0 +1,6 @@ +#!/bin/bash +set -x # echo the command +set -e # stop 
script on error + +python install.py --neurips23track streaming --algorithm hwtl_sdu_anns_stream +python3 run.py --dataset msturing-30M-clustered --algorithm hwtl_sdu_anns_stream --neurips23track streaming --runbook_file neurips23/streaming/final_runbook.yaml diff --git a/neurips23/latitude/commands/streaming__pinecone.sh b/neurips23/latitude/commands/streaming__pinecone.sh new file mode 100644 index 00000000..e073ab76 --- /dev/null +++ b/neurips23/latitude/commands/streaming__pinecone.sh @@ -0,0 +1,6 @@ +#!/bin/bash +set -x # echo the command +set -e # stop script on error + +python install.py --neurips23track streaming --algorithm pinecone +python3 run.py --dataset msturing-30M-clustered --algorithm pinecone --neurips23track streaming --runbook_file neurips23/streaming/final_runbook.yaml diff --git a/neurips23/latitude/commands/streaming__puck.sh b/neurips23/latitude/commands/streaming__puck.sh new file mode 100644 index 00000000..87d84d30 --- /dev/null +++ b/neurips23/latitude/commands/streaming__puck.sh @@ -0,0 +1,6 @@ +#!/bin/bash +set -x # echo the command +set -e # stop script on error + +python install.py --neurips23track streaming --algorithm puck +python3 run.py --dataset msturing-30M-clustered --algorithm puck --neurips23track streaming --runbook_file neurips23/streaming/final_runbook.yaml diff --git a/neurips23/latitude/commands/streaming__pyanns.sh b/neurips23/latitude/commands/streaming__pyanns.sh new file mode 100644 index 00000000..90613340 --- /dev/null +++ b/neurips23/latitude/commands/streaming__pyanns.sh @@ -0,0 +1,6 @@ +#!/bin/bash +set -x # echo the command +set -e # stop script on error + +python install.py --neurips23track streaming --algorithm pyanns +python3 run.py --dataset msturing-30M-clustered --algorithm pyanns --neurips23track streaming --runbook_file neurips23/streaming/final_runbook.yaml diff --git a/neurips23/latitude/commands/streaming__scann.sh b/neurips23/latitude/commands/streaming__scann.sh new file mode 100644 index 
00000000..93739787 --- /dev/null +++ b/neurips23/latitude/commands/streaming__scann.sh @@ -0,0 +1,6 @@ +#!/bin/bash +set -x # echo the command +set -e # stop script on error + +python install.py --neurips23track streaming --algorithm scann +python3 run.py --dataset msturing-30M-clustered --algorithm scann --neurips23track streaming --runbook_file neurips23/streaming/final_runbook.yaml diff --git a/neurips23/latitude/create_commands.py b/neurips23/latitude/create_commands.py new file mode 100644 index 00000000..9d87d0fa --- /dev/null +++ b/neurips23/latitude/create_commands.py @@ -0,0 +1,72 @@ +# +# This is a script which creates scripts for individual track algorithms +# + +import glob +import os + +# Relative location of all competition track directories +TRACK_DIRS = { \ + "sparse": "../sparse", \ + "filter": "../filter", \ + "ood": "../ood", \ + "streaming": "../streaming" \ +} + +# Track dataset mapping +TRACK_DATASETS = { \ + "sparse": "sparse-full", \ + "filter": "yfcc-10M", \ + "ood": "text2image-10M", \ + "streaming": "msturing-30M-clustered" \ +} + +# Extra run flags +EXTRA = { \ + "sparse": "", \ + "filter": "", \ + "ood": "", \ + "streaming": "--runbook_file neurips23/streaming/final_runbook.yaml" \ +} + +# Commands dir +CDIR = "commands" + +# Build command template +BUILD_CMD = "python install.py --neurips23track %s --algorithm %s" + +# Run command template +RUN_CMD = "python3 run.py --dataset %s --algorithm %s --neurips23track %s %s" + +if __name__ == "__main__": + + # iterate competition tracks + for track in TRACK_DIRS.keys(): + + track_dir = TRACK_DIRS[track] + + # retrieve all track participants + match = os.path.join( track_dir, "*/Dockerfile" ) + algos_participating = [os.path.basename(os.path.dirname(p)) for p in glob.glob( match ) ] + print("%s participants:" % track, algos_participating) + + # emit the track+algo bash script + for algo in algos_participating: + fname = os.path.join(CDIR, "%s__%s.sh" % (track, algo) ) + with open(fname, "w") as 
fd: + fd.write("#!/bin/bash\n") + fd.write("set -x # echo the command\n") + fd.write("set -e # stop script on error\n") + fd.write("\n") + fd.write( BUILD_CMD % ( track, algo ) + "\n" ) + fd.write( RUN_CMD % ( TRACK_DATASETS[track], algo, track, EXTRA[track] ) + "\n" ) + + print("Wrote %s" % fname) + +print("Done.") + + + + + + diff --git a/neurips23/latitude/err_streaming_pinecone.txt b/neurips23/latitude/errors/streaming__pinecone.txt similarity index 100% rename from neurips23/latitude/err_streaming_pinecone.txt rename to neurips23/latitude/errors/streaming__pinecone.txt diff --git a/neurips23/latitude/err_streaming_pyanns.txt b/neurips23/latitude/errors/streaming__pyanns.txt similarity index 100% rename from neurips23/latitude/err_streaming_pyanns.txt rename to neurips23/latitude/errors/streaming__pyanns.txt diff --git a/neurips23/latitude/helper.py b/neurips23/latitude/helper.py new file mode 100644 index 00000000..e15cdba9 --- /dev/null +++ b/neurips23/latitude/helper.py @@ -0,0 +1,50 @@ +import re +import os + +# the rel path to commands subdir +CDIR = "commands" + +# the rel path to errors subdir +EDIR = "errors" + +def remove_style_prefix(html): + '''This function removes any style tags which get autogenerated + by pandas html renderer but not recognized by github markdown.''' + regexpr = '(]*?>[^<]*?)' + matches = re.search(regexpr, html, re.M) + if matches: + new_html = html[0:matches.span()[0]] + \ + html[ matches.span()[1]+1:] + return new_html + else: + return html + +def replace_table_with_links(html, df): + '''This is a messy function but seems to work well. It replaces table cells with appropriate + links by iterating the rows and cols of table and locating the cell html via regex. 
It may + be better to use BeautifulSoup and related scraping/html helper library.''' + new_html = html + for i in range(df.shape[0]): + for j in range(df.shape[1]): + row = df.iloc[i] + col = df.columns[j] # get the column 'schema' + if col[1] == 'algorithm': + regexpr = '(?)([^<]*?)()' % (i,j) + matches = re.search(regexpr, new_html, re.M) + if not matches and len(matches.groups())!=3: + raise Exception("Cannot locate table cell") + track = col[0] + algo = matches.groups()[1] + print(track, algo) + if algo=="": continue + link_path = os.path.join( CDIR, "%s__%s.sh" % (track, algo) ) + if not os.path.exists(link_path): + raise Exception("Cannot locate run path" + link_path) + err_path = os.path.join( EDIR, "%s__%s.txt" % (track, algo) ) + if os.path.exists(err_path): + print("WARNING: Using error file path for %s/%s" % (track,algo)) + link_path = err_path + link = '
%s
' % algo + # replace cell content with the linked version + new_html = new_html[0:matches.span(2)[0]] + link + new_html[matches.span(3)[0]:] + return new_html \ No newline at end of file