Skip to content

Commit

Permalink
docs: supported models update
Browse files Browse the repository at this point in the history
  • Loading branch information
Anush008 committed Jan 31, 2024
1 parent 565fbf3 commit 7ff857c
Show file tree
Hide file tree
Showing 2 changed files with 89 additions and 57 deletions.
142 changes: 86 additions & 56 deletions docs/examples/Supported_Models.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 6,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -45,47 +45,47 @@
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>BAAI/bge-base-en</td>\n",
" <td>768</td>\n",
" <td>Base English model</td>\n",
" <td>0.50</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>BAAI/bge-base-en-v1.5</td>\n",
" <td>768</td>\n",
" <td>Base English model, v1.5</td>\n",
" <td>0.44</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>BAAI/bge-large-en-v1.5</td>\n",
" <td>1024</td>\n",
" <td>Large English model, v1.5</td>\n",
" <td>1.34</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>BAAI/bge-small-en</td>\n",
" <td>384</td>\n",
" <td>Fast English model</td>\n",
" <td>0.20</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <th>4</th>\n",
" <td>BAAI/bge-small-en-v1.5</td>\n",
" <td>384</td>\n",
" <td>Fast and Default English model</td>\n",
" <td>0.13</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <th>5</th>\n",
" <td>BAAI/bge-small-zh-v1.5</td>\n",
" <td>512</td>\n",
" <td>Fast and recommended Chinese model</td>\n",
" <td>0.10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>BAAI/bge-base-en</td>\n",
" <td>768</td>\n",
" <td>Base English model</td>\n",
" <td>0.50</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>BAAI/bge-base-en-v1.5</td>\n",
" <td>768</td>\n",
" <td>Base English model, v1.5</td>\n",
" <td>0.44</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>sentence-transformers/all-MiniLM-L6-v2</td>\n",
" <td>384</td>\n",
" <td>Sentence Transformer model, MiniLM-L6-v2</td>\n",
" <td>0.09</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>intfloat/multilingual-e5-large</td>\n",
" <td>1024</td>\n",
Expand All @@ -106,46 +106,76 @@
" <td>English embedding model supporting 8192 sequence length</td>\n",
" <td>0.13</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>sentence-transformers/all-MiniLM-L6-v2</td>\n",
" <td>384</td>\n",
" <td>Sentence Transformer model, MiniLM-L6-v2</td>\n",
" <td>0.09</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>xenova/multilingual-e5-large</td>\n",
" <td>1024</td>\n",
" <td>Multilingual model. Recommended for non-English languages</td>\n",
" <td>2.24</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>xenova/paraphrase-multilingual-mpnet-base-v2</td>\n",
" <td>768</td>\n",
" <td>Sentence-transformers model for tasks like clustering or semantic search</td>\n",
" <td>1.11</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" model dim \\\n",
"0 BAAI/bge-small-en 384 \n",
"1 BAAI/bge-small-en-v1.5 384 \n",
"2 BAAI/bge-small-zh-v1.5 512 \n",
"3 BAAI/bge-base-en 768 \n",
"4 BAAI/bge-base-en-v1.5 768 \n",
"5 sentence-transformers/all-MiniLM-L6-v2 384 \n",
"6 intfloat/multilingual-e5-large 1024 \n",
"7 jinaai/jina-embeddings-v2-base-en 768 \n",
"8 jinaai/jina-embeddings-v2-small-en 512 \n",
" model dim \\\n",
"0 BAAI/bge-base-en 768 \n",
"1 BAAI/bge-base-en-v1.5 768 \n",
"2 BAAI/bge-large-en-v1.5 1024 \n",
"3 BAAI/bge-small-en 384 \n",
"4 BAAI/bge-small-en-v1.5 384 \n",
"5 BAAI/bge-small-zh-v1.5 512 \n",
"6 intfloat/multilingual-e5-large 1024 \n",
"7 jinaai/jina-embeddings-v2-base-en 768 \n",
"8 jinaai/jina-embeddings-v2-small-en 512 \n",
"9 sentence-transformers/all-MiniLM-L6-v2 384 \n",
"10 xenova/multilingual-e5-large 1024 \n",
"11 xenova/paraphrase-multilingual-mpnet-base-v2 768 \n",
"\n",
" description \\\n",
"0 Fast English model \n",
"1 Fast and Default English model \n",
"2 Fast and recommended Chinese model \n",
"3 Base English model \n",
"4 Base English model, v1.5 \n",
"5 Sentence Transformer model, MiniLM-L6-v2 \n",
"6 Multilingual model, e5-large. Recommend using this model for non-English languages \n",
"7 English embedding model supporting 8192 sequence length \n",
"8 English embedding model supporting 8192 sequence length \n",
" description \\\n",
"0 Base English model \n",
"1 Base English model, v1.5 \n",
"2 Large English model, v1.5 \n",
"3 Fast English model \n",
"4 Fast and Default English model \n",
"5 Fast and recommended Chinese model \n",
"6 Multilingual model, e5-large. Recommend using this model for non-English languages \n",
"7 English embedding model supporting 8192 sequence length \n",
"8 English embedding model supporting 8192 sequence length \n",
"9 Sentence Transformer model, MiniLM-L6-v2 \n",
"10 Multilingual model. Recommended for non-English languages \n",
"11 Sentence-transformers model for tasks like clustering or semantic search \n",
"\n",
" size_in_GB \n",
"0 0.20 \n",
"1 0.13 \n",
"2 0.10 \n",
"3 0.50 \n",
"4 0.44 \n",
"5 0.09 \n",
"6 2.24 \n",
"7 0.55 \n",
"8 0.13 "
" size_in_GB \n",
"0 0.50 \n",
"1 0.44 \n",
"2 1.34 \n",
"3 0.20 \n",
"4 0.13 \n",
"5 0.10 \n",
"6 2.24 \n",
"7 0.55 \n",
"8 0.13 \n",
"9 0.09 \n",
"10 2.24 \n",
"11 1.11 "
]
},
"execution_count": 1,
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
Expand Down Expand Up @@ -175,7 +205,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.17"
"version": "3.11.7"
},
"orig_nbformat": 4
},
Expand Down
4 changes: 3 additions & 1 deletion fastembed/embedding.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,9 @@ def embed(self, texts: Iterable[str], batch_size: int = 256, parallel: int = Non
raise NotImplementedError

@classmethod
def list_supported_models(cls, exclude: List[str] = []) -> List[Dict[str, Any]]:
def list_supported_models(
cls, exclude: List[str] = ["compressed_url_sources", "hf_sources"]
) -> List[Dict[str, Any]]:
"""Lists the supported models.

Args:
Expand Down

0 comments on commit 7ff857c

Please sign in to comment.