diff --git a/.nojekyll b/.nojekyll new file mode 100644 index 00000000..e69de29b diff --git a/404.html b/404.html new file mode 100644 index 00000000..c4e8260e --- /dev/null +++ b/404.html @@ -0,0 +1,615 @@ + + + +
+ + + + + + + + + + + + + + + + +Here you will learn how to use the fastembed package to embed your data into a vector space. The package is designed to be easy to use and fast. It is built on top of the ONNX standard, which allows for fast inference on a variety of hardware (called Runtimes in ONNX).
+The fastembed package is designed to be easy to use. The main class is the Embedding
class. It takes a list of strings as input and returns a list of vectors as output. The Embedding
class is initialized with a model file.
!pip install fastembed --upgrade --quiet # Install fastembed
+
Make the necessary imports, initialize the Embedding
class, and embed your data into vectors:
from typing import List
+import numpy as np
+from fastembed.embedding import DefaultEmbedding
+
+# Example list of documents
+documents: List[str] = [
+ "Hello, World!",
+ "This is an example document.",
+ "fastembed is supported by and maintained by Qdrant.",
+]
+# Initialize the DefaultEmbedding class
+embedding_model = DefaultEmbedding()
+embeddings: List[np.ndarray] = list(embedding_model.embed(documents))
+print(embeddings[0].shape)
+
Importing the required classes and modules:
+from typing import List
+import numpy as np
+from fastembed.embedding import DefaultEmbedding
+
Notice that we are using the DefaultEmbedding -- a quantized, state-of-the-art Flag Embedding model that beats OpenAI's Embedding by a large margin.
+You can define a list of documents that you'd like to embed. These can be sentences, paragraphs, or even entire documents.
+# Example list of documents
+documents: List[str] = [
+ "passage: Hello, World!",
+ "query: Hello, World!", # these two produce different embeddings
+ "passage: This is an example passage.",
+ # You can leave out the prefix, but including it is recommended
+ "fastembed is supported by and maintained by Qdrant.",
+]
+
Next, initialize the Embedding class with the desired parameters. Here, "BAAI/bge-small-en" is the pre-trained model name, and max_length=512 is the maximum token length for each document.
+This will download the model weights, decompress them into the directory local_cache,
and load them into the Embedding class.
We will initialize Flag Embeddings with the model name and the maximum token length; that is, the DefaultEmbedding class with the model name "BAAI/bge-small-en" and max_length=512.
+embedding_model = DefaultEmbedding()
+
Use the embed method of the embedding model to transform the documents into a list of np.ndarray vectors. The method returns a generator, so we pass it to list() to collect the embeddings.
+embeddings: List[np.ndarray] = list(embedding_model.embed(documents))
+
You can print the shape of an embedding to check its dimensionality. For a single embedding, the shape gives the number of dimensions in the vector.
+print(embeddings[0].shape) # (384,) or similar output
+
{"use strict";/*!
+ * escape-html
+ * Copyright(c) 2012-2013 TJ Holowaychuk
+ * Copyright(c) 2015 Andreas Lubbe
+ * Copyright(c) 2015 Tiancheng "Timothy" Gu
+ * MIT Licensed
+ */var Va=/["'&<>]/;qn.exports=za;function za(e){var t=""+e,r=Va.exec(t);if(!r)return t;var o,n="",i=0,s=0;for(i=r.index;i