diff --git a/chromadb/api/configuration.py b/chromadb/api/configuration.py index 7a8e1b04896..03488e92909 100644 --- a/chromadb/api/configuration.py +++ b/chromadb/api/configuration.py @@ -239,7 +239,7 @@ class HNSWConfigurationInternal(ConfigurationInternal): name="ef_search", validator=lambda value: isinstance(value, int) and value >= 1, is_static=False, - default_value=10, + default_value=100, ), "num_threads": ConfigurationDefinition( name="num_threads", @@ -328,7 +328,7 @@ def __init__( self, space: str = "l2", ef_construction: int = 100, - ef_search: int = 10, + ef_search: int = 100, num_threads: int = cpu_count(), M: int = 16, resize_factor: float = 1.2, diff --git a/chromadb/segment/impl/vector/hnsw_params.py b/chromadb/segment/impl/vector/hnsw_params.py index b12c4281508..4387f188edf 100644 --- a/chromadb/segment/impl/vector/hnsw_params.py +++ b/chromadb/segment/impl/vector/hnsw_params.py @@ -55,7 +55,7 @@ def __init__(self, metadata: Metadata): metadata = metadata or {} self.space = str(metadata.get("hnsw:space", "l2")) self.construction_ef = int(metadata.get("hnsw:construction_ef", 100)) - self.search_ef = int(metadata.get("hnsw:search_ef", 10)) + self.search_ef = int(metadata.get("hnsw:search_ef", 100)) self.M = int(metadata.get("hnsw:M", 16)) self.num_threads = int( metadata.get("hnsw:num_threads", multiprocessing.cpu_count()) diff --git a/docs/docs.trychroma.com/markdoc/content/docs/collections/configure.md b/docs/docs.trychroma.com/markdoc/content/docs/collections/configure.md index 7b2cf5b389e..05950cafde7 100644 --- a/docs/docs.trychroma.com/markdoc/content/docs/collections/configure.md +++ b/docs/docs.trychroma.com/markdoc/content/docs/collections/configure.md @@ -11,7 +11,7 @@ You can configure the embedding space of a collection by setting special keys on | Cosine similarity | `cosine` | {% Latex %} d = 1.0 - \\frac{\\sum\\left(A_i \\times B_i\\right)}{\\sqrt{\\sum\\left(A_i^2\\right)} \\cdot \\sqrt{\\sum\\left(B_i^2\\right)}} {% /Latex %} | * `hnsw:construction_ef` determines the size of the candidate list used to select neighbors during index creation. A higher value improves index quality at the cost of more memory and time, while a lower value speeds up construction with reduced accuracy. The default value is `100`. -* `hnsw:search_ef` determines the size of the dynamic candidate list used while searching for the nearest neighbors. A higher value improves recall and accuracy by exploring more potential neighbors but increases query time and computational cost, while a lower value results in faster but less accurate searches. The default value is `10`. +* `hnsw:search_ef` determines the size of the dynamic candidate list used while searching for the nearest neighbors. A higher value improves recall and accuracy by exploring more potential neighbors but increases query time and computational cost, while a lower value results in faster but less accurate searches. The default value is `100`. * `hnsw:M` is the maximum number of neighbors (connections) that each node in the graph can have during the construction of the index. A higher value results in a denser graph, leading to better recall and accuracy during searches but increases memory usage and construction time. A lower value creates a sparser graph, reducing memory usage and construction time but at the cost of lower search accuracy and recall. The default value is `16`. * `hnsw:num_threads` specifies the number of threads to use during index construction or search operations. The default value is `multiprocessing.cpu_count()` (available CPU cores).