From be7073805cefead76b08a7758ac917e278622778 Mon Sep 17 00:00:00 2001 From: Paul Lam Date: Sun, 24 Sep 2023 06:29:37 +0900 Subject: [PATCH 1/5] refactor measure.time_function as a decorator --- mind_palace/measure.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/mind_palace/measure.py b/mind_palace/measure.py index 598437c..94a8553 100644 --- a/mind_palace/measure.py +++ b/mind_palace/measure.py @@ -2,8 +2,12 @@ def time_function(func): - start_time = time.time() - result = func() - end_time = time.time() - elapsed_time = end_time - start_time - return result, elapsed_time + def timed_func(*args, **kwargs): + start_time = time.time() + result = func(*args, **kwargs) + end_time = time.time() + elapsed_sec = end_time - start_time + print(f"Elapsed time calling {func.__name__}(): {elapsed_sec:.1f} seconds") + return result + + return timed_func From 446e5d4f1f73d68ddfea574efd1efdc3c5628dfe Mon Sep 17 00:00:00 2001 From: Paul Lam Date: Sun, 24 Sep 2023 06:29:50 +0900 Subject: [PATCH 2/5] apply @time_function --- mind_palace/index.py | 3 +++ tests/end_to_end/test_query.py | 6 ++---- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/mind_palace/index.py b/mind_palace/index.py index a1d5b59..ea581cf 100644 --- a/mind_palace/index.py +++ b/mind_palace/index.py @@ -1,7 +1,10 @@ from llama_index.llms import OpenAI import llama_index as li +from measure import time_function + +@time_function def index_nodes(nodes, model="gpt-3.5-turbo"): service_context = li.ServiceContext.from_defaults( llm=OpenAI(model=model), diff --git a/tests/end_to_end/test_query.py b/tests/end_to_end/test_query.py index b56c81a..d877bf8 100644 --- a/tests/end_to_end/test_query.py +++ b/tests/end_to_end/test_query.py @@ -1,12 +1,10 @@ -from tests.context import extract, index, measure +from tests.context import extract, index import llama_index as li def bootstrap_index(xml_dir): nodes = extract.seed_nodes(xml_dir) - vector_index, elapsed_time = measure.time_function(lambda: index.index_nodes(nodes)) - print(f"Elapsed time {elapsed_time:.1f} seconds: Indexed {len(nodes)} nodes") - return vector_index + return index.index_nodes(nodes) def persist_index(vector_index): From d497c3f7fed89031facd607d1a89182802ad521d Mon Sep 17 00:00:00 2001 From: Paul Lam Date: Sun, 24 Sep 2023 06:39:18 +0900 Subject: [PATCH 3/5] add a test_bootstrap_index --- tests/end_to_end/test_query.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/end_to_end/test_query.py b/tests/end_to_end/test_query.py index d877bf8..8ad5aa1 100644 --- a/tests/end_to_end/test_query.py +++ b/tests/end_to_end/test_query.py @@ -1,5 +1,6 @@ from tests.context import extract, index import llama_index as li +import pytest def bootstrap_index(xml_dir): @@ -7,6 +8,13 @@ def bootstrap_index(xml_dir): return index.index_nodes(nodes) +@pytest.mark.skip(reason="This test takes a minute to run") +def test_bootstrap_index(): + xml_dir = "./resources/xmls/12-pdfs-from-steve-aug-22/" + vector_index = bootstrap_index(xml_dir) + assert vector_index is not None + + def persist_index(vector_index): vector_index.storage_context.persist() From de1657cbbe7bc7302e85b5232dc937b75a606d24 Mon Sep 17 00:00:00 2001 From: Paul Lam Date: Sun, 24 Sep 2023 06:39:45 +0900 Subject: [PATCH 4/5] edit index_nodes log to print both models --- mind_palace/index.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/mind_palace/index.py b/mind_palace/index.py index ea581cf..da783a5 100644 --- a/mind_palace/index.py +++ b/mind_palace/index.py @@ -6,13 +6,16 @@ @time_function def index_nodes(nodes, model="gpt-3.5-turbo"): + embed_model = "local" service_context = li.ServiceContext.from_defaults( llm=OpenAI(model=model), - embed_model="local" + embed_model=embed_model # Q: how much does a better embedding model help? ) - print(f"Creating an index with {len(nodes)} number of nodes using model {model}") + print( + f"Creating an index with {len(nodes)} number of nodes using embedding model {embed_model} and querying LLM {model}" + ) index = li.VectorStoreIndex(nodes, service_context=service_context) return index From cf5fd6c3bab3df31bb15e79c9ead0d8bc891fe90 Mon Sep 17 00:00:00 2001 From: Paul Lam Date: Sun, 24 Sep 2023 06:41:08 +0900 Subject: [PATCH 5/5] docstring to time_function --- mind_palace/measure.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mind_palace/measure.py b/mind_palace/measure.py index 94a8553..b666a8e 100644 --- a/mind_palace/measure.py +++ b/mind_palace/measure.py @@ -2,6 +2,8 @@ def time_function(func): + """Decorator to time and print timing of a single function call""" + def timed_func(*args, **kwargs): start_time = time.time() result = func(*args, **kwargs)