From be7073805cefead76b08a7758ac917e278622778 Mon Sep 17 00:00:00 2001
From: Paul Lam <paul@quantisan.com>
Date: Sun, 24 Sep 2023 06:29:37 +0900
Subject: [PATCH 1/5] refactor measure.time_function as a decorator

---
 mind_palace/measure.py | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/mind_palace/measure.py b/mind_palace/measure.py
index 598437c..94a8553 100644
--- a/mind_palace/measure.py
+++ b/mind_palace/measure.py
@@ -2,8 +2,12 @@
 
 
 def time_function(func):
-    start_time = time.time()
-    result = func()
-    end_time = time.time()
-    elapsed_time = end_time - start_time
-    return result, elapsed_time
+    def timed_func(*args, **kwargs):
+        start_time = time.time()
+        result = func(*args, **kwargs)
+        end_time = time.time()
+        elapsed_sec = end_time - start_time
+        print(f"Elapsed time calling {func.__name__}(): {elapsed_sec:.1f} seconds")
+        return result
+
+    return timed_func

From 446e5d4f1f73d68ddfea574efd1efdc3c5628dfe Mon Sep 17 00:00:00 2001
From: Paul Lam <paul@quantisan.com>
Date: Sun, 24 Sep 2023 06:29:50 +0900
Subject: [PATCH 2/5] apply @time_function

---
 mind_palace/index.py           | 3 +++
 tests/end_to_end/test_query.py | 6 ++----
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/mind_palace/index.py b/mind_palace/index.py
index a1d5b59..ea581cf 100644
--- a/mind_palace/index.py
+++ b/mind_palace/index.py
@@ -1,7 +1,10 @@
 from llama_index.llms import OpenAI
 import llama_index as li
 
+from measure import time_function
 
+
+@time_function
 def index_nodes(nodes, model="gpt-3.5-turbo"):
     service_context = li.ServiceContext.from_defaults(
         llm=OpenAI(model=model),
diff --git a/tests/end_to_end/test_query.py b/tests/end_to_end/test_query.py
index b56c81a..d877bf8 100644
--- a/tests/end_to_end/test_query.py
+++ b/tests/end_to_end/test_query.py
@@ -1,12 +1,10 @@
-from tests.context import extract, index, measure
+from tests.context import extract, index
 import llama_index as li
 
 
 def bootstrap_index(xml_dir):
     nodes = extract.seed_nodes(xml_dir)
-    vector_index, elapsed_time = measure.time_function(lambda: index.index_nodes(nodes))
-    print(f"Elapsed time {elapsed_time:.1f} seconds: Indexed {len(nodes)} nodes")
-    return vector_index
+    return index.index_nodes(nodes)
 
 
 def persist_index(vector_index):

From d497c3f7fed89031facd607d1a89182802ad521d Mon Sep 17 00:00:00 2001
From: Paul Lam <paul@quantisan.com>
Date: Sun, 24 Sep 2023 06:39:18 +0900
Subject: [PATCH 3/5] add a test_bootstrap_index

---
 tests/end_to_end/test_query.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/tests/end_to_end/test_query.py b/tests/end_to_end/test_query.py
index d877bf8..8ad5aa1 100644
--- a/tests/end_to_end/test_query.py
+++ b/tests/end_to_end/test_query.py
@@ -1,5 +1,6 @@
 from tests.context import extract, index
 import llama_index as li
+import pytest
 
 
 def bootstrap_index(xml_dir):
@@ -7,6 +8,13 @@ def bootstrap_index(xml_dir):
     return index.index_nodes(nodes)
 
 
+@pytest.mark.skip(reason="This test takes a minute to run")
+def test_bootstrap_index():
+    xml_dir = "./resources/xmls/12-pdfs-from-steve-aug-22/"
+    vector_index = bootstrap_index(xml_dir)
+    assert vector_index is not None
+
+
 def persist_index(vector_index):
     vector_index.storage_context.persist()
 

From de1657cbbe7bc7302e85b5232dc937b75a606d24 Mon Sep 17 00:00:00 2001
From: Paul Lam <paul@quantisan.com>
Date: Sun, 24 Sep 2023 06:39:45 +0900
Subject: [PATCH 4/5] edit index_nodes log to print both models

---
 mind_palace/index.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/mind_palace/index.py b/mind_palace/index.py
index ea581cf..da783a5 100644
--- a/mind_palace/index.py
+++ b/mind_palace/index.py
@@ -6,13 +6,16 @@
 
 @time_function
 def index_nodes(nodes, model="gpt-3.5-turbo"):
+    embed_model = "local"
     service_context = li.ServiceContext.from_defaults(
         llm=OpenAI(model=model),
-        embed_model="local"
+        embed_model=embed_model
         # Q: how much does a better embedding model help?
     )
 
-    print(f"Creating an index with {len(nodes)} number of nodes using model {model}")
+    print(
+        f"Creating an index with {len(nodes)} number of nodes using embedding model {embed_model} and querying LLM {model}"
+    )
     index = li.VectorStoreIndex(nodes, service_context=service_context)
 
     return index

From cf5fd6c3bab3df31bb15e79c9ead0d8bc891fe90 Mon Sep 17 00:00:00 2001
From: Paul Lam <paul@quantisan.com>
Date: Sun, 24 Sep 2023 06:41:08 +0900
Subject: [PATCH 5/5] add docstring to time_function

---
 mind_palace/measure.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/mind_palace/measure.py b/mind_palace/measure.py
index 94a8553..b666a8e 100644
--- a/mind_palace/measure.py
+++ b/mind_palace/measure.py
@@ -2,6 +2,8 @@
 
 
 def time_function(func):
+    """Decorator that prints the elapsed time of each call to the wrapped function."""
+
     def timed_func(*args, **kwargs):
         start_time = time.time()
         result = func(*args, **kwargs)