From 653945608e43e2ca98eccb814f7885b3a7df0e7f Mon Sep 17 00:00:00 2001 From: Jerry Liu Date: Fri, 17 May 2024 00:53:41 -0700 Subject: [PATCH] cr --- examples/caltrain/caltrain_text_mode.ipynb | 259 +++++++++++---------- 1 file changed, 130 insertions(+), 129 deletions(-) diff --git a/examples/caltrain/caltrain_text_mode.ipynb b/examples/caltrain/caltrain_text_mode.ipynb index 6526fe1..b860f95 100644 --- a/examples/caltrain/caltrain_text_mode.ipynb +++ b/examples/caltrain/caltrain_text_mode.ipynb @@ -28,14 +28,13 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "e6ae2e38-30c9-4865-aa13-47780bc3848f", - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ "import nest_asyncio\n", + "\n", "nest_asyncio.apply()" ] }, @@ -43,9 +42,7 @@ "cell_type": "code", "execution_count": null, "id": "335ce1d0-757a-4f09-846e-21c409768871", - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ "!wget \"https://www.caltrain.com/media/31602/download?inline?inline\" -O caltrain_schedule_weekend.pdf" @@ -65,14 +62,28 @@ "cell_type": "code", "execution_count": null, "id": "54aa9579-84d4-49bc-ab54-5474e69c1188", - "metadata": { - "tags": [] - }, - "outputs": [], + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/jerryliu/Programming/llama_parse/.venv/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Started parsing the file under job_id 5f73353a-1f4b-480d-9eea-58d1d22b75f6\n" + ] + } + ], "source": [ "from llama_parse import LlamaParse\n", "\n", - "docs = LlamaParse(result_type=\"text\").load_data('./caltrain_schedule_weekend.pdf')" + "docs = LlamaParse(result_type=\"text\").load_data(\"./caltrain_schedule_weekend.pdf\")" ] }, { @@ -85,11 +96,9 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "id": "4928281a-591a-4653-b451-b2b8112a7101", - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -178,71 +187,66 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "id": "b3e985b6-9d38-449f-9cf9-aae166824eed", - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ "from llama_index.core import SummaryIndex\n", "from llama_index.llms.openai import OpenAI\n", "\n", - "llm = OpenAI(model=\"gpt-4-1106-preview\")\n", + "llm = OpenAI(model=\"gpt-4o\")\n", "index = SummaryIndex.from_documents(docs)\n", "query_engine = index.as_query_engine(llm=llm)" ] }, { "cell_type": "code", - "execution_count": 17, + "execution_count": null, "id": "66eb0976-2cd6-4b14-9083-124baae9ed5d", - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ - "response = query_engine.query(\"What are the stops (and times) for train no 233 northbound?\")" + "response = query_engine.query(\n", + " \"What are the stops (and times) for train no 237 northbound?\"\n", + ")" ] }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "id": "7dc6f275-07f4-429e-9335-f50982fe974c", - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "The stops and times for train number 233 northbound are as follows:\n", + "The stops and times for train no. 237 northbound are as follows:\n", "\n", - "- Tamien at 11:05a\n", - "- San Jose Diridon at 11:12a\n", - "- Santa Clara at 11:18a\n", - "- Lawrence at 11:24a\n", - "- Sunnyvale at 11:28a\n", - "- Mountain View at 11:34a\n", - "- San Antonio at 11:37a\n", - "- California Ave at 11:42a\n", - "- Palo Alto at 11:46a\n", - "- Menlo Park at 11:50a\n", - "- Redwood City at 11:56a\n", - "- San Carlos at 12:01p\n", - "- Belmont at 12:04p\n", - "- Hillsdale at 12:08p\n", - "- Hayward Park at 12:11p\n", - "- San Mateo at 12:15p\n", - "- Burlingame at 12:19p\n", - "- Broadway at 12:22p\n", - "- Millbrae at 12:26p\n", - "- San Bruno at 12:30p\n", - "- S. San Francisco at 12:34p\n", - "- Bayshore at 12:41p\n", - "- 22nd Street at 12:46p\n", - "- San Francisco at 12:53p\n" + "- San Jose Diridon: 12:12 PM\n", + "- Santa Clara: 12:18 PM\n", + "- Lawrence: 12:24 PM\n", + "- Sunnyvale: 12:28 PM\n", + "- Mountain View: 12:34 PM\n", + "- San Antonio: 12:37 PM\n", + "- California Ave: 12:42 PM\n", + "- Palo Alto: 12:46 PM\n", + "- Menlo Park: 12:50 PM\n", + "- Redwood City: 12:56 PM\n", + "- San Carlos: 1:01 PM\n", + "- Belmont: 1:04 PM\n", + "- Hillsdale: 1:08 PM\n", + "- Hayward Park: 1:11 PM\n", + "- San Mateo: 1:15 PM\n", + "- Burlingame: 1:19 PM\n", + "- Broadway: 1:22 PM\n", + "- Millbrae: 1:26 PM\n", + "- San Bruno: 1:30 PM\n", + "- S. San Francisco: 1:34 PM\n", + "- Bayshore: 1:41 PM\n", + "- 22nd Street: 1:46 PM\n", + "- San Francisco: 1:52 PM\n" ] } ], @@ -252,14 +256,14 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": null, "id": "229c4cb0-cf94-4a9f-bc7c-590388f50c1f", - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ - "response = query_engine.query(\"What are all the trains (and times) that end at Tamien going Southbound?\")" + "response = query_engine.query(\n", + " \"What are all the trains (and times) that end at Tamien going Southbound?\"\n", + ")" ] }, { @@ -267,32 +271,29 @@ "id": "6cf9fce0-5067-48f6-a7ef-62aa9e2edc3d", "metadata": {}, "source": [ - "It gets most of the answers correct (to be fair it misses the last train 284 at 1:49am)" + "It gets most of the answers correct (to be fair it misses two trains)." ] }, { "cell_type": "code", - "execution_count": 24, + "execution_count": null, "id": "51cf03ff-7728-4815-ab72-3bf54fc4a2c0", - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "The trains that end at Tamien going Southbound and their respective times are:\n", + "The trains that end at Tamien going Southbound are:\n", "\n", "- Train 224 at 10:15a\n", "- Train 228 at 11:45a\n", - "- Train 232 at 12:45p\n", "- Train 240 at 2:45p\n", "- Train 248 at 4:45p\n", "- Train 256 at 6:45p\n", "- Train 264 at 8:45p\n", "- Train 272 at 10:45p\n", - "- Train 280 at 12:44a\n" + "- Train 284 at 1:49a\n" ] } ], @@ -312,18 +313,16 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": null, "id": "364e5155-cc75-4302-a754-9444ae28e6b1", - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ "from llama_index.core import SimpleDirectoryReader\n", "from llama_index.core import SummaryIndex\n", "from llama_index.llms.openai import OpenAI\n", "\n", - "llm = OpenAI(model=\"gpt-4-1106-preview\")\n", + "llm = OpenAI(model=\"gpt-4o\")\n", "input_file = \"caltrain_schedule_weekend.pdf\"\n", "reader = SimpleDirectoryReader(input_files=[input_file])\n", "base_docs = reader.load_data()\n", @@ -333,11 +332,9 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": null, "id": "a4011389-2d27-4a1a-bf8d-7309da28ab15", - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -408,53 +405,52 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": null, "id": "42203c70-7ca7-4200-bf47-6282eefca3bf", - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ - "base_response = base_query_engine.query(\"What are the stops (and times) for train no 233 northbound?\")" + "base_response = base_query_engine.query(\n", + " \"What are the stops (and times) for train no 237 northbound?\"\n", + ")" ] }, { "cell_type": "code", - "execution_count": 22, + "execution_count": null, "id": "06aa47b6-0f31-4b2d-90f0-bf6c74befd38", - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Train No. 233 northbound makes the following stops at the times listed:\n", + "Train No. 237 northbound stops at the following stations and times:\n", "\n", - "- San Jose Diridon: 10:12a\n", - "- Santa Clara: 10:18a\n", - "- Lawrence: 10:24a\n", - "- Sunnyvale: 10:28a\n", - "- Mountain View: 10:34a\n", - "- San Antonio: 10:37a\n", - "- California Ave: 10:42a\n", - "- Palo Alto: 10:46a\n", - "- Menlo Park: 10:50a\n", - "- Redwood City: 10:56a\n", - "- San Carlos: 11:01a\n", - "- Belmont: 11:04a\n", - "- Hillsdale: 11:08a\n", - "- Hayward Park: 11:11a\n", - "- San Mateo: 11:15a\n", - "- Burlingame: 11:19a\n", - "- Broadway: 11:22a\n", - "- Millbrae: 11:26a\n", - "- San Bruno: 11:30a\n", - "- S. San Francisco: 11:34a\n", - "- Bayshore: 11:41a\n", - "- 22nd Street: 11:46a\n", - "- San Francisco: 11:53a\n" + "- Tamien: 1:05p\n", + "- San Jose Diridon: 1:12p\n", + "- Santa Clara: 1:18p\n", + "- Lawrence: 1:24p\n", + "- Sunnyvale: 1:28p\n", + "- Mountain View: 1:34p\n", + "- San Antonio: 1:37p\n", + "- California Ave: 1:42p\n", + "- Palo Alto: 1:46p\n", + "- Menlo Park: 1:50p\n", + "- Redwood City: 1:56p\n", + "- San Carlos: 2:01p\n", + "- Belmont: 2:04p\n", + "- Hillsdale: 2:08p\n", + "- Hayward Park: 2:11p\n", + "- San Mateo: 2:15p\n", + "- Burlingame: 2:19p\n", + "- Broadway: 2:22p\n", + "- Millbrae: 2:26p\n", + "- San Bruno: 2:30p\n", + "- S. San Francisco: 2:34p\n", + "- Bayshore: 2:41p\n", + "- 22nd Street: 2:46p\n", + "- San Francisco: 2:52p\n" ] } ], @@ -464,44 +460,50 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": null, "id": "4f3c1de7-3351-4cd8-991c-34a777952194", "metadata": {}, "outputs": [], "source": [ - "base_response = base_query_engine.query(\"What are all the trains (and times) that end at Tamien going Southbound?\")" + "base_response = base_query_engine.query(\n", + " \"What are all the trains (and times) that end at Tamien going Southbound?\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "513b1007-7508-4fb1-836c-de9353433a67", + "metadata": {}, + "source": [ + "Note that the trains don't line up with the times!" ] }, { "cell_type": "code", - "execution_count": 26, + "execution_count": null, "id": "108edb92-76af-406b-a139-8b9e7c6528f2", - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "The trains that end at Tamien going Southbound on weekends are:\n", + "The trains that end at Tamien going Southbound are:\n", "\n", - "- Train No. 254 at 12:44a\n", - "- Train No. 258 at 1:49a\n" + "- Train 224 at 10:15a\n", + "- Train 228 at 11:45a\n", + "- Train 240 at 2:45p\n", + "- Train 252 at 4:45p\n", + "- Train 264 at 6:45p\n", + "- Train 276 at 8:45p\n", + "- Train 284 at 10:45p\n", + "- Train 284 at 12:44a\n" ] } ], "source": [ "print(str(base_response))" ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2b54c355-6db9-462a-a73e-709fbab39d24", - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { @@ -519,8 +521,7 @@ "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.8" + "pygments_lexer": "ipython3" } }, "nbformat": 4,