diff --git a/README.md b/README.md index bb5609ca..4a74c7f6 100644 --- a/README.md +++ b/README.md @@ -52,7 +52,7 @@ If you're looking for in-depth tutorial-like examples, checkout the [tutorials]( | [RAG Fusion](/examples/RAG_Fusion/) | Open In Colab [![LLM](https://img.shields.io/badge/openai-api-white)](#)| | [Hybrid search BM25 & lancedb ](./examples/Hybrid_search_bm25_lancedb/) | Open In Colab [![LLM](https://img.shields.io/badge/openai-api-white)](#)|[![Medium](https://img.shields.io/badge/Medium-12100E?style=for-the-badge&logo=medium&logoColor=white)](https://blog.lancedb.com/hybrid-search-combining-bm25-and-semantic-search-for-better-results-with-lan-1358038fe7e6)| | [Evaluating Prompts with Prompttools](/examples/prompttools-eval-prompts/) | Open In Colab [![LLM](https://img.shields.io/badge/openai-api-white)](#) [![local LLM](https://img.shields.io/badge/local-llm-green)](#)| | -| [NER powered with Semantic Search](/tutorials/NER-powered-Semantic-Search/) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/lancedb/vectordb-recipes/blob/main/tutorials/NER-powered-Semantic-Search/NER_powered_Semantic_Search_with_LanceDB.ipynb)[![local LLM](https://img.shields.io/badge/local-llm-green)](#) | [![Medium](https://img.shields.io/badge/Medium-12100E?style=for-the-badge&logo=medium&logoColor=white)](https://blog.lancedb.com/ner-powered-semantic-search-using-lancedb-51051dc3e493)| +| [NER powered with Semantic Search](/tutorials/NER-powered-Semantic-Search/) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/lancedb/vectordb-recipes/blob/main/tutorials/NER-powered-Semantic-Search/NER_powered_Semantic_Search_with_LanceDB.ipynb) [![local LLM](https://img.shields.io/badge/local-llm-green)](#) | 
[![Medium](https://img.shields.io/badge/Medium-12100E?style=for-the-badge&logo=medium&logoColor=white)](https://blog.lancedb.com/ner-powered-semantic-search-using-lancedb-51051dc3e493)| [Sentiment Analysis : Analysing Hotel Reviews](/examples/Sentiment-Analysis-Analyse-Hotel-Reviews/) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/lancedb/vectordb-recipes/blob/main/examples/Sentiment-Analysis-Analyse-Hotel-Reviews/Sentiment_Analysis_using_LanceDB.ipynb) [![local LLM](https://img.shields.io/badge/local-llm-green)](#) | [![Medium](https://img.shields.io/badge/Medium-12100E?style=for-the-badge&logo=medium&logoColor=white)](https://blog.lancedb.com/sentiment-analysis-using-lancedb-2da3cb1e3fa6)| | [Facial Recognition](./examples/facial_recognition) | Open In Colab | | [Accelerate Vector Search Applications Using OpenVINO](/tutorials/Accelerate-Vector-Search-Applications-Using-OpenVINO/) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/lancedb/vectordb-recipes/blob/main/tutorials/Accelerate-Vector-Search-Applications-Using-OpenVINO/clip_text_image_search.ipynb) [![local LLM](https://img.shields.io/badge/local-llm-green)](#)| [![Medium](https://img.shields.io/badge/Medium-12100E?style=for-the-badge&logo=medium&logoColor=white)](https://blog.lancedb.com/accelerate-vector-search-applications-using-openvino-51366eabf866)| @@ -87,15 +87,15 @@ Looking to get started with LLMs, vectorDBs, and the world of Generative AI? 
The | Tutorial | Interactive Environment | Blog Link | | --------- | -------------------------- | ----------- | | | | | -| [Corrective RAG with Langgraph](./tutorials/Corrective-RAG-with_Langgraph/) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/lancedb/vectordb-recipes/blob/main/tutorials/Corrective-RAG-with_Langgraph/CRAG_with_Langgraph.ipynb) | | +| [Corrective RAG with Langgraph](./tutorials/Corrective-RAG-with_Langgraph/) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/lancedb/vectordb-recipes/blob/main/tutorials/Corrective-RAG-with_Langgraph/CRAG_with_Langgraph.ipynb) [![LLM](https://img.shields.io/badge/openai-api-white)](#) | | | [Product Quantization: Compress High Dimensional Vectors](https://blog.lancedb.com/product-quantization-compress-high-dimensional-vectors-dfcba98fab47) | | [![Medium](https://img.shields.io/badge/Medium-12100E?style=for-the-badge&logo=medium&logoColor=white)](https://blog.lancedb.com/product-quantization-compress-high-dimensional-vectors-dfcba98fab47) | | [LLMs, RAG, & the missing storage layer for AI](https://medium.com/etoai/llms-rag-the-missing-storage-layer-for-ai-28ded35fa984) | | [![Medium](https://img.shields.io/badge/Medium-12100E?style=for-the-badge&logo=medium&logoColor=white)](https://medium.com/etoai/llms-rag-the-missing-storage-layer-for-ai-28ded35fa984) | -| [Fine-Tuning LLM using PEFT & QLoRA](./tutorials/fine-tuning_LLM_with_PEFT_QLoRA) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/lancedb/vectordb-recipes/blob/main/tutorials/fine-tuning_LLM_with_PEFT_QLoRA/main.ipynb) | [![Medium](https://img.shields.io/badge/Medium-12100E?style=for-the-badge&logo=medium&logoColor=white)](https://blog.lancedb.com/optimizing-llms-a-step-by-step-guide-to-fine-tuning-with-peft-and-qlora-22eddd13d25b) | -| [Context-Aware 
Chatbot using Llama 2 & LanceDB](./tutorials/chatbot_using_Llama2_&_lanceDB) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/lancedb/vectordb-recipes/blob/main/tutorials/chatbot_using_Llama2_&_lanceDB/main.ipynb) | [![Medium](https://img.shields.io/badge/Medium-12100E?style=for-the-badge&logo=medium&logoColor=white)](https://blog.lancedb.com/context-aware-chatbot-using-llama-2-lancedb-as-vector-database-4d771d95c755) | +| [Fine-Tuning LLM using PEFT & QLoRA](./tutorials/fine-tuning_LLM_with_PEFT_QLoRA) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/lancedb/vectordb-recipes/blob/main/tutorials/fine-tuning_LLM_with_PEFT_QLoRA/main.ipynb) [![local LLM](https://img.shields.io/badge/local-llm-green)](#)| [![Medium](https://img.shields.io/badge/Medium-12100E?style=for-the-badge&logo=medium&logoColor=white)](https://blog.lancedb.com/optimizing-llms-a-step-by-step-guide-to-fine-tuning-with-peft-and-qlora-22eddd13d25b) | +| [Context-Aware Chatbot using Llama 2 & LanceDB](./tutorials/chatbot_using_Llama2_&_lanceDB) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/lancedb/vectordb-recipes/blob/main/tutorials/chatbot_using_Llama2_&_lanceDB/main.ipynb) [![local LLM](https://img.shields.io/badge/local-llm-green)](#)| [![Medium](https://img.shields.io/badge/Medium-12100E?style=for-the-badge&logo=medium&logoColor=white)](https://blog.lancedb.com/context-aware-chatbot-using-llama-2-lancedb-as-vector-database-4d771d95c755) | | [A Primer on Text Chunking and its Types](./tutorials/different-types-text-chunking-in-RAG) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/lancedb/vectordb-recipes/blob/main/tutorials/different-types-text-chunking-in-RAG/Text_Chunking_on_RAG_application_with_LanceDB.ipynb) | 
[![Medium](https://img.shields.io/badge/Medium-12100E?style=for-the-badge&logo=medium&logoColor=white)](https://blog.lancedb.com/a-primer-on-text-chunking-and-its-types-a420efc96a13) | -| [NER powered Semantic Search](./tutorials/NER-powered-Semantic-Search) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/lancedb/vectordb-recipes/blob/main/tutorials/NER-powered-Semantic-Search/NER_powered_Semantic_Search_with_LanceDB.ipynb) | [![Medium](https://img.shields.io/badge/Medium-12100E?style=for-the-badge&logo=medium&logoColor=white)](https://medium.com/etoai/ner-powered-semantic-search-using-lancedb-51051dc3e493) | -| [Better RAG with FLARE](./tutorials/better-rag-FLAIR) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/lancedb/vectordb-recipes/blob/main/tutorials/better-rag-FLAIR/main.ipynb) |[![Medium](https://img.shields.io/badge/Medium-12100E?style=for-the-badge&logo=medium&logoColor=white)](https://medium.com/@aksdesai1998/better-rag-enhancing-ai-with-active-retrieval-augmented-generation-flare-3b66646e2a9f) | -| [Accelerate Vector Search Applications Using OpenVINO](./tutorials/Sentiment-Analysis-using-LanceDB) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/lancedb/vectordb-recipes/blob/main/tutorials/Accelerate-Vector-Search-Applications-Using-OpenVINO/clip_text_image_search.ipynb) | [![Medium](https://img.shields.io/badge/Medium-12100E?style=for-the-badge&logo=medium&logoColor=white)](https://blog.lancedb.com/accelerate-vector-search-applications-using-openvino-51366eabf866)| +| [NER powered Semantic Search](./tutorials/NER-powered-Semantic-Search) | [![Open In 
Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/lancedb/vectordb-recipes/blob/main/tutorials/NER-powered-Semantic-Search/NER_powered_Semantic_Search_with_LanceDB.ipynb) [![local LLM](https://img.shields.io/badge/local-llm-green)](#)| [![Medium](https://img.shields.io/badge/Medium-12100E?style=for-the-badge&logo=medium&logoColor=white)](https://medium.com/etoai/ner-powered-semantic-search-using-lancedb-51051dc3e493) | +| [Better RAG with FLARE](./tutorials/better-rag-FLAIR) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/lancedb/vectordb-recipes/blob/main/tutorials/better-rag-FLAIR/main.ipynb) [![local LLM](https://img.shields.io/badge/local-llm-green)](#) [![LLM](https://img.shields.io/badge/openai-api-white)](#)|[![Medium](https://img.shields.io/badge/Medium-12100E?style=for-the-badge&logo=medium&logoColor=white)](https://medium.com/@aksdesai1998/better-rag-enhancing-ai-with-active-retrieval-augmented-generation-flare-3b66646e2a9f) | +| [Accelerate Vector Search Applications Using OpenVINO](./tutorials/Sentiment-Analysis-using-LanceDB) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/lancedb/vectordb-recipes/blob/main/tutorials/Accelerate-Vector-Search-Applications-Using-OpenVINO/clip_text_image_search.ipynb) [![local LLM](https://img.shields.io/badge/local-llm-green)](#)| [![Medium](https://img.shields.io/badge/Medium-12100E?style=for-the-badge&logo=medium&logoColor=white)](https://blog.lancedb.com/accelerate-vector-search-applications-using-openvino-51366eabf866)| diff --git a/examples/RAG_re_ranking/lancedb_cloud/README.md b/examples/RAG_re_ranking/lancedb_cloud/README.md index 7ea69462..96a2d95b 100644 --- a/examples/RAG_re_ranking/lancedb_cloud/README.md +++ b/examples/RAG_re_ranking/lancedb_cloud/README.md @@ -12,6 +12,13 @@ if you would like to set api key 
through an environment variable: ``` export LANCEDB_API_KEY="sk_..." ``` +or +``` +import os +import getpass + +os.environ["LANCEDB_API_KEY"] = getpass.getpass("Enter Your LANCEDB API Key:") +``` replace the following lines in main.py with your project slug and api key" ``` diff --git a/examples/RAG_re_ranking/lancedb_cloud/main.ipynb b/examples/RAG_re_ranking/lancedb_cloud/main.ipynb index 21b6364b..8d1999e7 100644 --- a/examples/RAG_re_ranking/lancedb_cloud/main.ipynb +++ b/examples/RAG_re_ranking/lancedb_cloud/main.ipynb @@ -39,71 +39,46 @@ }, { "cell_type": "markdown", + "metadata": { + "id": "wgPbKbpumkhH" + }, "source": [ "## Credentials\n", "\n", "Copy and paste the project name and the api key from your project page.\n", "These will be used later to [connect to LanceDB Cloud](#scroll-to=5q8m6GMD7sGu)" - ], - "metadata": { - "id": "wgPbKbpumkhH" - } + ] }, { "cell_type": "code", - "source": [ - "project_slug = \"your-project-slug\" # @param {type:\"string\"}" - ], + "execution_count": 1, "metadata": { "id": "rqEXT5-fmofw" }, - "execution_count": 1, - "outputs": [] + "outputs": [], + "source": [ + "project_slug = \"your-project-slug\" # @param {type:\"string\"}" + ] }, { "cell_type": "code", - "source": [ - "api_key = \"sk_...\" # @param {type:\"string\"}" - ], + "execution_count": 2, "metadata": { "id": "5LYmBomPmswi" }, - "execution_count": 2, - "outputs": [] + "outputs": [], + "source": [ + "api_key = \"sk_...\" # @param {type:\"string\"}" + ] }, { "cell_type": "markdown", - "source": [ - "You can also set the LANCEDB_API_KEY as an environment variable with one of the options below" - ], "metadata": { "id": "Xs6tr6CMnBrr" - } - }, - { - "cell_type": "code", - "source": [ - "!export LANCEDB_API_KEY=\"sk_...\"" - ], - "metadata": { - "id": "RtjJHRNHnEHE" }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", "source": [ - "import os\n", - "import getpass\n", - "\n", - "os.environ[\"LANCEDB_API_KEY\"] = getpass.getpass(\"Enter Your 
LANCEDB API Key:\")" - ], - "metadata": { - "id": "Qsk18VNxnGIg" - }, - "execution_count": null, - "outputs": [] + "You can also set the LANCEDB_API_KEY as an environment variable. More details can be found **here**." + ] }, { "cell_type": "markdown", @@ -126,8 +101,8 @@ }, "outputs": [ { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m87.4/87.4 kB\u001b[0m \u001b[31m1.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m8.4/8.4 MB\u001b[0m \u001b[31m18.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", @@ -335,8 +310,8 @@ }, "outputs": [ { - "output_type": "stream", "name": "stderr", + "output_type": "stream", "text": [ "/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_token.py:88: UserWarning: \n", "The secret `HF_TOKEN` does not exist in your Colab secrets.\n", @@ -347,172 +322,172 @@ ] }, { - "output_type": "display_data", "data": { - "text/plain": [ - "tokenizer_config.json: 0%| | 0.00/396 [00:00\n", @@ -951,10 +909,27 @@ "\n", " \n", " \n" + ], + "text/plain": [ + " _id \\\n", + "13181 6df3dc585e32f3b1cb49228d94a5469c30d79d2b \n", + "18678 784376563c94e231241fbcf71d4d2774aec4b935 \n", + "4444 19751e0f81a103658bbac2506f5d5c8e06a1c06a \n", + "\n", + " title \\\n", + "13181 High Performance Computer Acoustic Data Accele... \n", + "18678 A Comparison over Focused Web Crawling Strategies \n", + "4444 STDP-based spiking deep convolutional neural n... \n", + "\n", + " text \n", + "13181 This paper presents a new software model desig... \n", + "18678 In this paper we review and compare focused cr... \n", + "4444 Previous studies have shown that spike-timing-... 
" ] }, + "execution_count": 5, "metadata": {}, - "execution_count": 5 + "output_type": "execute_result" } ], "source": [ @@ -1003,18 +978,18 @@ }, "outputs": [ { - "output_type": "display_data", "data": { - "text/plain": [ - " 0%| | 0/79 [00:00 Classification of human activity by using a Stacked Autoencoder\n" ] }, { - "output_type": "execute_result", "data": { - "text/plain": [ - " _id \\\n", - "0 83d323a5bb26b706d4f6d24eb27411a7e7ff57e6 \n", - "1 a3345798b1faf238e8d805bbe9124b0b8e0c869f \n", - "2 e0534bfb477c5a82e98d0cb386ae3eb31d349c91 \n", - "3 c65945c08b7fd77ffd2c53369e8928699c3993e7 \n", - "4 1af2e075903a3cc5ad5a192921a0b4fb67645dc1 \n", - "5 3979cf5a013063e98ad0caf2e7110c2686cf1640 \n", - "6 0fb926cae217b70c97c74eb70b2a6b8c47574812 \n", - "7 488257dcbc7bcb56836f10a410e69c2c283989e5 \n", - "8 65f415c6d88aca139867702fc64aa179781b8e65 \n", - "9 244fc78ce607812edb90290727dab4d33377e986 \n", - "\n", - " title \\\n", - "0 Protective action of green tea catechins in ne... \n", - "1 Autophagy as a regulated pathway of cellular d... \n", - "2 Cellular and molecular mechanisms of hepatocel... \n", - "3 Comparing Alzheimer’s and Parkinson’s diseases... \n", - "4 Mathematical models of cancer metabolism. \n", - "5 Basic local alignment search tool. \n", - "6 Network biology: understanding the cell's func... \n", - "7 mTOR Signaling in Growth Control and Disease \n", - "8 PID: the Pathway Interaction Database \n", - "9 Transfer of mitochondria via tunneling nanotub... \n", - "\n", - " text \\\n", - "0 Mitochondria are central players in the regula... \n", - "1 Macroautophagy is a dynamic process involving ... \n", - "2 Hepatocellular carcinoma (HCC) is the most com... \n", - "3 Recent advances in large datasets analysis off... \n", - "4 Metabolism is essential for life, and its alte... \n", - "5 A new approach to rapid sequence comparison, b... \n", - "6 A key aim of postgenomic biomedical research i... \n", - "7 The mechanistic target of rapamycin (mTOR) sig... 
\n", - "8 The Pathway Interaction Database (PID, http://... \n", - "9 Tunneling nanotubes (TNTs) are F-actin-based m... \n", - "\n", - " vector _distance \n", - "0 [-0.014866754, 0.0028244434, -0.023141732, 0.0... 0.281554 \n", - "1 [-0.042504933, 0.00053501845, -0.016986104, 0.... 0.312909 \n", - "2 [0.03984485, 0.01583628, -0.00934351, -0.02993... 0.366526 \n", - "3 [-0.004613025, -0.0044279257, -0.013920496, 0.... 0.369777 \n", - "4 [-0.0037386382, 0.011562068, -0.022479024, 0.0... 0.370503 \n", - "5 [-0.006935188, 0.020925103, -0.051218845, 0.00... 0.372769 \n", - "6 [0.012990677, 0.028128441, -0.006426807, -0.02... 0.376812 \n", - "7 [0.0006567143, 0.0053487234, -0.0010087299, -0... 0.376821 \n", - "8 [-0.007852315, 0.014019204, -0.026789214, -0.0... 0.378377 \n", - "9 [-0.0063375738, 0.006348416, -0.034239322, 0.0... 0.380112 " - ], "text/html": [ "\n", "
\n", @@ -1438,15 +1363,65 @@ "
\n", " \n", " \n" - ] - }, - "metadata": {}, - "execution_count": 8 - } - ], - "source": [ - "def search(query, top_k=10):\n", - " \"\"\"\n", + ], + "text/plain": [ + " _id \\\n", + "0 83d323a5bb26b706d4f6d24eb27411a7e7ff57e6 \n", + "1 a3345798b1faf238e8d805bbe9124b0b8e0c869f \n", + "2 e0534bfb477c5a82e98d0cb386ae3eb31d349c91 \n", + "3 c65945c08b7fd77ffd2c53369e8928699c3993e7 \n", + "4 1af2e075903a3cc5ad5a192921a0b4fb67645dc1 \n", + "5 3979cf5a013063e98ad0caf2e7110c2686cf1640 \n", + "6 0fb926cae217b70c97c74eb70b2a6b8c47574812 \n", + "7 488257dcbc7bcb56836f10a410e69c2c283989e5 \n", + "8 65f415c6d88aca139867702fc64aa179781b8e65 \n", + "9 244fc78ce607812edb90290727dab4d33377e986 \n", + "\n", + " title \\\n", + "0 Protective action of green tea catechins in ne... \n", + "1 Autophagy as a regulated pathway of cellular d... \n", + "2 Cellular and molecular mechanisms of hepatocel... \n", + "3 Comparing Alzheimer’s and Parkinson’s diseases... \n", + "4 Mathematical models of cancer metabolism. \n", + "5 Basic local alignment search tool. \n", + "6 Network biology: understanding the cell's func... \n", + "7 mTOR Signaling in Growth Control and Disease \n", + "8 PID: the Pathway Interaction Database \n", + "9 Transfer of mitochondria via tunneling nanotub... \n", + "\n", + " text \\\n", + "0 Mitochondria are central players in the regula... \n", + "1 Macroautophagy is a dynamic process involving ... \n", + "2 Hepatocellular carcinoma (HCC) is the most com... \n", + "3 Recent advances in large datasets analysis off... \n", + "4 Metabolism is essential for life, and its alte... \n", + "5 A new approach to rapid sequence comparison, b... \n", + "6 A key aim of postgenomic biomedical research i... \n", + "7 The mechanistic target of rapamycin (mTOR) sig... \n", + "8 The Pathway Interaction Database (PID, http://... \n", + "9 Tunneling nanotubes (TNTs) are F-actin-based m... \n", + "\n", + " vector _distance \n", + "0 [-0.014866754, 0.0028244434, -0.023141732, 0.0... 
0.281554 \n", + "1 [-0.042504933, 0.00053501845, -0.016986104, 0.... 0.312909 \n", + "2 [0.03984485, 0.01583628, -0.00934351, -0.02993... 0.366526 \n", + "3 [-0.004613025, -0.0044279257, -0.013920496, 0.... 0.369777 \n", + "4 [-0.0037386382, 0.011562068, -0.022479024, 0.0... 0.370503 \n", + "5 [-0.006935188, 0.020925103, -0.051218845, 0.00... 0.372769 \n", + "6 [0.012990677, 0.028128441, -0.006426807, -0.02... 0.376812 \n", + "7 [0.0006567143, 0.0053487234, -0.0010087299, -0... 0.376821 \n", + "8 [-0.007852315, 0.014019204, -0.026789214, -0.0... 0.378377 \n", + "9 [-0.0063375738, 0.006348416, -0.034239322, 0.0... 0.380112 " + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "def search(query, top_k=10):\n", + " \"\"\"\n", " Search a query from the table\n", " \"\"\"\n", " query_vector = embed_model.encode_queries(\n", @@ -1494,76 +1469,14 @@ }, "outputs": [ { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "QUERY:-> Classification of human activity by using a Stacked Autoencoder\n" ] }, { - "output_type": "execute_result", "data": { - "text/plain": [ - " _id \\\n", - "0 a3345798b1faf238e8d805bbe9124b0b8e0c869f \n", - "1 3979cf5a013063e98ad0caf2e7110c2686cf1640 \n", - "2 83d323a5bb26b706d4f6d24eb27411a7e7ff57e6 \n", - "3 65f415c6d88aca139867702fc64aa179781b8e65 \n", - "4 0fb926cae217b70c97c74eb70b2a6b8c47574812 \n", - "5 1af2e075903a3cc5ad5a192921a0b4fb67645dc1 \n", - "6 c65945c08b7fd77ffd2c53369e8928699c3993e7 \n", - "7 488257dcbc7bcb56836f10a410e69c2c283989e5 \n", - "8 244fc78ce607812edb90290727dab4d33377e986 \n", - "9 e0534bfb477c5a82e98d0cb386ae3eb31d349c91 \n", - "\n", - " title \\\n", - "0 Autophagy as a regulated pathway of cellular d... \n", - "1 Basic local alignment search tool. \n", - "2 Protective action of green tea catechins in ne... \n", - "3 PID: the Pathway Interaction Database \n", - "4 Network biology: understanding the cell's func... 
\n", - "5 Mathematical models of cancer metabolism. \n", - "6 Comparing Alzheimer’s and Parkinson’s diseases... \n", - "7 mTOR Signaling in Growth Control and Disease \n", - "8 Transfer of mitochondria via tunneling nanotub... \n", - "9 Cellular and molecular mechanisms of hepatocel... \n", - "\n", - " text \\\n", - "0 Macroautophagy is a dynamic process involving ... \n", - "1 A new approach to rapid sequence comparison, b... \n", - "2 Mitochondria are central players in the regula... \n", - "3 The Pathway Interaction Database (PID, http://... \n", - "4 A key aim of postgenomic biomedical research i... \n", - "5 Metabolism is essential for life, and its alte... \n", - "6 Recent advances in large datasets analysis off... \n", - "7 The mechanistic target of rapamycin (mTOR) sig... \n", - "8 Tunneling nanotubes (TNTs) are F-actin-based m... \n", - "9 Hepatocellular carcinoma (HCC) is the most com... \n", - "\n", - " vector _distance \\\n", - "0 [-0.042504933, 0.00053501845, -0.016986104, 0.... 0.312909 \n", - "1 [-0.006935188, 0.020925103, -0.051218845, 0.00... 0.372769 \n", - "2 [-0.014866754, 0.0028244434, -0.023141732, 0.0... 0.281554 \n", - "3 [-0.007852315, 0.014019204, -0.026789214, -0.0... 0.378377 \n", - "4 [0.012990677, 0.028128441, -0.006426807, -0.02... 0.376812 \n", - "5 [-0.0037386382, 0.011562068, -0.022479024, 0.0... 0.370503 \n", - "6 [-0.004613025, -0.0044279257, -0.013920496, 0.... 0.369777 \n", - "7 [0.0006567143, 0.0053487234, -0.0010087299, -0... 0.376821 \n", - "8 [-0.0063375738, 0.006348416, -0.034239322, 0.0... 0.380112 \n", - "9 [0.03984485, 0.01583628, -0.00934351, -0.02993... 0.366526 \n", - "\n", - " old_similarity_rank new_scores \n", - "0 2 -3.949219 \n", - "1 6 -5.410156 \n", - "2 1 -6.652344 \n", - "3 9 -7.402344 \n", - "4 7 -7.824219 \n", - "5 5 -8.070312 \n", - "6 4 -9.007812 \n", - "7 8 -9.507812 \n", - "8 10 -9.593750 \n", - "9 3 -10.195312 " - ], "text/html": [ "\n", "
\n", @@ -1906,10 +1819,72 @@ "
\n", " \n", " \n" + ], + "text/plain": [ + " _id \\\n", + "0 a3345798b1faf238e8d805bbe9124b0b8e0c869f \n", + "1 3979cf5a013063e98ad0caf2e7110c2686cf1640 \n", + "2 83d323a5bb26b706d4f6d24eb27411a7e7ff57e6 \n", + "3 65f415c6d88aca139867702fc64aa179781b8e65 \n", + "4 0fb926cae217b70c97c74eb70b2a6b8c47574812 \n", + "5 1af2e075903a3cc5ad5a192921a0b4fb67645dc1 \n", + "6 c65945c08b7fd77ffd2c53369e8928699c3993e7 \n", + "7 488257dcbc7bcb56836f10a410e69c2c283989e5 \n", + "8 244fc78ce607812edb90290727dab4d33377e986 \n", + "9 e0534bfb477c5a82e98d0cb386ae3eb31d349c91 \n", + "\n", + " title \\\n", + "0 Autophagy as a regulated pathway of cellular d... \n", + "1 Basic local alignment search tool. \n", + "2 Protective action of green tea catechins in ne... \n", + "3 PID: the Pathway Interaction Database \n", + "4 Network biology: understanding the cell's func... \n", + "5 Mathematical models of cancer metabolism. \n", + "6 Comparing Alzheimer’s and Parkinson’s diseases... \n", + "7 mTOR Signaling in Growth Control and Disease \n", + "8 Transfer of mitochondria via tunneling nanotub... \n", + "9 Cellular and molecular mechanisms of hepatocel... \n", + "\n", + " text \\\n", + "0 Macroautophagy is a dynamic process involving ... \n", + "1 A new approach to rapid sequence comparison, b... \n", + "2 Mitochondria are central players in the regula... \n", + "3 The Pathway Interaction Database (PID, http://... \n", + "4 A key aim of postgenomic biomedical research i... \n", + "5 Metabolism is essential for life, and its alte... \n", + "6 Recent advances in large datasets analysis off... \n", + "7 The mechanistic target of rapamycin (mTOR) sig... \n", + "8 Tunneling nanotubes (TNTs) are F-actin-based m... \n", + "9 Hepatocellular carcinoma (HCC) is the most com... \n", + "\n", + " vector _distance \\\n", + "0 [-0.042504933, 0.00053501845, -0.016986104, 0.... 0.312909 \n", + "1 [-0.006935188, 0.020925103, -0.051218845, 0.00... 
0.372769 \n", + "2 [-0.014866754, 0.0028244434, -0.023141732, 0.0... 0.281554 \n", + "3 [-0.007852315, 0.014019204, -0.026789214, -0.0... 0.378377 \n", + "4 [0.012990677, 0.028128441, -0.006426807, -0.02... 0.376812 \n", + "5 [-0.0037386382, 0.011562068, -0.022479024, 0.0... 0.370503 \n", + "6 [-0.004613025, -0.0044279257, -0.013920496, 0.... 0.369777 \n", + "7 [0.0006567143, 0.0053487234, -0.0010087299, -0... 0.376821 \n", + "8 [-0.0063375738, 0.006348416, -0.034239322, 0.0... 0.380112 \n", + "9 [0.03984485, 0.01583628, -0.00934351, -0.02993... 0.366526 \n", + "\n", + " old_similarity_rank new_scores \n", + "0 2 -3.949219 \n", + "1 6 -5.410156 \n", + "2 1 -6.652344 \n", + "3 9 -7.402344 \n", + "4 7 -7.824219 \n", + "5 5 -8.070312 \n", + "6 4 -9.007812 \n", + "7 8 -9.507812 \n", + "8 10 -9.593750 \n", + "9 3 -10.195312 " ] }, + "execution_count": 9, "metadata": {}, - "execution_count": 9 + "output_type": "execute_result" } ], "source": [ @@ -1958,98 +1933,93 @@ }, "widgets": { "application/vnd.jupyter.widget-state+json": { - "25222b6616e1489eb531950c958c5fdf": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "model_module_version": "1.5.0", + "00a0f8d7b04c495b91b6decf446c50d5": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_6b6cd8dbe43440b29bf705ebc04fede7", - "IPY_MODEL_7b8c414f7aad49fe9f98760489d42ed1", - "IPY_MODEL_886c86257b4645a2a929eb39f20ab8a3" - ], - "layout": "IPY_MODEL_f918256ef4874941a1ec098ea5050f6a" + "_view_module": 
"@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null } }, - "6b6cd8dbe43440b29bf705ebc04fede7": { + "01a1a33b691d427bb5cccce1f4b79693": { "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", "state": { - "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", + "_model_name": "ProgressStyleModel", "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_187dee520d434a1eaab95c4b17723d35", - "placeholder": "​", - "style": "IPY_MODEL_af680e36244e4f9691bb156d01c3b3b8", - "value": "tokenizer_config.json: 100%" - } - }, - "7b8c414f7aad49fe9f98760489d42ed1": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": 
"FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_413e5e69d5f246df9d530bcb797286d9", - "max": 396, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_ada52c81a40444eca27763305e25ef92", - "value": 396 + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" } }, - "886c86257b4645a2a929eb39f20ab8a3": { + "077036d733a84d2881bd0f4d486277b4": { "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", "state": { - "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", + "_model_name": "DescriptionStyleModel", "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_3f5273f0ab8645368d73148784759cf1", - "placeholder": "​", - "style": "IPY_MODEL_b72560a9b60348e1a2764f24a33188fa", - "value": " 396/396 [00:00<00:00, 21.3kB/s]" + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" } }, - "f918256ef4874941a1ec098ea5050f6a": { + "0cb30ddb214540f8b74219a9fc77127b": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -2098,77 +2068,83 @@ "width": null } }, - "187dee520d434a1eaab95c4b17723d35": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", + 
"0d7c92de0c384d72aafada73e685aa08": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null + "_view_name": "StyleView", + "description_width": "" } }, - "af680e36244e4f9691bb156d01c3b3b8": { + "0dfe271be6914892b327d306e669f4aa": { "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", + "model_name": "HTMLModel", "state": { + "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + 
"description_tooltip": null, + "layout": "IPY_MODEL_0cb30ddb214540f8b74219a9fc77127b", + "placeholder": "​", + "style": "IPY_MODEL_a6b63191503c43f691f28878fcd39b26", + "value": " 125/125 [00:00<00:00, 8.81kB/s]" + } + }, + "109a9da70a6a4f6789fa397bf2a81fa4": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_c2115e12648343a4b0ca23455c46f9a5", + "placeholder": "​", + "style": "IPY_MODEL_a7c7e8bbdbbf44649af4e40be262a959", + "value": " 232k/232k [00:00<00:00, 1.86MB/s]" + } + }, + "10d4116aace649bbae035c02e13828d8": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", + "bar_color": null, "description_width": "" } }, - "413e5e69d5f246df9d530bcb797286d9": { + "11a0fb71fd0e486982215656adcd2bdc": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -2217,26 +2193,10 @@ "width": null } }, - "ada52c81a40444eca27763305e25ef92": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": 
"ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "3f5273f0ab8645368d73148784759cf1": { + "11e884ac25d64b0bba935a296b17d5d9": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -2285,47 +2245,47 @@ "width": null } }, - "b72560a9b60348e1a2764f24a33188fa": { + "13e60c9491d542099f1a881330ee1c04": { "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", + "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", + "bar_color": null, "description_width": "" } }, - "80d917f992794502aa6828ed7d01af98": { + "158d4dd4e4f7495e9d2d6f360c29bf02": { "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", "model_module_version": "1.5.0", + "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", + "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_83c646c3a2f543949e2f02138e59e982", - "IPY_MODEL_89706a0fb9e34f97b3ff6db95e2e87b5", - "IPY_MODEL_109a9da70a6a4f6789fa397bf2a81fa4" - ], - "layout": "IPY_MODEL_8d58def01f5d412589983675059926ac" + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": 
"IPY_MODEL_11e884ac25d64b0bba935a296b17d5d9", + "placeholder": "​", + "style": "IPY_MODEL_eb6fa6ee9e74440bb4ce2a92ee4548c7", + "value": "tokenizer.json: 100%" } }, - "83c646c3a2f543949e2f02138e59e982": { + "15b01052fc6140e2be8fae7c2d2928fa": { "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", "model_module_version": "1.5.0", + "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", @@ -2337,61 +2297,61 @@ "_view_name": "HTMLView", "description": "", "description_tooltip": null, - "layout": "IPY_MODEL_2fcb6704693b4842b7c4a224e2d916fd", + "layout": "IPY_MODEL_11a0fb71fd0e486982215656adcd2bdc", "placeholder": "​", - "style": "IPY_MODEL_72c8de55a48d4fa8ae79b9289cbee1d4", - "value": "vocab.txt: 100%" + "style": "IPY_MODEL_5be1ec5880cb459fb7a88ae7c1f2394f", + "value": " 25657/25657 [00:00<00:00, 77019.91 examples/s]" } }, - "89706a0fb9e34f97b3ff6db95e2e87b5": { + "166beb1aa15b4927a9c27fda4a8d6de1": { "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", "model_module_version": "1.5.0", + "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", + "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", + "_view_name": "HTMLView", "description": "", "description_tooltip": null, - "layout": "IPY_MODEL_fb03d4b1113a42d1914557a26058d82f", - "max": 231508, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_d6b75db679df4a849b362c77df481e30", - "value": 231508 + "layout": "IPY_MODEL_83474dc942a44919a4e48ee36b65f8f6", + "placeholder": "​", + "style": "IPY_MODEL_77002ce5084c44b8b06987bee947f099", + "value": "Downloading data: 100%" } }, - "109a9da70a6a4f6789fa397bf2a81fa4": { + "16dd8588f2464d4281c0dde85cc28c6d": { 
"model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", + "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", - "_view_name": "HTMLView", + "_view_name": "ProgressView", + "bar_style": "success", "description": "", "description_tooltip": null, - "layout": "IPY_MODEL_c2115e12648343a4b0ca23455c46f9a5", - "placeholder": "​", - "style": "IPY_MODEL_a7c7e8bbdbbf44649af4e40be262a959", - "value": " 232k/232k [00:00<00:00, 1.86MB/s]" + "layout": "IPY_MODEL_9027c073a81a4360adbe90ed3bd9c099", + "max": 711649, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_5fe367a2cb2b4f949b4b667d1d47e49c", + "value": 711649 } }, - "8d58def01f5d412589983675059926ac": { + "187dee520d434a1eaab95c4b17723d35": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -2440,10 +2400,10 @@ "width": null } }, - "2fcb6704693b4842b7c4a224e2d916fd": { + "1897e98d30ea4d3896c3c2a2b9b2c23e": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -2492,10 +2452,25 @@ "width": null } }, - "72c8de55a48d4fa8ae79b9289cbee1d4": { + "18df3037afae470e8ac9d297f93fd9ce": { "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + 
"_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "199779c3a63c4632be9c8fa65b7f33d8": { + "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", @@ -2507,10 +2482,10 @@ "description_width": "" } }, - "fb03d4b1113a42d1914557a26058d82f": { + "1d5908be44944e41a0b81875afb14411": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -2559,26 +2534,25 @@ "width": null } }, - "d6b75db679df4a849b362c77df481e30": { + "1d616b7a849f4a83a0f1d8dd446b96f7": { "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", + "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", - "bar_color": null, "description_width": "" } }, - "c2115e12648343a4b0ca23455c46f9a5": { + "1e011635dca34cae8ac44614bdfdf88c": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -2627,10 +2601,10 @@ "width": null } }, - "a7c7e8bbdbbf44649af4e40be262a959": { + "1fbd0891d5a24a54ae54656b0d8a6247": { "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", "state": { "_model_module": 
"@jupyter-widgets/controls", "_model_module_version": "1.5.0", @@ -2642,98 +2616,10 @@ "description_width": "" } }, - "db68c63dcd244ca9b8b391559f8abfd1": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_158d4dd4e4f7495e9d2d6f360c29bf02", - "IPY_MODEL_16dd8588f2464d4281c0dde85cc28c6d", - "IPY_MODEL_ad2279ff1d4d47068d037ca698005140" - ], - "layout": "IPY_MODEL_ca7870dc84ec48c681f6411595f321ad" - } - }, - "158d4dd4e4f7495e9d2d6f360c29bf02": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_11e884ac25d64b0bba935a296b17d5d9", - "placeholder": "​", - "style": "IPY_MODEL_eb6fa6ee9e74440bb4ce2a92ee4548c7", - "value": "tokenizer.json: 100%" - } - }, - "16dd8588f2464d4281c0dde85cc28c6d": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - 
"layout": "IPY_MODEL_9027c073a81a4360adbe90ed3bd9c099", - "max": 711649, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_5fe367a2cb2b4f949b4b667d1d47e49c", - "value": 711649 - } - }, - "ad2279ff1d4d47068d037ca698005140": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_52e6dbcd90824ad096c1b610123df935", - "placeholder": "​", - "style": "IPY_MODEL_62450a7fb0e540688ee9ad510a290609", - "value": " 712k/712k [00:00<00:00, 3.65MB/s]" - } - }, - "ca7870dc84ec48c681f6411595f321ad": { + "20f4ea917f4143cd9349fe3afa9c040d": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -2782,10 +2668,118 @@ "width": null } }, - "11e884ac25d64b0bba935a296b17d5d9": { + "21b3538c53cd4ff5895a817782884101": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "22c4bb69842a43aa83fcba4eb6c1406c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + 
"_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_f22ae0abe3cb40a5b97559c7216400e7", + "max": 443, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_abc7b65193fb49f7899b10820992e163", + "value": 443 + } + }, + "25222b6616e1489eb531950c958c5fdf": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_6b6cd8dbe43440b29bf705ebc04fede7", + "IPY_MODEL_7b8c414f7aad49fe9f98760489d42ed1", + "IPY_MODEL_886c86257b4645a2a929eb39f20ab8a3" + ], + "layout": "IPY_MODEL_f918256ef4874941a1ec098ea5050f6a" + } + }, + "27b6a73af53d4cf1946ae2ece8c499e2": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "2887f8a70d8a45d5b633ec2106865a45": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": 
"@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "28a64ca9341341449b9e778d73db6321": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "2ae210dcb6bd47d584b980d478b254a2": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -2834,25 +2828,113 @@ "width": null } }, - "eb6fa6ee9e74440bb4ce2a92ee4548c7": { + "2be215a977184227abd5983e7c81b3ff": { "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", + "model_name": "HBoxModel", "state": { + "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_6fb5d22a5b744b98907c7d36ad675e37", + "IPY_MODEL_4eca6411f44a4a638423a51289957959", + "IPY_MODEL_b99e200f694d4e2a83360752c6b5441c" + ], + "layout": "IPY_MODEL_c61d80f6b40a4716b18ab7555fe604d1" + } + }, + "2bea46a2708d4e61910dc8138f99426b": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": 
"ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", + "bar_color": null, "description_width": "" } }, - "9027c073a81a4360adbe90ed3bd9c099": { + "2bfe8c958a9d44f781453b529255e01f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_e7a255788ec94998924142f4255ce409", + "IPY_MODEL_e409114ff67443ca92cb46ffb0697b58", + "IPY_MODEL_3e13281360324914921f135ba80e9672" + ], + "layout": "IPY_MODEL_1d5908be44944e41a0b81875afb14411" + } + }, + "2eed217c58f34c83b5b15bf2f955d9a0": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_3fef4607ccd943008c272f66f9bf08b8", + "placeholder": "​", + "style": "IPY_MODEL_0d7c92de0c384d72aafada73e685aa08", + "value": "config.json: 100%" + } + }, + "2f98cd9e8e5f4466a7a2cf88b087ce55": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": 
"HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_5c07f16b1bb04a55bdcf0ae2476b78dd", + "IPY_MODEL_4e3741de8a1e4021a49e8abfb925c563", + "IPY_MODEL_0dfe271be6914892b327d306e669f4aa" + ], + "layout": "IPY_MODEL_d88597e336ad4297a4aa6bd3d7fdc5dd" + } + }, + "2fcb6704693b4842b7c4a224e2d916fd": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -2901,10 +2983,10 @@ "width": null } }, - "5fe367a2cb2b4f949b4b667d1d47e49c": { + "32b556bd8ddc4ef196eba4d1fd6b6b62": { "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", @@ -2917,10 +2999,10 @@ "description_width": "" } }, - "52e6dbcd90824ad096c1b610123df935": { + "33ba91ddcbd74a9e870ed3c2ba9f86d0": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -2969,10 +3051,10 @@ "width": null } }, - "62450a7fb0e540688ee9ad510a290609": { + "344dc2f380944602b0d5a712dad8473c": { "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", @@ -2984,10 +3066,10 @@ "description_width": "" } }, - "2f98cd9e8e5f4466a7a2cf88b087ce55": { + "373fe974f094461d87f5b40ad6aa4e91": { "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", "model_module_version": "1.5.0", + "model_name": "HBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", @@ -2999,83 +3081,82 @@ 
"_view_name": "HBoxView", "box_style": "", "children": [ - "IPY_MODEL_5c07f16b1bb04a55bdcf0ae2476b78dd", - "IPY_MODEL_4e3741de8a1e4021a49e8abfb925c563", - "IPY_MODEL_0dfe271be6914892b327d306e669f4aa" + "IPY_MODEL_f1dd6f744bf34a3aa1ecd72115f63155", + "IPY_MODEL_e5486dfd2ecd411eb60f3e3a89b64660", + "IPY_MODEL_5865b1aead9641d1bed54d9d166945f8" ], - "layout": "IPY_MODEL_d88597e336ad4297a4aa6bd3d7fdc5dd" + "layout": "IPY_MODEL_41de8969ac714df5a4f4132f440af675" } }, - "5c07f16b1bb04a55bdcf0ae2476b78dd": { + "3ab4015a66824bb3a2374d5a090e4e35": { "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", "model_module_version": "1.5.0", + "model_name": "HBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", + "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_1e011635dca34cae8ac44614bdfdf88c", - "placeholder": "​", - "style": "IPY_MODEL_ba01a8212e0741c48d0d49095cfb5c17", - "value": "special_tokens_map.json: 100%" + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_63a499ae50bb4c12b4cf90dca53d0a07", + "IPY_MODEL_9d04d5b70be34666848a0347437cb7ea", + "IPY_MODEL_ab924099c7cc4d31a60afec68a0ff0d1" + ], + "layout": "IPY_MODEL_92bbac1ac3da4331abc4c0afe7fffbd6" } }, - "4e3741de8a1e4021a49e8abfb925c563": { + "3e13281360324914921f135ba80e9672": { "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", "model_module_version": "1.5.0", + "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", + "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", - 
"_view_name": "ProgressView", - "bar_style": "success", + "_view_name": "HTMLView", "description": "", "description_tooltip": null, - "layout": "IPY_MODEL_fd80d3b519124a759df834da4af06967", - "max": 125, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_5f419e6126ad421ea2efa5b73b38aef5", - "value": 125 + "layout": "IPY_MODEL_4c51bcbf31774cda86465a3ec707831d", + "placeholder": "​", + "style": "IPY_MODEL_8bb2cf9274a84b84ac8d20d4d38aaecc", + "value": " 279/279 [00:00<00:00, 13.5kB/s]" } }, - "0dfe271be6914892b327d306e669f4aa": { + "3e3303826e33485ca844fc82a1035b61": { "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", "model_module_version": "1.5.0", + "model_name": "HBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", + "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_0cb30ddb214540f8b74219a9fc77127b", - "placeholder": "​", - "style": "IPY_MODEL_a6b63191503c43f691f28878fcd39b26", - "value": " 125/125 [00:00<00:00, 8.81kB/s]" + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_c32fbe6f9b39480985e3bff59f6fcccd", + "IPY_MODEL_ad77277175bb456a9a6ce15af4aa5868", + "IPY_MODEL_a6b0d04284b748adb9e74530d25589e0" + ], + "layout": "IPY_MODEL_2ae210dcb6bd47d584b980d478b254a2" } }, - "d88597e336ad4297a4aa6bd3d7fdc5dd": { + "3f5273f0ab8645368d73148784759cf1": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -3124,10 +3205,10 @@ "width": null } }, - "1e011635dca34cae8ac44614bdfdf88c": { + "3fef4607ccd943008c272f66f9bf08b8": { "model_module": "@jupyter-widgets/base", - 
"model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -3176,25 +3257,26 @@ "width": null } }, - "ba01a8212e0741c48d0d49095cfb5c17": { + "4015861324f64d4ab2b1a7a4153266ff": { "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", + "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", + "bar_color": null, "description_width": "" } }, - "fd80d3b519124a759df834da4af06967": { + "407cf90293e44890903a4d89ee08008a": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -3243,26 +3325,10 @@ "width": null } }, - "5f419e6126ad421ea2efa5b73b38aef5": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "0cb30ddb214540f8b74219a9fc77127b": { + "413e5e69d5f246df9d530bcb797286d9": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -3311,117 +3377,14 @@ "width": null } }, - 
"a6b63191503c43f691f28878fcd39b26": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", + "41de8969ac714df5a4f4132f440af675": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "4d174874270f4e0f88eb27a16aa0f11c": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_2eed217c58f34c83b5b15bf2f955d9a0", - "IPY_MODEL_63a17bb1290a428da71bfe76a08e04db", - "IPY_MODEL_a35d2f0d849d474f82cbd3dc6879b12d" - ], - "layout": "IPY_MODEL_f2f00b1f73954d95b95889fa1a34c5ae" - } - }, - "2eed217c58f34c83b5b15bf2f955d9a0": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_3fef4607ccd943008c272f66f9bf08b8", - "placeholder": "​", - "style": "IPY_MODEL_0d7c92de0c384d72aafada73e685aa08", - "value": "config.json: 100%" - } - }, - "63a17bb1290a428da71bfe76a08e04db": { - 
"model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_c8ebd32170a44ab6bfedee79ea5509ec", - "max": 731, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_13e60c9491d542099f1a881330ee1c04", - "value": 731 - } - }, - "a35d2f0d849d474f82cbd3dc6879b12d": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_1897e98d30ea4d3896c3c2a2b9b2c23e", - "placeholder": "​", - "style": "IPY_MODEL_9019bbd2898447fc8c692163e223b4b1", - "value": " 731/731 [00:00<00:00, 52.6kB/s]" - } - }, - "f2f00b1f73954d95b95889fa1a34c5ae": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", @@ -3466,10 +3429,34 @@ "width": null } }, - "3fef4607ccd943008c272f66f9bf08b8": { + "434d6bee7bfc4aa5bab6c2a080a193e5": { + "model_module": 
"@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_a938e445c6674332b9d6253358d0e1e0", + "max": 93304, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_01a1a33b691d427bb5cccce1f4b79693", + "value": 93304 + } + }, + "453f440ade9946a5b12bfe6cea948368": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -3518,25 +3505,31 @@ "width": null } }, - "0d7c92de0c384d72aafada73e685aa08": { + "471ea20537984f18a07b3a198750c3e0": { "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", + "model_name": "HTMLModel", "state": { + "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", + "_model_name": "HTMLModel", "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_fc2403f083124228befe690caad6dd3d", + "placeholder": "​", + "style": "IPY_MODEL_a60c75bb501b49e48d09dd50cb645bdd", + "value": " 1000/1000 [00:00<00:00, 25314.62 examples/s]" } }, - "c8ebd32170a44ab6bfedee79ea5509ec": { + "498ff77afafa440cb0f6afb39627ec1d": { 
"model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -3585,26 +3578,31 @@ "width": null } }, - "13e60c9491d542099f1a881330ee1c04": { + "4a0c748f613a4a979f7eaee825878044": { "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", + "model_name": "HTMLModel", "state": { + "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", + "_model_name": "HTMLModel", "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_bc9715c03c4d41f7b13cdc3fbca26b1b", + "placeholder": "​", + "style": "IPY_MODEL_55efb72d3ac94e85b0259ade8318a84a", + "value": "model.safetensors: 100%" } }, - "1897e98d30ea4d3896c3c2a2b9b2c23e": { + "4c51bcbf31774cda86465a3ec707831d": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -3653,25 +3651,26 @@ "width": null } }, - "9019bbd2898447fc8c692163e223b4b1": { + "4c955a42756d47eea9a00a87c4b5f0f0": { "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", + "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": 
"@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", + "bar_color": null, "description_width": "" } }, - "8a12ffea544146c29541b3b4a1c6db2b": { + "4d174874270f4e0f88eb27a16aa0f11c": { "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", "model_module_version": "1.5.0", + "model_name": "HBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", @@ -3683,38 +3682,41 @@ "_view_name": "HBoxView", "box_style": "", "children": [ - "IPY_MODEL_4a0c748f613a4a979f7eaee825878044", - "IPY_MODEL_c3be2b813b664e0cb9443c2aa0707afc", - "IPY_MODEL_9c594928a30a4b038c9779c23eaf9fb6" + "IPY_MODEL_2eed217c58f34c83b5b15bf2f955d9a0", + "IPY_MODEL_63a17bb1290a428da71bfe76a08e04db", + "IPY_MODEL_a35d2f0d849d474f82cbd3dc6879b12d" ], - "layout": "IPY_MODEL_aa7efcd26d21435a9b951a872d122c25" + "layout": "IPY_MODEL_f2f00b1f73954d95b95889fa1a34c5ae" } }, - "4a0c748f613a4a979f7eaee825878044": { + "4e3741de8a1e4021a49e8abfb925c563": { "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", + "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", - "_view_name": "HTMLView", + "_view_name": "ProgressView", + "bar_style": "success", "description": "", "description_tooltip": null, - "layout": "IPY_MODEL_bc9715c03c4d41f7b13cdc3fbca26b1b", - "placeholder": "​", - "style": "IPY_MODEL_55efb72d3ac94e85b0259ade8318a84a", - "value": "model.safetensors: 100%" + "layout": "IPY_MODEL_fd80d3b519124a759df834da4af06967", + "max": 125, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_5f419e6126ad421ea2efa5b73b38aef5", + "value": 125 } }, - "c3be2b813b664e0cb9443c2aa0707afc": { + "4eca6411f44a4a638423a51289957959": 
{ "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", @@ -3727,54 +3729,77 @@ "bar_style": "success", "description": "", "description_tooltip": null, - "layout": "IPY_MODEL_8bd10565e96a46c2882d635a524593f2", - "max": 437955512, + "layout": "IPY_MODEL_850b2174e35d49579060721becfe4287", + "max": 5069051, "min": 0, "orientation": "horizontal", - "style": "IPY_MODEL_10d4116aace649bbae035c02e13828d8", - "value": 437955512 + "style": "IPY_MODEL_2bea46a2708d4e61910dc8138f99426b", + "value": 5069051 } }, - "9c594928a30a4b038c9779c23eaf9fb6": { + "4f46a6443368428193090a6a4ddf3473": { "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", "model_module_version": "1.5.0", + "model_name": "HBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", + "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_6ebe20cd50074cafad52f37818123cff", - "placeholder": "​", - "style": "IPY_MODEL_e37f90735fd94698a4172e0292da7c9f", - "value": " 438M/438M [00:03<00:00, 121MB/s]" + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_b2c7ed0a7fcd45229194b5422b7801d3", + "IPY_MODEL_434d6bee7bfc4aa5bab6c2a080a193e5", + "IPY_MODEL_7e9a93af505a4b1a96c947363bbae0b9" + ], + "layout": "IPY_MODEL_453f440ade9946a5b12bfe6cea948368" } }, - "aa7efcd26d21435a9b951a872d122c25": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", + "501f90bcf1ff4efe81cd377df249415e": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": 
"HBoxModel", "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_aa29504327084ed5816d90ca3f9e9f16", + "IPY_MODEL_71c123550e2a4166955eef2f142170fb", + "IPY_MODEL_15b01052fc6140e2be8fae7c2d2928fa" + ], + "layout": "IPY_MODEL_8badd98fa463404aa80f604b45f4a912" + } + }, + "52e6dbcd90824ad096c1b610123df935": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, @@ -3808,10 +3833,77 @@ "width": null } }, - "bc9715c03c4d41f7b13cdc3fbca26b1b": { + "55efb72d3ac94e85b0259ade8318a84a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + 
"_view_name": "StyleView", + "description_width": "" + } + }, + "584e80ac477d4a01b23405a5fa29f092": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "5865b1aead9641d1bed54d9d166945f8": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_20f4ea917f4143cd9349fe3afa9c040d", + "placeholder": "​", + "style": "IPY_MODEL_344dc2f380944602b0d5a712dad8473c", + "value": " 17.1M/17.1M [00:00<00:00, 87.8MB/s]" + } + }, + "5913015323b349ac83f29dc3419ee468": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "5a8b708edb414013a6915a9cbbe95f0a": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -3860,25 +3952,10 @@ "width": null } }, - 
"55efb72d3ac94e85b0259ade8318a84a": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "8bd10565e96a46c2882d635a524593f2": { + "5aae76de0c3b42fb81642990d8bbdf93": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -3927,10 +4004,10 @@ "width": null } }, - "10d4116aace649bbae035c02e13828d8": { + "5baf674c98a846e1a79fda9c8ee77e78": { "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", @@ -3943,10 +4020,46 @@ "description_width": "" } }, - "6ebe20cd50074cafad52f37818123cff": { + "5be1ec5880cb459fb7a88ae7c1f2394f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "5c07f16b1bb04a55bdcf0ae2476b78dd": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + 
"_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_1e011635dca34cae8ac44614bdfdf88c", + "placeholder": "​", + "style": "IPY_MODEL_ba01a8212e0741c48d0d49095cfb5c17", + "value": "special_tokens_map.json: 100%" + } + }, + "5cd148642750417abd38cbf483ccf1f9": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -3995,68 +4108,57 @@ "width": null } }, - "e37f90735fd94698a4172e0292da7c9f": { + "5f419e6126ad421ea2efa5b73b38aef5": { "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", + "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", + "bar_color": null, "description_width": "" } }, - "9003eadfcead41aaaadabf18a706200f": { + "5fe367a2cb2b4f949b4b667d1d47e49c": { "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", "state": { - "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", + "_model_name": "ProgressStyleModel", "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_9e8a582cb44a4b618edc8d7844956a93", - "IPY_MODEL_22c4bb69842a43aa83fcba4eb6c1406c", - "IPY_MODEL_e191dbd0809c47c7ba28d3f6a0fcb1c5" - ], - "layout": 
"IPY_MODEL_b7d5f50552744a528b99873318ee1bfc" + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" } }, - "9e8a582cb44a4b618edc8d7844956a93": { + "62450a7fb0e540688ee9ad510a290609": { "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", "state": { - "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", + "_model_name": "DescriptionStyleModel", "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_8b4cefc11b4a403eaafeafefdb0cd763", - "placeholder": "​", - "style": "IPY_MODEL_077036d733a84d2881bd0f4d486277b4", - "value": "tokenizer_config.json: 100%" + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" } }, - "22c4bb69842a43aa83fcba4eb6c1406c": { + "63a17bb1290a428da71bfe76a08e04db": { "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", @@ -4069,18 +4171,18 @@ "bar_style": "success", "description": "", "description_tooltip": null, - "layout": "IPY_MODEL_f22ae0abe3cb40a5b97559c7216400e7", - "max": 443, + "layout": "IPY_MODEL_c8ebd32170a44ab6bfedee79ea5509ec", + "max": 731, "min": 0, "orientation": "horizontal", - "style": "IPY_MODEL_abc7b65193fb49f7899b10820992e163", - "value": 443 + "style": "IPY_MODEL_13e60c9491d542099f1a881330ee1c04", + "value": 731 } }, - "e191dbd0809c47c7ba28d3f6a0fcb1c5": { + "63a499ae50bb4c12b4cf90dca53d0a07": { "model_module": "@jupyter-widgets/controls", - "model_name": 
"HTMLModel", "model_module_version": "1.5.0", + "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", @@ -4092,29 +4194,101 @@ "_view_name": "HTMLView", "description": "", "description_tooltip": null, - "layout": "IPY_MODEL_7410dd99fca940159ba8d13c9c52bae3", + "layout": "IPY_MODEL_bc01b443cb204014bcbcf7cb0fea4c86", "placeholder": "​", - "style": "IPY_MODEL_6710ae95aea9445ab998afb5d0bb3241", - "value": " 443/443 [00:00<00:00, 35.6kB/s]" + "style": "IPY_MODEL_28a64ca9341341449b9e778d73db6321", + "value": "config.json: 100%" } }, - "b7d5f50552744a528b99873318ee1bfc": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", + "6710ae95aea9445ab998afb5d0bb3241": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, + "_view_name": "StyleView", + "description_width": "" + } + }, + "691424012c59434f8cc17f3d6aa001f3": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_8cad72468680488aa62c33186cedf084", + "placeholder": 
"​", + "style": "IPY_MODEL_21b3538c53cd4ff5895a817782884101", + "value": "100%" + } + }, + "6b6cd8dbe43440b29bf705ebc04fede7": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_187dee520d434a1eaab95c4b17723d35", + "placeholder": "​", + "style": "IPY_MODEL_af680e36244e4f9691bb156d01c3b3b8", + "value": "tokenizer_config.json: 100%" + } + }, + "6dcfc1e9c851445e95b862f30dfc8dee": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "6ebe20cd50074cafad52f37818123cff": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, "display": null, "flex": null, "flex_flow": null, @@ -4150,10 +4324,70 @@ "width": null } }, - "8b4cefc11b4a403eaafeafefdb0cd763": { + "6fb5d22a5b744b98907c7d36ad675e37": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + 
"model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_b7dfac1fc6f047d8a611ad711e18bbe7", + "placeholder": "​", + "style": "IPY_MODEL_5913015323b349ac83f29dc3419ee468", + "value": "sentencepiece.bpe.model: 100%" + } + }, + "71c123550e2a4166955eef2f142170fb": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_9c0796729bb0455d92a4f418e86fa38a", + "max": 25657, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_5baf674c98a846e1a79fda9c8ee77e78", + "value": 25657 + } + }, + "72c8de55a48d4fa8ae79b9289cbee1d4": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "7410dd99fca940159ba8d13c9c52bae3": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", 
"_model_module_version": "1.2.0", @@ -4202,10 +4436,10 @@ "width": null } }, - "077036d733a84d2881bd0f4d486277b4": { + "77002ce5084c44b8b06987bee947f099": { "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", @@ -4217,10 +4451,34 @@ "description_width": "" } }, - "f22ae0abe3cb40a5b97559c7216400e7": { + "7b8c414f7aad49fe9f98760489d42ed1": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_413e5e69d5f246df9d530bcb797286d9", + "max": 396, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_ada52c81a40444eca27763305e25ef92", + "value": 396 + } + }, + "7dd04c9ff2b34590ade55890b2f47b88": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -4269,26 +4527,89 @@ "width": null } }, - "abc7b65193fb49f7899b10820992e163": { + "7e9a93af505a4b1a96c947363bbae0b9": { "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", + "model_name": "HTMLModel", "state": { + "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": 
"@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_33ba91ddcbd74a9e870ed3c2ba9f86d0", + "placeholder": "​", + "style": "IPY_MODEL_803e1962e31b4a918c28e9ff20732313", + "value": " 93.3k/93.3k [00:00<00:00, 528kB/s]" + } + }, + "7fd4ee74216249ae806b5d4045da9523": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_bbbc8e741a0b44ef835e10fee58bbadf", + "placeholder": "​", + "style": "IPY_MODEL_1fbd0891d5a24a54ae54656b0d8a6247", + "value": " 19.0M/19.0M [00:02<00:00, 7.08MB/s]" + } + }, + "803e1962e31b4a918c28e9ff20732313": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", - "bar_color": null, "description_width": "" } }, - "7410dd99fca940159ba8d13c9c52bae3": { + "80d917f992794502aa6828ed7d01af98": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": 
[ + "IPY_MODEL_83c646c3a2f543949e2f02138e59e982", + "IPY_MODEL_89706a0fb9e34f97b3ff6db95e2e87b5", + "IPY_MODEL_109a9da70a6a4f6789fa397bf2a81fa4" + ], + "layout": "IPY_MODEL_8d58def01f5d412589983675059926ac" + } + }, + "83474dc942a44919a4e48ee36b65f8f6": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -4337,47 +4658,83 @@ "width": null } }, - "6710ae95aea9445ab998afb5d0bb3241": { + "83c646c3a2f543949e2f02138e59e982": { "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", + "model_name": "HTMLModel", "state": { + "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", + "_model_name": "HTMLModel", "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_2fcb6704693b4842b7c4a224e2d916fd", + "placeholder": "​", + "style": "IPY_MODEL_72c8de55a48d4fa8ae79b9289cbee1d4", + "value": "vocab.txt: 100%" } }, - "2be215a977184227abd5983e7c81b3ff": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "model_module_version": "1.5.0", + "850b2174e35d49579060721becfe4287": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", "_view_count": 
null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_6fb5d22a5b744b98907c7d36ad675e37", - "IPY_MODEL_4eca6411f44a4a638423a51289957959", - "IPY_MODEL_b99e200f694d4e2a83360752c6b5441c" - ], - "layout": "IPY_MODEL_c61d80f6b40a4716b18ab7555fe604d1" + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null } }, - "6fb5d22a5b744b98907c7d36ad675e37": { + "886c86257b4645a2a929eb39f20ab8a3": { "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", "model_module_version": "1.5.0", + "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", @@ -4389,16 +4746,16 @@ "_view_name": "HTMLView", "description": "", "description_tooltip": null, - "layout": "IPY_MODEL_b7dfac1fc6f047d8a611ad711e18bbe7", + "layout": "IPY_MODEL_3f5273f0ab8645368d73148784759cf1", "placeholder": "​", - "style": "IPY_MODEL_5913015323b349ac83f29dc3419ee468", - "value": "sentencepiece.bpe.model: 100%" + "style": "IPY_MODEL_b72560a9b60348e1a2764f24a33188fa", + "value": " 
396/396 [00:00<00:00, 21.3kB/s]" } }, - "4eca6411f44a4a638423a51289957959": { + "89706a0fb9e34f97b3ff6db95e2e87b5": { "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", @@ -4411,39 +4768,62 @@ "bar_style": "success", "description": "", "description_tooltip": null, - "layout": "IPY_MODEL_850b2174e35d49579060721becfe4287", - "max": 5069051, + "layout": "IPY_MODEL_fb03d4b1113a42d1914557a26058d82f", + "max": 231508, "min": 0, "orientation": "horizontal", - "style": "IPY_MODEL_2bea46a2708d4e61910dc8138f99426b", - "value": 5069051 + "style": "IPY_MODEL_d6b75db679df4a849b362c77df481e30", + "value": 231508 } }, - "b99e200f694d4e2a83360752c6b5441c": { + "8a12ffea544146c29541b3b4a1c6db2b": { "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", "model_module_version": "1.5.0", + "model_name": "HBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", + "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_5a8b708edb414013a6915a9cbbe95f0a", - "placeholder": "​", - "style": "IPY_MODEL_f451f6f10dfc45049f07c44e12b04836", - "value": " 5.07M/5.07M [00:01<00:00, 3.74MB/s]" + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_4a0c748f613a4a979f7eaee825878044", + "IPY_MODEL_c3be2b813b664e0cb9443c2aa0707afc", + "IPY_MODEL_9c594928a30a4b038c9779c23eaf9fb6" + ], + "layout": "IPY_MODEL_aa7efcd26d21435a9b951a872d122c25" } }, - "c61d80f6b40a4716b18ab7555fe604d1": { + "8a78b64d3f6b4ffdae7e79266d798635": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": 
"HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_bb9b8200e6be4e18a8cf38b14b03e4ce", + "IPY_MODEL_a83bd13e5c6e4cbb96c2c2f7acfe8423", + "IPY_MODEL_471ea20537984f18a07b3a198750c3e0" + ], + "layout": "IPY_MODEL_f79797ffd6a649b1a0edae63eee91bea" + } + }, + "8b4cefc11b4a403eaafeafefdb0cd763": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -4492,10 +4872,10 @@ "width": null } }, - "b7dfac1fc6f047d8a611ad711e18bbe7": { + "8badd98fa463404aa80f604b45f4a912": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -4544,10 +4924,10 @@ "width": null } }, - "5913015323b349ac83f29dc3419ee468": { + "8bb2cf9274a84b84ac8d20d4d38aaecc": { "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", @@ -4559,10 +4939,10 @@ "description_width": "" } }, - "850b2174e35d49579060721becfe4287": { + "8bd10565e96a46c2882d635a524593f2": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -4611,26 +4991,62 @@ "width": null } }, - "2bea46a2708d4e61910dc8138f99426b": { - "model_module": 
"@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "model_module_version": "1.5.0", + "8cad72468680488aa62c33186cedf084": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null } }, - "5a8b708edb414013a6915a9cbbe95f0a": { + "8d58def01f5d412589983675059926ac": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -4679,25 +5095,10 @@ "width": null } }, - "f451f6f10dfc45049f07c44e12b04836": { + "9003eadfcead41aaaadabf18a706200f": { "model_module": 
"@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "373fe974f094461d87f5b40ad6aa4e91": { - "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", - "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", @@ -4709,83 +5110,32 @@ "_view_name": "HBoxView", "box_style": "", "children": [ - "IPY_MODEL_f1dd6f744bf34a3aa1ecd72115f63155", - "IPY_MODEL_e5486dfd2ecd411eb60f3e3a89b64660", - "IPY_MODEL_5865b1aead9641d1bed54d9d166945f8" + "IPY_MODEL_9e8a582cb44a4b618edc8d7844956a93", + "IPY_MODEL_22c4bb69842a43aa83fcba4eb6c1406c", + "IPY_MODEL_e191dbd0809c47c7ba28d3f6a0fcb1c5" ], - "layout": "IPY_MODEL_41de8969ac714df5a4f4132f440af675" + "layout": "IPY_MODEL_b7d5f50552744a528b99873318ee1bfc" } }, - "f1dd6f744bf34a3aa1ecd72115f63155": { + "9019bbd2898447fc8c692163e223b4b1": { "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", "state": { - "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", + "_model_name": "DescriptionStyleModel", "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_5aae76de0c3b42fb81642990d8bbdf93", - "placeholder": "​", - "style": "IPY_MODEL_cef20c5f499646e291329b580cf3800f", - "value": "tokenizer.json: 100%" - } - }, - "e5486dfd2ecd411eb60f3e3a89b64660": { - "model_module": "@jupyter-widgets/controls", - 
"model_name": "FloatProgressModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_90eac81b19a04354ad842f3fbe87e694", - "max": 17098107, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_cc666236572240f8b1015f187a2f66d9", - "value": 17098107 - } - }, - "5865b1aead9641d1bed54d9d166945f8": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_20f4ea917f4143cd9349fe3afa9c040d", - "placeholder": "​", - "style": "IPY_MODEL_344dc2f380944602b0d5a712dad8473c", - "value": " 17.1M/17.1M [00:00<00:00, 87.8MB/s]" + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" } }, - "41de8969ac714df5a4f4132f440af675": { + "9027c073a81a4360adbe90ed3bd9c099": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -4834,10 +5184,10 @@ "width": null } }, - "5aae76de0c3b42fb81642990d8bbdf93": { + "90eac81b19a04354ad842f3fbe87e694": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + 
"model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -4886,25 +5236,10 @@ "width": null } }, - "cef20c5f499646e291329b580cf3800f": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "90eac81b19a04354ad842f3fbe87e694": { + "92bbac1ac3da4331abc4c0afe7fffbd6": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -4953,26 +5288,62 @@ "width": null } }, - "cc666236572240f8b1015f187a2f66d9": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "model_module_version": "1.5.0", + "944f4a261b5240408ab7fc473c7b0835": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + 
"grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null } }, - "20f4ea917f4143cd9349fe3afa9c040d": { + "9c0796729bb0455d92a4f418e86fa38a": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -5021,47 +5392,55 @@ "width": null } }, - "344dc2f380944602b0d5a712dad8473c": { + "9c594928a30a4b038c9779c23eaf9fb6": { "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", + "model_name": "HTMLModel", "state": { + "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", + "_model_name": "HTMLModel", "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_6ebe20cd50074cafad52f37818123cff", + "placeholder": "​", + "style": "IPY_MODEL_e37f90735fd94698a4172e0292da7c9f", + "value": " 438M/438M [00:03<00:00, 121MB/s]" } }, - "2bfe8c958a9d44f781453b529255e01f": { + "9d04d5b70be34666848a0347437cb7ea": { "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", 
"model_module_version": "1.5.0", + "model_name": "FloatProgressModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", + "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_e7a255788ec94998924142f4255ce409", - "IPY_MODEL_e409114ff67443ca92cb46ffb0697b58", - "IPY_MODEL_3e13281360324914921f135ba80e9672" - ], - "layout": "IPY_MODEL_1d5908be44944e41a0b81875afb14411" + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_ba10edc885244040ab89498be10bd4db", + "max": 799, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_584e80ac477d4a01b23405a5fa29f092", + "value": 799 } }, - "e7a255788ec94998924142f4255ce409": { + "9e8a582cb44a4b618edc8d7844956a93": { "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", "model_module_version": "1.5.0", + "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", @@ -5073,40 +5452,31 @@ "_view_name": "HTMLView", "description": "", "description_tooltip": null, - "layout": "IPY_MODEL_d30b92db93594fe8b2f83241bb498f78", + "layout": "IPY_MODEL_8b4cefc11b4a403eaafeafefdb0cd763", "placeholder": "​", - "style": "IPY_MODEL_9f679d0c8c5e4bf59111200f955ae8d7", - "value": "special_tokens_map.json: 100%" + "style": "IPY_MODEL_077036d733a84d2881bd0f4d486277b4", + "value": "tokenizer_config.json: 100%" } }, - "e409114ff67443ca92cb46ffb0697b58": { + "9f679d0c8c5e4bf59111200f955ae8d7": { "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", "state": { - "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", 
- "_model_name": "FloatProgressModel", + "_model_name": "DescriptionStyleModel", "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_944f4a261b5240408ab7fc473c7b0835", - "max": 279, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_32b556bd8ddc4ef196eba4d1fd6b6b62", - "value": 279 + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" } }, - "3e13281360324914921f135ba80e9672": { + "a35d2f0d849d474f82cbd3dc6879b12d": { "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", "model_module_version": "1.5.0", + "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", @@ -5118,120 +5488,89 @@ "_view_name": "HTMLView", "description": "", "description_tooltip": null, - "layout": "IPY_MODEL_4c51bcbf31774cda86465a3ec707831d", + "layout": "IPY_MODEL_1897e98d30ea4d3896c3c2a2b9b2c23e", "placeholder": "​", - "style": "IPY_MODEL_8bb2cf9274a84b84ac8d20d4d38aaecc", - "value": " 279/279 [00:00<00:00, 13.5kB/s]" + "style": "IPY_MODEL_9019bbd2898447fc8c692163e223b4b1", + "value": " 731/731 [00:00<00:00, 52.6kB/s]" } }, - "1d5908be44944e41a0b81875afb14411": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", + "a3eb172e9d324da1bd7d8914e66d2106": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", "_view_count": null, - "_view_module": "@jupyter-widgets/base", - 
"_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_166beb1aa15b4927a9c27fda4a8d6de1", + "IPY_MODEL_f2667dbbb4c5462986c9cad904767540", + "IPY_MODEL_7fd4ee74216249ae806b5d4045da9523" + ], + "layout": "IPY_MODEL_b5fd41d1dba0476491cb311bd4d47741" } }, - "d30b92db93594fe8b2f83241bb498f78": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", + "a60c75bb501b49e48d09dd50cb645bdd": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": 
null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null + "_view_name": "StyleView", + "description_width": "" } }, - "9f679d0c8c5e4bf59111200f955ae8d7": { + "a6b0d04284b748adb9e74530d25589e0": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_7dd04c9ff2b34590ade55890b2f47b88", + "placeholder": "​", + "style": "IPY_MODEL_c33d33427cc245eebe6e190684304904", + "value": " 1.11G/1.11G [00:08<00:00, 175MB/s]" + } + }, + "a6b63191503c43f691f28878fcd39b26": { "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" 
+ } + }, + "a7c7e8bbdbbf44649af4e40be262a959": { + "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", @@ -5243,10 +5582,34 @@ "description_width": "" } }, - "944f4a261b5240408ab7fc473c7b0835": { + "a83bd13e5c6e4cbb96c2c2f7acfe8423": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_e582de00e2af4948b9f072653c787712", + "max": 1000, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_2887f8a70d8a45d5b633ec2106865a45", + "value": 1000 + } + }, + "a938e445c6674332b9d6253358d0e1e0": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -5295,26 +5658,31 @@ "width": null } }, - "32b556bd8ddc4ef196eba4d1fd6b6b62": { + "aa29504327084ed5816d90ca3f9e9f16": { "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", + "model_name": "HTMLModel", "state": { + "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", + "_model_name": "HTMLModel", "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" + "_view_module": 
"@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_407cf90293e44890903a4d89ee08008a", + "placeholder": "​", + "style": "IPY_MODEL_18df3037afae470e8ac9d297f93fd9ce", + "value": "Generating corpus split: 100%" } }, - "4c51bcbf31774cda86465a3ec707831d": { + "aa7efcd26d21435a9b951a872d122c25": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -5363,47 +5731,47 @@ "width": null } }, - "8bb2cf9274a84b84ac8d20d4d38aaecc": { + "ab924099c7cc4d31a60afec68a0ff0d1": { "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", + "model_name": "HTMLModel", "state": { + "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", + "_model_name": "HTMLModel", "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_00a0f8d7b04c495b91b6decf446c50d5", + "placeholder": "​", + "style": "IPY_MODEL_199779c3a63c4632be9c8fa65b7f33d8", + "value": " 799/799 [00:00<00:00, 37.0kB/s]" } }, - "3ab4015a66824bb3a2374d5a090e4e35": { + "abc7b65193fb49f7899b10820992e163": { "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", "state": { - "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", + "_model_name": "ProgressStyleModel", 
"_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_63a499ae50bb4c12b4cf90dca53d0a07", - "IPY_MODEL_9d04d5b70be34666848a0347437cb7ea", - "IPY_MODEL_ab924099c7cc4d31a60afec68a0ff0d1" - ], - "layout": "IPY_MODEL_92bbac1ac3da4331abc4c0afe7fffbd6" + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" } }, - "63a499ae50bb4c12b4cf90dca53d0a07": { + "ad2279ff1d4d47068d037ca698005140": { "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", "model_module_version": "1.5.0", + "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", @@ -5415,16 +5783,16 @@ "_view_name": "HTMLView", "description": "", "description_tooltip": null, - "layout": "IPY_MODEL_bc01b443cb204014bcbcf7cb0fea4c86", + "layout": "IPY_MODEL_52e6dbcd90824ad096c1b610123df935", "placeholder": "​", - "style": "IPY_MODEL_28a64ca9341341449b9e778d73db6321", - "value": "config.json: 100%" + "style": "IPY_MODEL_62450a7fb0e540688ee9ad510a290609", + "value": " 712k/712k [00:00<00:00, 3.65MB/s]" } }, - "9d04d5b70be34666848a0347437cb7ea": { + "ad77277175bb456a9a6ce15af4aa5868": { "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", @@ -5437,568 +5805,49 @@ "bar_style": "success", "description": "", "description_tooltip": null, - "layout": "IPY_MODEL_ba10edc885244040ab89498be10bd4db", - "max": 799, + "layout": "IPY_MODEL_498ff77afafa440cb0f6afb39627ec1d", + "max": 1112206140, "min": 0, "orientation": "horizontal", - "style": "IPY_MODEL_584e80ac477d4a01b23405a5fa29f092", - "value": 799 + "style": "IPY_MODEL_4015861324f64d4ab2b1a7a4153266ff", + "value": 
1112206140 } }, - "ab924099c7cc4d31a60afec68a0ff0d1": { + "ada52c81a40444eca27763305e25ef92": { "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", "state": { - "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_00a0f8d7b04c495b91b6decf446c50d5", - "placeholder": "​", - "style": "IPY_MODEL_199779c3a63c4632be9c8fa65b7f33d8", - "value": " 799/799 [00:00<00:00, 37.0kB/s]" - } - }, - "92bbac1ac3da4331abc4c0afe7fffbd6": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } 
- }, - "bc01b443cb204014bcbcf7cb0fea4c86": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "28a64ca9341341449b9e778d73db6321": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "ba10edc885244040ab89498be10bd4db": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - 
"_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "584e80ac477d4a01b23405a5fa29f092": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "00a0f8d7b04c495b91b6decf446c50d5": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - 
"flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "199779c3a63c4632be9c8fa65b7f33d8": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "3e3303826e33485ca844fc82a1035b61": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_c32fbe6f9b39480985e3bff59f6fcccd", - "IPY_MODEL_ad77277175bb456a9a6ce15af4aa5868", - "IPY_MODEL_a6b0d04284b748adb9e74530d25589e0" - ], - "layout": "IPY_MODEL_2ae210dcb6bd47d584b980d478b254a2" - } - }, - "c32fbe6f9b39480985e3bff59f6fcccd": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - 
"state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_e95487a1fdac44fdb47b991d8ba87c3c", - "placeholder": "​", - "style": "IPY_MODEL_dd3eef846edd4b5382b97ed6dce2c6d6", - "value": "model.safetensors: 100%" - } - }, - "ad77277175bb456a9a6ce15af4aa5868": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_498ff77afafa440cb0f6afb39627ec1d", - "max": 1112206140, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_4015861324f64d4ab2b1a7a4153266ff", - "value": 1112206140 - } - }, - "a6b0d04284b748adb9e74530d25589e0": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_7dd04c9ff2b34590ade55890b2f47b88", - "placeholder": "​", - "style": "IPY_MODEL_c33d33427cc245eebe6e190684304904", - "value": " 1.11G/1.11G [00:08<00:00, 175MB/s]" - } - }, - "2ae210dcb6bd47d584b980d478b254a2": { - "model_module": 
"@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "e95487a1fdac44fdb47b991d8ba87c3c": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - 
"grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "dd3eef846edd4b5382b97ed6dce2c6d6": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", + "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", + "bar_color": null, "description_width": "" } }, - "498ff77afafa440cb0f6afb39627ec1d": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - 
"object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "4015861324f64d4ab2b1a7a4153266ff": { + "af680e36244e4f9691bb156d01c3b3b8": { "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", + "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", - "bar_color": null, "description_width": "" } }, - "7dd04c9ff2b34590ade55890b2f47b88": { + "b1140f9f312441a8a32b8c7a9461baac": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -6037,102 +5886,20 @@ "object_fit": null, "object_position": null, "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "c33d33427cc245eebe6e190684304904": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "4f46a6443368428193090a6a4ddf3473": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - 
"_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_b2c7ed0a7fcd45229194b5422b7801d3", - "IPY_MODEL_434d6bee7bfc4aa5bab6c2a080a193e5", - "IPY_MODEL_7e9a93af505a4b1a96c947363bbae0b9" - ], - "layout": "IPY_MODEL_453f440ade9946a5b12bfe6cea948368" - } - }, - "b2c7ed0a7fcd45229194b5422b7801d3": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_c92cde2b50804c18ace89bc605d1d6d1", - "placeholder": "​", - "style": "IPY_MODEL_1d616b7a849f4a83a0f1d8dd446b96f7", - "value": "Downloading data: 100%" - } - }, - "434d6bee7bfc4aa5bab6c2a080a193e5": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_a938e445c6674332b9d6253358d0e1e0", - "max": 93304, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_01a1a33b691d427bb5cccce1f4b79693", - "value": 93304 + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, 
+ "visibility": null, + "width": null } }, - "7e9a93af505a4b1a96c947363bbae0b9": { + "b2c7ed0a7fcd45229194b5422b7801d3": { "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", "model_module_version": "1.5.0", + "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", @@ -6144,16 +5911,16 @@ "_view_name": "HTMLView", "description": "", "description_tooltip": null, - "layout": "IPY_MODEL_33ba91ddcbd74a9e870ed3c2ba9f86d0", + "layout": "IPY_MODEL_c92cde2b50804c18ace89bc605d1d6d1", "placeholder": "​", - "style": "IPY_MODEL_803e1962e31b4a918c28e9ff20732313", - "value": " 93.3k/93.3k [00:00<00:00, 528kB/s]" + "style": "IPY_MODEL_1d616b7a849f4a83a0f1d8dd446b96f7", + "value": "Downloading data: 100%" } }, - "453f440ade9946a5b12bfe6cea948368": { + "b5fd41d1dba0476491cb311bd4d47741": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -6202,10 +5969,25 @@ "width": null } }, - "c92cde2b50804c18ace89bc605d1d6d1": { + "b72560a9b60348e1a2764f24a33188fa": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "b7d5f50552744a528b99873318ee1bfc": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -6254,25 +6036,62 @@ "width": null } }, - "1d616b7a849f4a83a0f1d8dd446b96f7": { - "model_module": 
"@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", + "b7dfac1fc6f047d8a611ad711e18bbe7": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null } }, - "a938e445c6674332b9d6253358d0e1e0": { + "b8f872d00274483c951804adeef7c500": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -6321,26 +6140,46 @@ "width": null } }, - "01a1a33b691d427bb5cccce1f4b79693": { + "b99e200f694d4e2a83360752c6b5441c": { "model_module": "@jupyter-widgets/controls", - 
"model_name": "ProgressStyleModel", "model_module_version": "1.5.0", + "model_name": "HTMLModel", "state": { + "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_5a8b708edb414013a6915a9cbbe95f0a", + "placeholder": "​", + "style": "IPY_MODEL_f451f6f10dfc45049f07c44e12b04836", + "value": " 5.07M/5.07M [00:01<00:00, 3.74MB/s]" + } + }, + "ba01a8212e0741c48d0d49095cfb5c17": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", - "bar_color": null, "description_width": "" } }, - "33ba91ddcbd74a9e870ed3c2ba9f86d0": { + "ba10edc885244040ab89498be10bd4db": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -6389,47 +6228,10 @@ "width": null } }, - "803e1962e31b4a918c28e9ff20732313": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "8a78b64d3f6b4ffdae7e79266d798635": { - 
"model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_bb9b8200e6be4e18a8cf38b14b03e4ce", - "IPY_MODEL_a83bd13e5c6e4cbb96c2c2f7acfe8423", - "IPY_MODEL_471ea20537984f18a07b3a198750c3e0" - ], - "layout": "IPY_MODEL_f79797ffd6a649b1a0edae63eee91bea" - } - }, "bb9b8200e6be4e18a8cf38b14b03e4ce": { "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", "model_module_version": "1.5.0", + "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", @@ -6447,55 +6249,62 @@ "value": "Generating queries split: 100%" } }, - "a83bd13e5c6e4cbb96c2c2f7acfe8423": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_e582de00e2af4948b9f072653c787712", - "max": 1000, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_2887f8a70d8a45d5b633ec2106865a45", - "value": 1000 - } - }, - "471ea20537984f18a07b3a198750c3e0": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", + "bbbc8e741a0b44ef835e10fee58bbadf": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", 
"state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_fc2403f083124228befe690caad6dd3d", - "placeholder": "​", - "style": "IPY_MODEL_a60c75bb501b49e48d09dd50cb645bdd", - "value": " 1000/1000 [00:00<00:00, 25314.62 examples/s]" + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null } }, - "f79797ffd6a649b1a0edae63eee91bea": { + "bc01b443cb204014bcbcf7cb0fea4c86": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -6544,10 +6353,10 @@ "width": null } }, - "ea704757587d4b09af079e555d6f57d1": { + 
"bc9715c03c4d41f7b13cdc3fbca26b1b": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -6596,25 +6405,10 @@ "width": null } }, - "6dcfc1e9c851445e95b862f30dfc8dee": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "e582de00e2af4948b9f072653c787712": { + "c2115e12648343a4b0ca23455c46f9a5": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -6663,26 +6457,70 @@ "width": null } }, - "2887f8a70d8a45d5b633ec2106865a45": { + "c32fbe6f9b39480985e3bff59f6fcccd": { "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", + "model_name": "HTMLModel", "state": { + "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_e95487a1fdac44fdb47b991d8ba87c3c", + "placeholder": "​", + "style": "IPY_MODEL_dd3eef846edd4b5382b97ed6dce2c6d6", + "value": "model.safetensors: 100%" + } + }, + "c33d33427cc245eebe6e190684304904": { + "model_module": "@jupyter-widgets/controls", + 
"model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", - "bar_color": null, "description_width": "" } }, - "fc2403f083124228befe690caad6dd3d": { + "c3be2b813b664e0cb9443c2aa0707afc": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_8bd10565e96a46c2882d635a524593f2", + "max": 437955512, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_10d4116aace649bbae035c02e13828d8", + "value": 437955512 + } + }, + "c61d80f6b40a4716b18ab7555fe604d1": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -6731,113 +6569,114 @@ "width": null } }, - "a60c75bb501b49e48d09dd50cb645bdd": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", + "c8ebd32170a44ab6bfedee79ea5509ec": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", + "_model_module": "@jupyter-widgets/base", + "_model_module_version": 
"1.2.0", + "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "a3eb172e9d324da1bd7d8914e66d2106": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_166beb1aa15b4927a9c27fda4a8d6de1", - "IPY_MODEL_f2667dbbb4c5462986c9cad904767540", - "IPY_MODEL_7fd4ee74216249ae806b5d4045da9523" - ], - "layout": "IPY_MODEL_b5fd41d1dba0476491cb311bd4d47741" - } - }, - "166beb1aa15b4927a9c27fda4a8d6de1": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_83474dc942a44919a4e48ee36b65f8f6", - "placeholder": "​", - "style": "IPY_MODEL_77002ce5084c44b8b06987bee947f099", - "value": "Downloading data: 100%" - } - }, - "f2667dbbb4c5462986c9cad904767540": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": 
"ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_dd9ca10fac4447bfaa7bd665a88e1033", - "max": 19040902, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_27b6a73af53d4cf1946ae2ece8c499e2", - "value": 19040902 + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null } }, - "7fd4ee74216249ae806b5d4045da9523": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", + "c92cde2b50804c18ace89bc605d1d6d1": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_bbbc8e741a0b44ef835e10fee58bbadf", - "placeholder": "​", - "style": 
"IPY_MODEL_1fbd0891d5a24a54ae54656b0d8a6247", - "value": " 19.0M/19.0M [00:02<00:00, 7.08MB/s]" + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null } }, - "b5fd41d1dba0476491cb311bd4d47741": { + "ca7870dc84ec48c681f6411595f321ad": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -6886,10 +6725,56 @@ "width": null } }, - "83474dc942a44919a4e48ee36b65f8f6": { + "cc666236572240f8b1015f187a2f66d9": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "cc951613d15d49bbb24409d46b06c1b6": { + "model_module": "@jupyter-widgets/controls", + 
"model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "cef20c5f499646e291329b580cf3800f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "d30b92db93594fe8b2f83241bb498f78": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -6938,25 +6823,26 @@ "width": null } }, - "77002ce5084c44b8b06987bee947f099": { + "d6b75db679df4a849b362c77df481e30": { "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", + "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", + "bar_color": null, "description_width": "" } }, - "dd9ca10fac4447bfaa7bd665a88e1033": { + "d88597e336ad4297a4aa6bd3d7fdc5dd": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": 
"@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -7005,26 +6891,47 @@ "width": null } }, - "27b6a73af53d4cf1946ae2ece8c499e2": { + "db68c63dcd244ca9b8b391559f8abfd1": { "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", + "model_name": "HBoxModel", "state": { + "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_158d4dd4e4f7495e9d2d6f360c29bf02", + "IPY_MODEL_16dd8588f2464d4281c0dde85cc28c6d", + "IPY_MODEL_ad2279ff1d4d47068d037ca698005140" + ], + "layout": "IPY_MODEL_ca7870dc84ec48c681f6411595f321ad" + } + }, + "dd3eef846edd4b5382b97ed6dce2c6d6": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", - "bar_color": null, "description_width": "" } }, - "bbbc8e741a0b44ef835e10fee58bbadf": { + "dd9ca10fac4447bfaa7bd665a88e1033": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -7073,68 +6980,94 @@ "width": null } }, - "1fbd0891d5a24a54ae54656b0d8a6247": { + "e191dbd0809c47c7ba28d3f6a0fcb1c5": { "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", + "model_name": "HTMLModel", "state": { + "_dom_classes": [], "_model_module": 
"@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", + "_model_name": "HTMLModel", "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_7410dd99fca940159ba8d13c9c52bae3", + "placeholder": "​", + "style": "IPY_MODEL_6710ae95aea9445ab998afb5d0bb3241", + "value": " 443/443 [00:00<00:00, 35.6kB/s]" } }, - "501f90bcf1ff4efe81cd377df249415e": { + "e2c52e10ab294bc68bcb1caadaf8d0c7": { "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", + "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_aa29504327084ed5816d90ca3f9e9f16", - "IPY_MODEL_71c123550e2a4166955eef2f142170fb", - "IPY_MODEL_15b01052fc6140e2be8fae7c2d2928fa" - ], - "layout": "IPY_MODEL_8badd98fa463404aa80f604b45f4a912" + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_b1140f9f312441a8a32b8c7a9461baac", + "max": 79, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_4c955a42756d47eea9a00a87c4b5f0f0", + "value": 79 } }, - "aa29504327084ed5816d90ca3f9e9f16": { + "e37f90735fd94698a4172e0292da7c9f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + 
"_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "e409114ff67443ca92cb46ffb0697b58": { "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", + "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", - "_view_name": "HTMLView", + "_view_name": "ProgressView", + "bar_style": "success", "description": "", "description_tooltip": null, - "layout": "IPY_MODEL_407cf90293e44890903a4d89ee08008a", - "placeholder": "​", - "style": "IPY_MODEL_18df3037afae470e8ac9d297f93fd9ce", - "value": "Generating corpus split: 100%" + "layout": "IPY_MODEL_944f4a261b5240408ab7fc473c7b0835", + "max": 279, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_32b556bd8ddc4ef196eba4d1fd6b6b62", + "value": 279 } }, - "71c123550e2a4166955eef2f142170fb": { + "e5486dfd2ecd411eb60f3e3a89b64660": { "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", @@ -7147,18 +7080,70 @@ "bar_style": "success", "description": "", "description_tooltip": null, - "layout": "IPY_MODEL_9c0796729bb0455d92a4f418e86fa38a", - "max": 25657, + "layout": "IPY_MODEL_90eac81b19a04354ad842f3fbe87e694", + "max": 17098107, "min": 0, "orientation": "horizontal", - "style": "IPY_MODEL_5baf674c98a846e1a79fda9c8ee77e78", - "value": 25657 + "style": "IPY_MODEL_cc666236572240f8b1015f187a2f66d9", + "value": 17098107 + } + }, + 
"e582de00e2af4948b9f072653c787712": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null } }, - "15b01052fc6140e2be8fae7c2d2928fa": { + "e7a255788ec94998924142f4255ce409": { "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", "model_module_version": "1.5.0", + "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", @@ -7170,16 +7155,16 @@ "_view_name": "HTMLView", "description": "", "description_tooltip": null, - "layout": "IPY_MODEL_11a0fb71fd0e486982215656adcd2bdc", + "layout": "IPY_MODEL_d30b92db93594fe8b2f83241bb498f78", "placeholder": "​", - "style": "IPY_MODEL_5be1ec5880cb459fb7a88ae7c1f2394f", - "value": " 25657/25657 [00:00<00:00, 77019.91 examples/s]" + "style": "IPY_MODEL_9f679d0c8c5e4bf59111200f955ae8d7", + "value": "special_tokens_map.json: 100%" } }, - 
"8badd98fa463404aa80f604b45f4a912": { + "e95487a1fdac44fdb47b991d8ba87c3c": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -7228,10 +7213,10 @@ "width": null } }, - "407cf90293e44890903a4d89ee08008a": { + "ea704757587d4b09af079e555d6f57d1": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -7280,10 +7265,10 @@ "width": null } }, - "18df3037afae470e8ac9d297f93fd9ce": { + "eb6fa6ee9e74440bb4ce2a92ee4548c7": { "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", @@ -7295,10 +7280,31 @@ "description_width": "" } }, - "9c0796729bb0455d92a4f418e86fa38a": { + "f1dd6f744bf34a3aa1ecd72115f63155": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_5aae76de0c3b42fb81642990d8bbdf93", + "placeholder": "​", + "style": "IPY_MODEL_cef20c5f499646e291329b580cf3800f", + "value": "tokenizer.json: 100%" + } + }, + "f22ae0abe3cb40a5b97559c7216400e7": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": 
"@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -7347,26 +7353,34 @@ "width": null } }, - "5baf674c98a846e1a79fda9c8ee77e78": { + "f2667dbbb4c5462986c9cad904767540": { "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", "state": { + "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", + "_model_name": "FloatProgressModel", "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_dd9ca10fac4447bfaa7bd665a88e1033", + "max": 19040902, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_27b6a73af53d4cf1946ae2ece8c499e2", + "value": 19040902 } }, - "11a0fb71fd0e486982215656adcd2bdc": { + "f2f00b1f73954d95b95889fa1a34c5ae": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -7415,10 +7429,10 @@ "width": null } }, - "5be1ec5880cb459fb7a88ae7c1f2394f": { + "f451f6f10dfc45049f07c44e12b04836": { "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", @@ -7432,8 +7446,8 @@ }, "f6b5cd5ff9704a58986eff2c9c88db4c": { "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", "model_module_version": "1.5.0", + "model_name": "HBoxModel", "state": { "_dom_classes": 
[], "_model_module": "@jupyter-widgets/controls", @@ -7452,76 +7466,10 @@ "layout": "IPY_MODEL_5cd148642750417abd38cbf483ccf1f9" } }, - "691424012c59434f8cc17f3d6aa001f3": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_8cad72468680488aa62c33186cedf084", - "placeholder": "​", - "style": "IPY_MODEL_21b3538c53cd4ff5895a817782884101", - "value": "100%" - } - }, - "e2c52e10ab294bc68bcb1caadaf8d0c7": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_b1140f9f312441a8a32b8c7a9461baac", - "max": 79, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_4c955a42756d47eea9a00a87c4b5f0f0", - "value": 79 - } - }, - "ffba63bef7e944b7923b2b29f9495527": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - 
"layout": "IPY_MODEL_b8f872d00274483c951804adeef7c500", - "placeholder": "​", - "style": "IPY_MODEL_cc951613d15d49bbb24409d46b06c1b6", - "value": " 79/79 [06:46<00:00, 4.08s/it]" - } - }, - "5cd148642750417abd38cbf483ccf1f9": { + "f79797ffd6a649b1a0edae63eee91bea": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -7570,10 +7518,10 @@ "width": null } }, - "8cad72468680488aa62c33186cedf084": { + "f918256ef4874941a1ec098ea5050f6a": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -7622,25 +7570,10 @@ "width": null } }, - "21b3538c53cd4ff5895a817782884101": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "b1140f9f312441a8a32b8c7a9461baac": { + "fb03d4b1113a42d1914557a26058d82f": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -7689,26 +7622,62 @@ "width": null } }, - "4c955a42756d47eea9a00a87c4b5f0f0": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "model_module_version": "1.5.0", + "fc2403f083124228befe690caad6dd3d": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": 
"LayoutModel", "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null } }, - "b8f872d00274483c951804adeef7c500": { + "fd80d3b519124a759df834da4af06967": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -7757,19 +7726,25 @@ "width": null } }, - "cc951613d15d49bbb24409d46b06c1b6": { + "ffba63bef7e944b7923b2b29f9495527": { "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", + "model_name": "HTMLModel", "state": { + "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - 
"_model_name": "DescriptionStyleModel", + "_model_name": "HTMLModel", "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_b8f872d00274483c951804adeef7c500", + "placeholder": "​", + "style": "IPY_MODEL_cc951613d15d49bbb24409d46b06c1b6", + "value": " 79/79 [06:46<00:00, 4.08s/it]" } } } diff --git a/examples/audio_search/main.ipynb b/examples/audio_search/main.ipynb index 49cab5ce..ce19c4cd 100644 --- a/examples/audio_search/main.ipynb +++ b/examples/audio_search/main.ipynb @@ -1,959 +1,951 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "3lhhVh6TWRjq" - }, - "source": [ - "# Audio Similarity Search using Vector Embeddings\n", - "This notebook demonstrates how to create vector embeddings of audio files to store into the LanceDB vector store, and then to find similar audio files.\n", - "We will be using [panns_inference package](https://github.com/qiuqiangkong/panns_inference) to tag the audio and create embeddings. We'll also be using this [HuggingFace dataset](https://huggingface.co/datasets/ashraq/esc50) for the audio files. The dataset contains 2,000 sounds and labels." 
- ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Installing dependencies" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "Pks8RDrdWRjt", - "outputId": "387f9c04-f6c5-42ec-f7ba-87a3ae654162" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Collecting datasets\n", - " Downloading datasets-2.14.6-py3-none-any.whl (493 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m493.7/493.7 kB\u001b[0m \u001b[31m5.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from datasets) (1.23.5)\n", - "Requirement already satisfied: pyarrow>=8.0.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (9.0.0)\n", - "Collecting dill<0.3.8,>=0.3.0 (from datasets)\n", - " Downloading dill-0.3.7-py3-none-any.whl (115 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m115.3/115.3 kB\u001b[0m \u001b[31m6.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from datasets) (1.5.3)\n", - "Requirement already satisfied: requests>=2.19.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (2.31.0)\n", - "Requirement already satisfied: tqdm>=4.62.1 in /usr/local/lib/python3.10/dist-packages (from datasets) (4.66.1)\n", - "Requirement already satisfied: xxhash in /usr/local/lib/python3.10/dist-packages (from datasets) (3.4.1)\n", - "Collecting multiprocess (from datasets)\n", - " Downloading multiprocess-0.70.15-py310-none-any.whl (134 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m10.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: 
fsspec[http]<=2023.10.0,>=2023.1.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (2023.6.0)\n", - "Requirement already satisfied: aiohttp in /usr/local/lib/python3.10/dist-packages (from datasets) (3.8.6)\n", - "Collecting huggingface-hub<1.0.0,>=0.14.0 (from datasets)\n", - " Downloading huggingface_hub-0.18.0-py3-none-any.whl (301 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m302.0/302.0 kB\u001b[0m \u001b[31m11.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from datasets) (23.2)\n", - "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from datasets) (6.0.1)\n", - "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (23.1.0)\n", - "Requirement already satisfied: charset-normalizer<4.0,>=2.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (3.3.0)\n", - "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (6.0.4)\n", - "Requirement already satisfied: async-timeout<5.0,>=4.0.0a3 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (4.0.3)\n", - "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.9.2)\n", - "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.4.0)\n", - "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.3.1)\n", - "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0.0,>=0.14.0->datasets) (3.12.4)\n", - "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from 
huggingface-hub<1.0.0,>=0.14.0->datasets) (4.5.0)\n", - "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->datasets) (3.4)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->datasets) (2.0.7)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->datasets) (2023.7.22)\n", - "Requirement already satisfied: python-dateutil>=2.8.1 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets) (2.8.2)\n", - "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets) (2023.3.post1)\n", - "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.1->pandas->datasets) (1.16.0)\n", - "Installing collected packages: dill, multiprocess, huggingface-hub, datasets\n", - "Successfully installed datasets-2.14.6 dill-0.3.7 huggingface-hub-0.18.0 multiprocess-0.70.15\n", - "Collecting lancedb\n", - " Downloading lancedb-0.3.1-py3-none-any.whl (60 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m60.4/60.4 kB\u001b[0m \u001b[31m2.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hCollecting deprecation (from lancedb)\n", - " Downloading deprecation-2.1.0-py2.py3-none-any.whl (11 kB)\n", - "Collecting pylance==0.8.3 (from lancedb)\n", - " Downloading pylance-0.8.3-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (21.3 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m21.3/21.3 MB\u001b[0m \u001b[31m38.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hCollecting ratelimiter~=1.0 (from lancedb)\n", - " Downloading ratelimiter-1.2.0.post0-py3-none-any.whl (6.6 kB)\n", - "Collecting retry>=0.9.2 (from lancedb)\n", - " Downloading retry-0.9.2-py2.py3-none-any.whl 
(8.0 kB)\n", - "Requirement already satisfied: tqdm>=4.1.0 in /usr/local/lib/python3.10/dist-packages (from lancedb) (4.66.1)\n", - "Requirement already satisfied: aiohttp in /usr/local/lib/python3.10/dist-packages (from lancedb) (3.8.6)\n", - "Requirement already satisfied: pydantic>=1.10 in /usr/local/lib/python3.10/dist-packages (from lancedb) (1.10.13)\n", - "Requirement already satisfied: attrs>=21.3.0 in /usr/local/lib/python3.10/dist-packages (from lancedb) (23.1.0)\n", - "Collecting semver>=3.0 (from lancedb)\n", - " Downloading semver-3.0.2-py3-none-any.whl (17 kB)\n", - "Requirement already satisfied: cachetools in /usr/local/lib/python3.10/dist-packages (from lancedb) (5.3.1)\n", - "Requirement already satisfied: pyyaml>=6.0 in /usr/local/lib/python3.10/dist-packages (from lancedb) (6.0.1)\n", - "Requirement already satisfied: click>=8.1.7 in /usr/local/lib/python3.10/dist-packages (from lancedb) (8.1.7)\n", - "Requirement already satisfied: requests>=2.31.0 in /usr/local/lib/python3.10/dist-packages (from lancedb) (2.31.0)\n", - "Collecting pyarrow>=10 (from pylance==0.8.3->lancedb)\n", - " Downloading pyarrow-13.0.0-cp310-cp310-manylinux_2_28_x86_64.whl (40.0 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m40.0/40.0 MB\u001b[0m \u001b[31m16.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: numpy>=1.22 in /usr/local/lib/python3.10/dist-packages (from pylance==0.8.3->lancedb) (1.23.5)\n", - "Requirement already satisfied: typing-extensions>=4.2.0 in /usr/local/lib/python3.10/dist-packages (from pydantic>=1.10->lancedb) (4.5.0)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests>=2.31.0->lancedb) (3.3.0)\n", - "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests>=2.31.0->lancedb) (3.4)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in 
/usr/local/lib/python3.10/dist-packages (from requests>=2.31.0->lancedb) (2.0.7)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests>=2.31.0->lancedb) (2023.7.22)\n", - "Requirement already satisfied: decorator>=3.4.2 in /usr/local/lib/python3.10/dist-packages (from retry>=0.9.2->lancedb) (4.4.2)\n", - "Collecting py<2.0.0,>=1.4.26 (from retry>=0.9.2->lancedb)\n", - " Downloading py-1.11.0-py2.py3-none-any.whl (98 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m98.7/98.7 kB\u001b[0m \u001b[31m15.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp->lancedb) (6.0.4)\n", - "Requirement already satisfied: async-timeout<5.0,>=4.0.0a3 in /usr/local/lib/python3.10/dist-packages (from aiohttp->lancedb) (4.0.3)\n", - "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->lancedb) (1.9.2)\n", - "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp->lancedb) (1.4.0)\n", - "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp->lancedb) (1.3.1)\n", - "Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from deprecation->lancedb) (23.2)\n", - "Installing collected packages: ratelimiter, semver, pyarrow, py, deprecation, retry, pylance, lancedb\n", - " Attempting uninstall: pyarrow\n", - " Found existing installation: pyarrow 9.0.0\n", - " Uninstalling pyarrow-9.0.0:\n", - " Successfully uninstalled pyarrow-9.0.0\n", - "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. 
This behaviour is the source of the following dependency conflicts.\n", - "ibis-framework 6.2.0 requires pyarrow<13,>=2, but you have pyarrow 13.0.0 which is incompatible.\n", - "pandas-gbq 0.17.9 requires pyarrow<10.0dev,>=3.0.0, but you have pyarrow 13.0.0 which is incompatible.\u001b[0m\u001b[31m\n", - "\u001b[0mSuccessfully installed deprecation-2.1.0 lancedb-0.3.1 py-1.11.0 pyarrow-13.0.0 pylance-0.8.3 ratelimiter-1.2.0.post0 retry-0.9.2 semver-3.0.2\n" - ] - } - ], - "source": [ - "!pip install panns-inference tqdm --q\n", - "!pip3 install datasets\n", - "!pip install lancedb" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Importing all the libraries" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "id": "hToUqkBBWto1" - }, - "outputs": [], - "source": [ - "import lancedb" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "fF08IHEDalKU" - }, - "source": [ - "**NOTE** : if you get any error while importing lancedb just you need to restart runtime" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "id": "0jIb2Gr8WRju" - }, - "outputs": [], - "source": [ - "from datasets import load_dataset\n", - "from panns_inference import AudioTagging\n", - "from tqdm import tqdm\n", - "from IPython.display import Audio, display\n", - "import numpy as np" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "x6QfsfHlWRju" - }, - "source": [ - "On devices that have CUDA installed, you may be able to install torch's CUDA supported version.\n", - "```bash\n", - "pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118\n", - "```\n", - "If you don't have CUDA or a GPU (or different os), you can install torch here: https://pytorch.org/get-started/locally/" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Load data" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "colab": { 
- "base_uri": "https://localhost:8080/" - }, - "id": "fyjp-ffQWRjv", - "outputId": "edb7fdfa-27e7-4b00-fa2d-409bbf1d23b8" - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Repo card metadata block was not found. Setting CardData to empty.\n", - "WARNING:huggingface_hub.repocard:Repo card metadata block was not found. Setting CardData to empty.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Checkpoint path: /root/panns_data/Cnn14_mAP=0.431.pth\n", - "GPU number: 1\n" - ] - } - ], - "source": [ - "dataset = load_dataset(\"ashraq/esc50\", split=\"train\")\n", - "at = AudioTagging(checkpoint_path=None, device=\"cuda\") # device=\"cpu\" for CPU inference" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "Zm9Qz9WVWRjv", - "outputId": "4cfd5f6d-3d83-4930-ceaf-9cd4c80eb774" - }, - "outputs": [ - { - "data": { - "text/plain": [ - "Dataset({\n", - " features: ['filename', 'fold', 'target', 'category', 'esc10', 'src_file', 'take', 'audio'],\n", - " num_rows: 2000\n", - "})" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "dataset" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "INND51clWRjv" - }, - "source": [ - "### Create Embeddings\n", - "Now, to create the data embeddings! We can start by creating batches of 70 for the data, keeping track of the most important columns: `category` and `audio`." 
- ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": { - "id": "VKflK56YWRjv" - }, - "outputs": [], - "source": [ - "batches = [batch[\"audio\"] for batch in dataset.iter(50)]\n", - "meta_batches = [batch[\"category\"] for batch in dataset.iter(50)]\n", - "audio_data = [np.array([audio[\"array\"] for audio in batch]) for batch in batches]\n", - "meta_data = [np.array([meta for meta in batch]) for batch in meta_batches]" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "B4mB3sa2WRjw" - }, - "source": [ - "We now want to iterate through these batches, and for each audio file, we want to use the AudioTagging embedder to extract the embedding. Then, we can store these embeddings, audio files, and category name into a list of dictionaries. Each dictionary has to contain a `vector` column in order to add to the LanceDB table, if no embedding function is provided." - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "pdt1n8S7WRjw", - "outputId": "96d4b5c6-b1c2-497f-c35f-d5905548f6f0" - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|██████████| 40/40 [00:19<00:00, 2.06it/s]\n" - ] - } - ], - "source": [ - "for i in tqdm(range(len(audio_data))):\n", - " (_, embedding) = at.inference(audio_data[i])\n", - " data = [\n", - " {\n", - " \"audio\": x[0][\"array\"],\n", - " \"vector\": x[1],\n", - " \"sampling_rate\": x[0][\"sampling_rate\"],\n", - " \"category\": meta_data[i][j],\n", - " }\n", - " for j, x in enumerate(zip(batches[i], embedding))\n", - " ]" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "CRpnHjJbWRjw" - }, - "source": [ - "Once we have this data list, we can create a LanceDB table by first connecting to a certain directory before, and then calling `db.create_table()`. If the table already exists, we open the table and add the data." 
- ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Add the VectorStore" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": { - "id": "3lh_d6m3WRjw" - }, - "outputs": [], - "source": [ - "# Connect to directory at the top of the file\n", - "db = lancedb.connect(\"data/audio-lancedb\")\n", - "table_name = \"audio-search\"\n", - "\n", - "if table_name not in db.table_names():\n", - " tbl = db.create_table(table_name, data)\n", - "else:\n", - " tbl = db.open_table(table_name)\n", - " tbl.add(data)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "m7WfeIv8WRjw" - }, - "source": [ - "We can now combine all of this into a single function:" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Composite function" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": { - "id": "XnCHvlLsWRjw" - }, - "outputs": [], - "source": [ - "def insert_audio():\n", - " batches = [batch[\"audio\"] for batch in dataset.iter(20)]\n", - " meta_batches = [batch[\"category\"] for batch in dataset.iter(20)]\n", - " audio_data = [np.array([audio[\"array\"] for audio in batch]) for batch in batches]\n", - " meta_data = [np.array([meta for meta in batch]) for batch in meta_batches]\n", - " print(\"Start\")\n", - " for i in tqdm(range(len(audio_data))):\n", - " (_, embedding) = at.inference(audio_data[i])\n", - " data = [\n", - " {\n", - " \"audio\": x[0][\"array\"],\n", - " \"vector\": x[1],\n", - " \"sampling_rate\": x[0][\"sampling_rate\"],\n", - " \"category\": meta_data[i][j],\n", - " }\n", - " for j, x in enumerate(zip(batches[i], embedding))\n", - " ]\n", - " if table_name not in db.table_names():\n", - " tbl = db.create_table(table_name, data)\n", - " else:\n", - " tbl = db.open_table(table_name)\n", - " tbl.add(data)" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": { - "id": "UvEhnuLyWRjw" - }, - "outputs": [], - "source": [ - "import shutil\n", - 
"\n", - "shutil.rmtree(\"data/audio-lancedb/audio-search.lance\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "TXxGHwZdgZrG" - }, - "outputs": [], - "source": [ - "insert_audio()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "vr9LehNiiUNb" - }, - "source": [ - "NOTE: if you get out of ram .next time simply run all cells & uncomment this lines #insert_audio" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "mPBphF19WRjx" - }, - "source": [ - "Great! We now have a fully populated table with all the necessary information. The next step would be to query the table and find those similar audio files. We can do this by first opening the table, and then getting the specific audio file we want to search for." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Query the database" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 93 - }, - "id": "ZsGYl6YSWRjx", - "outputId": "8cc83527-0540-47aa-99b5-054530cf5615" - }, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - " \n", - " " - ], - "text/plain": [ - "" + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "3lhhVh6TWRjq" + }, + "source": [ + "# Audio Similarity Search using Vector Embeddings\n", + "This notebook demonstrates how to create vector embeddings of audio files to store into the LanceDB vector store, and then to find similar audio files.\n", + "We will be using [panns_inference package](https://github.com/qiuqiangkong/panns_inference) to tag the audio and create embeddings. We'll also be using this [HuggingFace dataset](https://huggingface.co/datasets/ashraq/esc50) for the audio files. The dataset contains 2,000 sounds and labels." 
] - }, - "metadata": {}, - "output_type": "display_data" }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "Category: water_drops\n" - ] - } - ], - "source": [ - "tbl = db.open_table(table_name)\n", - "audio = dataset[50][\"audio\"][\"array\"]\n", - "category = dataset[50][\"category\"]\n", - "display(Audio(audio, rate=dataset[50][\"audio\"][\"sampling_rate\"]))\n", - "print(\"Category:\", category)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Et2C9t87WRjx" - }, - "source": [ - "Next, we call the embedding function again to create those embeddings, which would allow us to search our table." - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "ZmXOqB2FWRjx", - "outputId": "05659b4c-acb6-4514-e3c9-d96ecdf84f1a" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " audio \\\n", - "0 [0.00506591796875, 0.00653076171875, 0.0051574... \n", - "1 [-0.157318115234375, -0.122344970703125, -0.17... \n", - "2 [-0.0162353515625, -0.015716552734375, -0.0150... \n", - "3 [-0.0008544921875, -0.000762939453125, -0.0005... \n", - "4 [-0.003753662109375, -0.004119873046875, -0.00... \n", - "\n", - " vector sampling_rate \\\n", - "0 [0.0, 0.70255554, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0... 44100 \n", - "1 [0.0, 0.68818694, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0... 44100 \n", - "2 [0.0, 0.58163136, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0... 44100 \n", - "3 [0.0, 1.0475253, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,... 44100 \n", - "4 [0.0, 0.45124823, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0... 44100 \n", - "\n", - " category _distance \n", - "0 water_drops 52.260368 \n", - "1 water_drops 57.536537 \n", - "2 water_drops 75.637558 \n", - "3 drinking_sipping 76.979111 \n", - "4 water_drops 77.981865 \n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - ":2: DeprecatedWarning: to_df is deprecated as of 0.3.1 and will be removed in 0.4.0. 
Use the bar function instead\n", - " result = tbl.search(embedding[0]).limit(5).to_df()\n" - ] - } - ], - "source": [ - "(_, embedding) = at.inference(audio[None, :])\n", - "result = tbl.search(embedding[0]).limit(5).to_df()\n", - "print(result)" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 396 - }, - "id": "enl39Zp8WRjx", - "outputId": "305805b6-1540-4708-8345-071083221c80" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0. Category: water_drops\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - " \n", - " " - ], - "text/plain": [ - "" + "cell_type": "markdown", + "metadata": { + "id": "is73uCkAZLBj" + }, + "source": [ + "### Installing dependencies" ] - }, - "metadata": {}, - "output_type": "display_data" }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "1. Category: water_drops\n" - ] + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Pks8RDrdWRjt", + "outputId": "c66c58b2-4f84-4b9c-e563-acda96e620cd" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Requirement already satisfied: datasets in /usr/local/lib/python3.10/dist-packages (2.17.1)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from datasets) (3.13.1)\n", + "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from datasets) (1.25.2)\n", + "Requirement already satisfied: pyarrow>=12.0.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (14.0.2)\n", + "Requirement already satisfied: pyarrow-hotfix in /usr/local/lib/python3.10/dist-packages (from datasets) (0.6)\n", + "Requirement already satisfied: dill<0.3.9,>=0.3.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (0.3.8)\n", + "Requirement already satisfied: pandas in 
/usr/local/lib/python3.10/dist-packages (from datasets) (1.5.3)\n", + "Requirement already satisfied: requests>=2.19.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (2.31.0)\n", + "Requirement already satisfied: tqdm>=4.62.1 in /usr/local/lib/python3.10/dist-packages (from datasets) (4.66.2)\n", + "Requirement already satisfied: xxhash in /usr/local/lib/python3.10/dist-packages (from datasets) (3.4.1)\n", + "Requirement already satisfied: multiprocess in /usr/local/lib/python3.10/dist-packages (from datasets) (0.70.16)\n", + "Requirement already satisfied: fsspec[http]<=2023.10.0,>=2023.1.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (2023.6.0)\n", + "Requirement already satisfied: aiohttp in /usr/local/lib/python3.10/dist-packages (from datasets) (3.9.3)\n", + "Requirement already satisfied: huggingface-hub>=0.19.4 in /usr/local/lib/python3.10/dist-packages (from datasets) (0.20.3)\n", + "Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from datasets) (23.2)\n", + "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from datasets) (6.0.1)\n", + "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.3.1)\n", + "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (23.2.0)\n", + "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.4.1)\n", + "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (6.0.5)\n", + "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.9.4)\n", + "Requirement already satisfied: async-timeout<5.0,>=4.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (4.0.3)\n", + "Requirement already satisfied: 
typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.19.4->datasets) (4.10.0)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->datasets) (3.3.2)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->datasets) (3.6)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->datasets) (2.0.7)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->datasets) (2024.2.2)\n", + "Requirement already satisfied: python-dateutil>=2.8.1 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets) (2.8.2)\n", + "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets) (2023.4)\n", + "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.1->pandas->datasets) (1.16.0)\n", + "Requirement already satisfied: lancedb in /usr/local/lib/python3.10/dist-packages (0.6.1)\n", + "Requirement already satisfied: deprecation in /usr/local/lib/python3.10/dist-packages (from lancedb) (2.1.0)\n", + "Requirement already satisfied: pylance==0.10.1 in /usr/local/lib/python3.10/dist-packages (from lancedb) (0.10.1)\n", + "Requirement already satisfied: ratelimiter~=1.0 in /usr/local/lib/python3.10/dist-packages (from lancedb) (1.2.0.post0)\n", + "Requirement already satisfied: retry>=0.9.2 in /usr/local/lib/python3.10/dist-packages (from lancedb) (0.9.2)\n", + "Requirement already satisfied: tqdm>=4.27.0 in /usr/local/lib/python3.10/dist-packages (from lancedb) (4.66.2)\n", + "Requirement already satisfied: pydantic>=1.10 in /usr/local/lib/python3.10/dist-packages (from lancedb) (2.6.3)\n", + "Requirement already satisfied: attrs>=21.3.0 in 
/usr/local/lib/python3.10/dist-packages (from lancedb) (23.2.0)\n", + "Requirement already satisfied: semver>=3.0 in /usr/local/lib/python3.10/dist-packages (from lancedb) (3.0.2)\n", + "Requirement already satisfied: cachetools in /usr/local/lib/python3.10/dist-packages (from lancedb) (5.3.3)\n", + "Requirement already satisfied: pyyaml>=6.0 in /usr/local/lib/python3.10/dist-packages (from lancedb) (6.0.1)\n", + "Requirement already satisfied: click>=8.1.7 in /usr/local/lib/python3.10/dist-packages (from lancedb) (8.1.7)\n", + "Requirement already satisfied: requests>=2.31.0 in /usr/local/lib/python3.10/dist-packages (from lancedb) (2.31.0)\n", + "Requirement already satisfied: overrides>=0.7 in /usr/local/lib/python3.10/dist-packages (from lancedb) (7.7.0)\n", + "Requirement already satisfied: pyarrow>=12 in /usr/local/lib/python3.10/dist-packages (from pylance==0.10.1->lancedb) (14.0.2)\n", + "Requirement already satisfied: numpy>=1.22 in /usr/local/lib/python3.10/dist-packages (from pylance==0.10.1->lancedb) (1.25.2)\n", + "Requirement already satisfied: annotated-types>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from pydantic>=1.10->lancedb) (0.6.0)\n", + "Requirement already satisfied: pydantic-core==2.16.3 in /usr/local/lib/python3.10/dist-packages (from pydantic>=1.10->lancedb) (2.16.3)\n", + "Requirement already satisfied: typing-extensions>=4.6.1 in /usr/local/lib/python3.10/dist-packages (from pydantic>=1.10->lancedb) (4.10.0)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests>=2.31.0->lancedb) (3.3.2)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests>=2.31.0->lancedb) (3.6)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests>=2.31.0->lancedb) (2.0.7)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from 
requests>=2.31.0->lancedb) (2024.2.2)\n", + "Requirement already satisfied: decorator>=3.4.2 in /usr/local/lib/python3.10/dist-packages (from retry>=0.9.2->lancedb) (4.4.2)\n", + "Requirement already satisfied: py<2.0.0,>=1.4.26 in /usr/local/lib/python3.10/dist-packages (from retry>=0.9.2->lancedb) (1.11.0)\n", + "Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from deprecation->lancedb) (23.2)\n" + ] + } + ], + "source": [ + "!pip install panns-inference tqdm --q\n", + "!pip3 install datasets\n", + "!pip install lancedb" + ] }, { - "data": { - "text/html": [ - "\n", - " \n", - " " - ], - "text/plain": [ - "" + "cell_type": "markdown", + "metadata": { + "id": "ZJsz8MnDZLBn" + }, + "source": [ + "### Importing all the libraries" ] - }, - "metadata": {}, - "output_type": "display_data" }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "2. Category: water_drops\n" - ] + "cell_type": "code", + "execution_count": 2, + "metadata": { + "id": "hToUqkBBWto1" + }, + "outputs": [], + "source": [ + "import lancedb" + ] }, { - "data": { - "text/html": [ - "\n", - " \n", - " " - ], - "text/plain": [ - "" + "cell_type": "markdown", + "metadata": { + "id": "fF08IHEDalKU" + }, + "source": [ + "**NOTE**: If you get an error while importing lancedb, you just need to restart the runtime." ] - }, - "metadata": {}, - "output_type": "display_data" }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "3.
Category: drinking_sipping\n" - ] + "cell_type": "code", + "execution_count": 3, + "metadata": { + "id": "0jIb2Gr8WRju" + }, + "outputs": [], + "source": [ + "from datasets import load_dataset\n", + "from panns_inference import AudioTagging\n", + "from tqdm import tqdm\n", + "from IPython.display import Audio, display\n", + "import numpy as np" + ] }, { - "data": { - "text/html": [ - "\n", - " \n", - " " - ], - "text/plain": [ - "" + "cell_type": "markdown", + "metadata": { + "id": "x6QfsfHlWRju" + }, + "source": [ + "On devices that have CUDA installed, you may be able to install torch's CUDA supported version.\n", + "```bash\n", + "pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118\n", + "```\n", + "If you don't have CUDA or a GPU (or different os), you can install torch here: https://pytorch.org/get-started/locally/" ] - }, - "metadata": {}, - "output_type": "display_data" }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "4. Category: water_drops\n" - ] + "cell_type": "markdown", + "metadata": { + "id": "mMy-7PPNZLBr" + }, + "source": [ + "### Load data" + ] }, { - "data": { - "text/html": [ - "\n", - " \n", - " " + "cell_type": "code", + "execution_count": 4, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "fyjp-ffQWRjv", + "outputId": "0eb8ecb4-aed5-453a-96e4-d956645e4555" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_token.py:88: UserWarning: \n", + "The secret `HF_TOKEN` does not exist in your Colab secrets.\n", + "To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n", + "You will be able to reuse this secret in all of your notebooks.\n", + "Please note that authentication is recommended but still optional to access public models or 
datasets.\n", + " warnings.warn(\n", + "/usr/local/lib/python3.10/dist-packages/huggingface_hub/repocard.py:105: UserWarning: Repo card metadata block was not found. Setting CardData to empty.\n", + " warnings.warn(\"Repo card metadata block was not found. Setting CardData to empty.\")\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Checkpoint path: /root/panns_data/Cnn14_mAP=0.431.pth\n", + "GPU number: 1\n" + ] + } ], - "text/plain": [ - "" + "source": [ + "dataset = load_dataset(\"ashraq/esc50\", split=\"train\")\n", + "at = AudioTagging(checkpoint_path=None, device=\"cuda\") # device=\"cpu\" for CPU inference" ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "for i in range(len(result)):\n", - " print(str(i) + \". Category:\", result[\"category\"][i])\n", - " display(Audio(result[\"audio\"][i], rate=result[\"sampling_rate\"][i]))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "mZtR0bxXWRjx" - }, - "source": [ - "Nice! It seems to be working! We can compile this into another function here, that takes an `id` of the audio from 0 to 1,999." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Search Audio using IDs" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": { - "id": "wc1X8MuDWRjx" - }, - "outputs": [], - "source": [ - "def search_audio(id):\n", - " tbl = db.open_table(table_name)\n", - " audio = dataset[id][\"audio\"][\"array\"]\n", - " category = dataset[id][\"category\"]\n", - " display(Audio(audio, rate=dataset[id][\"audio\"][\"sampling_rate\"]))\n", - " print(\"Category:\", category)\n", - "\n", - " (_, embedding) = at.inference(audio[None, :])\n", - " result = tbl.search(embedding[0]).limit(5).to_df()\n", - " print(result)\n", - " for i in range(len(result)):\n", - " print(str(i) + \". 
Category:\", result[\"category\"][i])\n", - " display(Audio(result[\"audio\"][i], rate=result[\"sampling_rate\"][i]))" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 873 - }, - "id": "dQYVac1kWRjx", - "outputId": "4dd2f8c9-dfb0-475d-97e3-3a82398ee0fd" - }, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - " \n", - " " + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Zm9Qz9WVWRjv", + "outputId": "dcbdce06-309d-45cb-997c-37c89d9b6cc3" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "Dataset({\n", + " features: ['filename', 'fold', 'target', 'category', 'esc10', 'src_file', 'take', 'audio'],\n", + " num_rows: 2000\n", + "})" + ] + }, + "metadata": {}, + "execution_count": 5 + } ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Category: car_horn\n", - " audio \\\n", - "0 [-0.022979736328125, -0.021820068359375, -0.02... \n", - "1 [0.313934326171875, 0.312774658203125, 0.31698... \n", - "2 [0.0655517578125, 0.011505126953125, -0.024536... \n", - "3 [0.063690185546875, 0.065216064453125, 0.07296... \n", - "4 [-0.006866455078125, -0.007476806640625, -0.00... \n", - "\n", - " vector sampling_rate \\\n", - "0 [0.0, 0.12407931, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0... 44100 \n", - "1 [0.0, 0.5878662, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,... 44100 \n", - "2 [0.0, 0.7369921, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,... 44100 \n", - "3 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... 44100 \n", - "4 [0.0, 0.42053863, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0... 44100 \n", - "\n", - " category _distance \n", - "0 airplane 85.660744 \n", - "1 washing_machine 91.059021 \n", - "2 vacuum_cleaner 110.453613 \n", - "3 clapping 111.933456 \n", - "4 footsteps 115.770416 \n", - "0. 
Category: airplane\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - ":9: DeprecatedWarning: to_df is deprecated as of 0.3.1 and will be removed in 0.4.0. Use the bar function instead\n", - " result = tbl.search(embedding[0]).limit(5).to_df()\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - " \n", - " " + "source": [ + "dataset" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "INND51clWRjv" + }, + "source": [ + "### Create Embeddings\n", + "Now, to create the data embeddings! We can start by creating batches of 50 for the data, keeping track of the most important columns: `category` and `audio`." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "id": "VKflK56YWRjv" + }, + "outputs": [], + "source": [ + "batches = [batch[\"audio\"] for batch in dataset.iter(50)]\n", + "meta_batches = [batch[\"category\"] for batch in dataset.iter(50)]\n", + "audio_data = [np.array([audio[\"array\"] for audio in batch]) for batch in batches]\n", + "meta_data = [np.array([meta for meta in batch]) for batch in meta_batches]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "B4mB3sa2WRjw" + }, + "source": [ + "We now want to iterate through these batches, and for each audio file, we want to use the AudioTagging embedder to extract the embedding. Then, we can store these embeddings, audio files, and category name into a list of dictionaries. Each dictionary has to contain a `vector` column in order to add to the LanceDB table, if no embedding function is provided."
+ ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "pdt1n8S7WRjw", + "outputId": "26abb853-33b2-4a86-a41a-6f188e7d4d46" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "100%|██████████| 40/40 [00:13<00:00, 2.99it/s]\n" + ] + } ], - "text/plain": [ - "" + "source": [ + "for i in tqdm(range(len(audio_data))):\n", + " (_, embedding) = at.inference(audio_data[i])\n", + " data = [\n", + " {\n", + " \"audio\": x[0][\"array\"],\n", + " \"vector\": x[1],\n", + " \"sampling_rate\": x[0][\"sampling_rate\"],\n", + " \"category\": meta_data[i][j],\n", + " }\n", + " for j, x in enumerate(zip(batches[i], embedding))\n", + " ]" ] - }, - "metadata": {}, - "output_type": "display_data" }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "1. Category: washing_machine\n" - ] + "cell_type": "markdown", + "metadata": { + "id": "CRpnHjJbWRjw" + }, + "source": [ + "Once we have this data list, we can create a LanceDB table by first connecting to a directory and then calling `db.create_table()`. If the table already exists, we open the table and add the data."
+ ] }, { - "data": { - "text/html": [ - "\n", - " \n", - " " + "cell_type": "markdown", + "metadata": { + "id": "PDGjLT4UZLBu" + }, + "source": [ + "### Add the VectorStore" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "id": "3lh_d6m3WRjw", + "outputId": "691acc64-5791-42b4-9f0f-3992f54b62da", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Created Table\n" + ] + } ], - "text/plain": [ - "" + "source": [ + "# Connect to directory at the top of the file\n", + "db = lancedb.connect(\"data/audio-lancedb\")\n", + "table_name = \"audio-search\"\n", + "\n", + "if table_name not in db.table_names():\n", + " print(\"Created Table\")\n", + " tbl = db.create_table(table_name, data)\n", + "else:\n", + " print(\"Inserting data\")\n", + " tbl = db.open_table(table_name)\n", + " tbl.add(data)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "m7WfeIv8WRjw" + }, + "source": [ + "We can now combine all of this into a single function:" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ARFCQPPjZLBu" + }, + "source": [ + "### Composite function" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "id": "XnCHvlLsWRjw" + }, + "outputs": [], + "source": [ + "def insert_audio():\n", + " batches = [batch[\"audio\"] for batch in dataset.iter(20)]\n", + " meta_batches = [batch[\"category\"] for batch in dataset.iter(20)]\n", + " audio_data = [np.array([audio[\"array\"] for audio in batch]) for batch in batches]\n", + " meta_data = [np.array([meta for meta in batch]) for batch in meta_batches]\n", + " print(\"Start\")\n", + " for i in tqdm(range(len(audio_data))):\n", + " (_, embedding) = at.inference(audio_data[i])\n", + " data = [\n", + " {\n", + " \"audio\": x[0][\"array\"],\n", + " \"vector\": x[1],\n", + " \"sampling_rate\": x[0][\"sampling_rate\"],\n", + " \"category\": meta_data[i][j],\n", + " }\n", + 
" for j, x in enumerate(zip(batches[i], embedding))\n", + " ]\n", + " if table_name not in db.table_names():\n", + " tbl = db.create_table(table_name, data)\n", + " else:\n", + " tbl = db.open_table(table_name)\n", + " tbl.add(data)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "id": "UvEhnuLyWRjw" + }, + "outputs": [], + "source": [ + "import shutil\n", + "\n", + "shutil.rmtree(\"data/audio-lancedb/audio-search.lance\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vr9LehNiiUNb" + }, + "source": [ + "NOTE: if you get out of memory, then next time Run all cells & uncomment this lines #insert_audio()" ] - }, - "metadata": {}, - "output_type": "display_data" }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "2. Category: vacuum_cleaner\n" - ] + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "TXxGHwZdgZrG" + }, + "outputs": [], + "source": [ + "# insert_audio()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "mPBphF19WRjx" + }, + "source": [ + "Great! We now have a fully populated table with all the necessary information. The next step would be to query the table and find those similar audio files. We can do this by first opening the table, and then getting the specific audio file we want to search for." 
+ ] }, { - "data": { - "text/html": [ - "\n", - " \n", - " " + "cell_type": "markdown", + "metadata": { + "id": "7B-mrGM6ZLBy" + }, + "source": [ + "### Query the database" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 93 + }, + "id": "ZsGYl6YSWRjx", + "outputId": "7a743814-a168-4fb7-84d4-4c303c55ccea" + }, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "" + ], + "text/html": [ + "\n", + " \n", + " " + ] + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Category: water_drops\n" + ] + } ], - "text/plain": [ - "" + "source": [ + "tbl = db.open_table(table_name)\n", + "audio = dataset[50][\"audio\"][\"array\"]\n", + "category = dataset[50][\"category\"]\n", + "display(Audio(audio, rate=dataset[50][\"audio\"][\"sampling_rate\"]))\n", + "print(\"Category:\", category)" ] - }, - "metadata": {}, - "output_type": "display_data" }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "3. Category: clapping\n" - ] + "cell_type": "markdown", + "metadata": { + "id": "Et2C9t87WRjx" + }, + "source": [ + "Next, we call the embedding function again to create those embeddings, which would allow us to search our table." + ] }, { - "data": { - "text/html": [ - "\n", - " \n", - " " + "cell_type": "code", + "execution_count": 16, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ZmXOqB2FWRjx", + "outputId": "ed6c36a6-66a7-440d-f8fa-c693e61df0b2" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + " audio \\\n", + "0 [0.00506591796875, 0.00653076171875, 0.0051574... \n", + "1 [-0.157318115234375, -0.122344970703125, -0.17... \n", + "2 [-0.0162353515625, -0.015716552734375, -0.0150... \n", + "3 [-0.0008544921875, -0.000762939453125, -0.0005... \n", + "4 [-0.003753662109375, -0.004119873046875, -0.00... 
\n", + "\n", + " vector sampling_rate \\\n", + "0 [0.0, 0.70255554, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0... 44100 \n", + "1 [0.0, 0.68818694, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0... 44100 \n", + "2 [0.0, 0.58163136, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0... 44100 \n", + "3 [0.0, 1.0475253, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,... 44100 \n", + "4 [0.0, 0.45124823, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0... 44100 \n", + "\n", + " category _distance \n", + "0 water_drops 52.260319 \n", + "1 water_drops 57.536579 \n", + "2 water_drops 75.637405 \n", + "3 drinking_sipping 76.979073 \n", + "4 water_drops 77.981728 \n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + ":2: UnsupportedWarning: to_df is unsupported as of 0.4.0. Use to_pandas() instead\n", + " result = tbl.search(embedding[0]).limit(5).to_df()\n" + ] + } ], - "text/plain": [ - "" + "source": [ + "(_, embedding) = at.inference(audio[None, :])\n", + "result = tbl.search(embedding[0]).limit(5).to_df()\n", + "print(result)" ] - }, - "metadata": {}, - "output_type": "display_data" }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "4. Category: footsteps\n" - ] + "cell_type": "code", + "execution_count": 17, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 396 + }, + "id": "enl39Zp8WRjx", + "outputId": "296de741-d483-4471-92f4-a263abf1d262" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "0. Category: water_drops\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "" + ], + "text/html": [ + "\n", + " \n", + " " + ] + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "1. Category: water_drops\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "" + ], + "text/html": [ + "\n", + " \n", + " " + ] + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "2. 
Category: water_drops\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "" + ], + "text/html": [ + "\n", + " \n", + " " + ] + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "3. Category: drinking_sipping\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "" + ], + "text/html": [ + "\n", + " \n", + " " + ] + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "4. Category: water_drops\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "" + ], + "text/html": [ + "\n", + " \n", + " " + ] + }, + "metadata": {} + } + ], + "source": [ + "for i in range(len(result)):\n", + " print(str(i) + \". Category:\", result[\"category\"][i])\n", + " display(Audio(result[\"audio\"][i], rate=result[\"sampling_rate\"][i]))" + ] }, { - "data": { - "text/html": [ - "\n", - " \n", - " " + "cell_type": "markdown", + "metadata": { + "id": "mZtR0bxXWRjx" + }, + "source": [ + "Nice! It seems to be working! We can compile this into another function here, that takes an `id` of the audio from 0 to 1,999." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "OPrn-NAYZLB0" + }, + "source": [ + "### Search Audio using IDs" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "id": "wc1X8MuDWRjx" + }, + "outputs": [], + "source": [ + "def search_audio(id):\n", + " tbl = db.open_table(table_name)\n", + " audio = dataset[id][\"audio\"][\"array\"]\n", + " category = dataset[id][\"category\"]\n", + " display(Audio(audio, rate=dataset[id][\"audio\"][\"sampling_rate\"]))\n", + " print(\"Category:\", category)\n", + "\n", + " (_, embedding) = at.inference(audio[None, :])\n", + " result = tbl.search(embedding[0]).limit(5).to_df()\n", + " print(result)\n", + " for i in range(len(result)):\n", + " print(str(i) + \". 
Category:\", result[\"category\"][i])\n", + " display(Audio(result[\"audio\"][i], rate=result[\"sampling_rate\"][i]))" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 853 + }, + "id": "dQYVac1kWRjx", + "outputId": "a1ea8e7d-acee-4bb4-a008-6ee90d097cc8" + }, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "" + ], + "text/html": [ + "\n", + " \n", + " " + ] + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Category: car_horn\n", + " audio \\\n", + "0 [-0.022979736328125, -0.021820068359375, -0.02... \n", + "1 [0.313934326171875, 0.312774658203125, 0.31698... \n", + "2 [0.0655517578125, 0.011505126953125, -0.024536... \n", + "3 [0.063690185546875, 0.065216064453125, 0.07296... \n", + "4 [-0.006866455078125, -0.007476806640625, -0.00... \n", + "\n", + " vector sampling_rate \\\n", + "0 [0.0, 0.12407931, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0... 44100 \n", + "1 [0.0, 0.5878662, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,... 44100 \n", + "2 [0.0, 0.7369921, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,... 44100 \n", + "3 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... 44100 \n", + "4 [0.0, 0.42053863, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0... 44100 \n", + "\n", + " category _distance \n", + "0 airplane 85.660736 \n", + "1 washing_machine 91.059029 \n", + "2 vacuum_cleaner 110.453621 \n", + "3 clapping 111.933441 \n", + "4 footsteps 115.770401 \n", + "0. Category: airplane\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + ":9: UnsupportedWarning: to_df is unsupported as of 0.4.0. Use to_pandas() instead\n", + " result = tbl.search(embedding[0]).limit(5).to_df()\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "" + ], + "text/html": [ + "\n", + " \n", + " " + ] + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "1. 
Category: washing_machine\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "" + ], + "text/html": [ + "\n", + " \n", + " " + ] + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "2. Category: vacuum_cleaner\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "" + ], + "text/html": [ + "\n", + " \n", + " " + ] + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "3. Category: clapping\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "" + ], + "text/html": [ + "\n", + " \n", + " " + ] + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "4. Category: footsteps\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "" + ], + "text/html": [ + "\n", + " \n", + " " + ] + }, + "metadata": {} + } ], - "text/plain": [ - "" + "source": [ + "search_audio(125)" ] - }, - "metadata": {}, - "output_type": "display_data" } - ], - "source": [ - "search_audio(125)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "3X3pePawWRjx" - }, - "outputs": [], - "source": [] - } - ], - "metadata": { - "accelerator": "GPU", - "colab": { - "gpuType": "T4", - "provenance": [] - }, - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "gpuType": "T4", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.1" + } }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, 
- "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.1" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/examples/product-recommender/lancedb_cloud/README.md b/examples/product-recommender/lancedb_cloud/README.md index 368725cc..309697f6 100644 --- a/examples/product-recommender/lancedb_cloud/README.md +++ b/examples/product-recommender/lancedb_cloud/README.md @@ -20,11 +20,18 @@ if you would like to set api key through an environment variable: ``` export LANCEDB_API_KEY="sk_..." ``` +or +``` +import os +import getpass + +os.environ["LANCEDB_API_KEY"] = getpass.getpass("Enter Your LANCEDB API Key:") +``` replace the following lines in main.py with your project slug and api key" ``` db_url = "db://your-project-name" - api_key="sk_..." +api_key="sk_..." ``` Run the script diff --git a/examples/product-recommender/lancedb_cloud/main.ipynb b/examples/product-recommender/lancedb_cloud/main.ipynb index f3c6f44c..c32d1ff2 100644 --- a/examples/product-recommender/lancedb_cloud/main.ipynb +++ b/examples/product-recommender/lancedb_cloud/main.ipynb @@ -14,71 +14,46 @@ }, { "cell_type": "markdown", + "metadata": { + "id": "sCtHNvkbzSot" + }, "source": [ "## Credentials\n", "\n", "Copy and paste the project name and the api key from your project page.\n", "These will be used later to [connect to LanceDB Cloud](#scroll-to=5q8m6GMD7sGu)" - ], - "metadata": { - "id": "sCtHNvkbzSot" - } + ] }, { "cell_type": "code", - "source": [ - "project_slug = \"your-project-slug\" # @param {type:\"string\"}" - ], + "execution_count": 2, "metadata": { "id": "zpPM2T8zzZkw" }, - "execution_count": 2, - "outputs": [] + "outputs": [], + "source": [ + "project_slug = \"your-project-slug\" # @param {type:\"string\"}" + ] }, { "cell_type": "code", - "source": [ - "api_key = \"sk_...\" # @param 
{type:\"string\"}" - ], + "execution_count": 3, "metadata": { "id": "xgCqtc99zwUQ" }, - "execution_count": 3, - "outputs": [] + "outputs": [], + "source": [ + "api_key = \"sk_...\" # @param {type:\"string\"}" + ] }, { "cell_type": "markdown", - "source": [ - "You can also set the LANCEDB_API_KEY as an environment variable with one of the options below" - ], "metadata": { "id": "eEITDnEczz7G" - } - }, - { - "cell_type": "code", - "source": [ - "!export LANCEDB_API_KEY=\"sk_...\"" - ], - "metadata": { - "id": "Md5kS8s7z0-j" }, - "execution_count": 3, - "outputs": [] - }, - { - "cell_type": "code", "source": [ - "import os\n", - "import getpass\n", - "\n", - "os.environ[\"LANCEDB_API_KEY\"] = getpass.getpass(\"Enter Your LANCEDB API Key:\")" - ], - "metadata": { - "id": "d7gq19Wez3JZ" - }, - "execution_count": null, - "outputs": [] + "You can also set the LANCEDB_API_KEY as an environment variable. More details can be found **here**." + ] }, { "cell_type": "markdown", @@ -94,19 +69,19 @@ "cell_type": "code", "execution_count": 4, "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, "id": "3jXSVspr7sGe", + "outputId": "4c09916d-85de-46d6-9c16-ed6746ac4e19", "vscode": { "languageId": "shellscript" - }, - "outputId": "4c09916d-85de-46d6-9c16-ed6746ac4e19", - "colab": { - "base_uri": "https://localhost:8080/" } }, "outputs": [ { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "--2024-01-23 03:30:37-- http://vectordb-recipes.s3.us-west-2.amazonaws.com/product-recommender.zip\n", "Resolving vectordb-recipes.s3.us-west-2.amazonaws.com (vectordb-recipes.s3.us-west-2.amazonaws.com)... 
3.5.84.12, 3.5.84.155, 3.5.84.131, ...\n", @@ -163,8 +138,8 @@ }, "outputs": [ { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (1.23.5)\n", "Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (1.5.3)\n", @@ -377,23 +352,14 @@ }, "outputs": [ { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "13863749\n" ] }, { - "output_type": "execute_result", "data": { - "text/plain": [ - " user_id product_id total_orders\n", - "13863744 206209 48697 1\n", - "13863745 206209 48742 2\n", - "13863746 206210 46149 50\n", - "13863747 206211 27845 49\n", - "13863748 206211 26604 32" - ], "text/html": [ "\n", "
\n", @@ -662,10 +628,19 @@ "
\n", " \n", " \n" + ], + "text/plain": [ + " user_id product_id total_orders\n", + "13863744 206209 48697 1\n", + "13863745 206209 48742 2\n", + "13863746 206210 46149 50\n", + "13863747 206211 27845 49\n", + "13863748 206211 26604 32" ] }, + "execution_count": 11, "metadata": {}, - "execution_count": 11 + "output_type": "execute_result" } ], "source": [ @@ -758,26 +733,26 @@ }, "outputs": [ { - "output_type": "stream", "name": "stderr", + "output_type": "stream", "text": [ "/usr/local/lib/python3.10/dist-packages/implicit/cpu/als.py:95: RuntimeWarning: OpenBLAS is configured to use 2 threads. It is highly recommended to disable its internal threadpool by setting the environment variable 'OPENBLAS_NUM_THREADS=1' or by calling 'threadpoolctl.threadpool_limits(1, \"blas\")'. Having OpenBLAS use a threadpool can lead to severe performance issues here.\n", " check_blas_config()\n" ] }, { - "output_type": "display_data", "data": { - "text/plain": [ - " 0%| | 0/50 [00:00\n", @@ -1379,11 +1342,23 @@ "\n", " \n", " \n" + ], + "text/plain": [ + " user_id product_id total_orders\n", + "0 1 196 11\n", + "1 1 10258 10\n", + "2 1 10326 1\n", + "3 1 12427 10\n", + "4 1 13032 4" ] }, + "execution_count": 21, "metadata": {}, - "execution_count": 21 + "output_type": "execute_result" } + ], + "source": [ + "data.head()" ] }, { @@ -1450,22 +1425,22 @@ "cell_type": "code", "execution_count": 24, "metadata": { - "id": "H8HyvjCFSeaz", - "outputId": "27519f2a-e95a-4442-97b1-291931180ca8", "colab": { "base_uri": "https://localhost:8080/" - } + }, + "id": "H8HyvjCFSeaz", + "outputId": "27519f2a-e95a-4442-97b1-291931180ca8" }, "outputs": [ { - "output_type": "execute_result", "data": { "text/plain": [ "{}" ] }, + "execution_count": 24, "metadata": {}, - "execution_count": 24 + "output_type": "execute_result" } ], "source": [ @@ -1550,33 +1525,7 @@ }, "outputs": [ { - "output_type": "display_data", "data": { - "text/plain": [ - " product_id product_name \\\n", - "0 196 Soda \n", - "1 46149 
Zero Calorie Cola \n", - "2 40939 Drinking Water \n", - "3 37710 Trail Mix \n", - "4 22802 Mineral Water \n", - "5 41400 Crunchy Oats 'n Honey Granola Bars \n", - "6 46061 Popcorn \n", - "7 31651 Extra Fancy Unsalted Mixed Nuts \n", - "8 5258 Sparkling Water \n", - "9 38928 0% Greek Strained Yogurt \n", - "\n", - " vector _distance \n", - "0 [-0.0030924827, -0.0042996905, -0.01350651, -0... 35.096085 \n", - "1 [0.0015008126, -0.014029495, -0.015295635, 0.0... 35.392975 \n", - "2 [0.0018837166, -0.018152414, -0.015649604, 0.0... 35.864483 \n", - "3 [-0.0011668581, -0.0025222106, -0.016717039, -... 35.896873 \n", - "4 [-0.010115783, -0.017115017, -0.011403508, 0.0... 36.035912 \n", - "5 [0.0040870784, -0.0009994006, -0.018302424, -0... 36.042686 \n", - "6 [0.0036969625, -0.013887798, -0.002804261, -0.... 36.043732 \n", - "7 [0.014438897, -0.005578243, -0.0055169673, -0.... 36.117802 \n", - "8 [-0.022658644, -0.026015628, -0.0083606485, -0... 36.131721 \n", - "9 [0.0018425643, -0.011489441, -0.0052835834, 0.... 36.139870 " - ], "text/html": [ "\n", "
\n", @@ -1886,17 +1835,38 @@ "
\n", " \n", " \n" + ], + "text/plain": [ + " product_id product_name \\\n", + "0 196 Soda \n", + "1 46149 Zero Calorie Cola \n", + "2 40939 Drinking Water \n", + "3 37710 Trail Mix \n", + "4 22802 Mineral Water \n", + "5 41400 Crunchy Oats 'n Honey Granola Bars \n", + "6 46061 Popcorn \n", + "7 31651 Extra Fancy Unsalted Mixed Nuts \n", + "8 5258 Sparkling Water \n", + "9 38928 0% Greek Strained Yogurt \n", + "\n", + " vector _distance \n", + "0 [-0.0030924827, -0.0042996905, -0.01350651, -0... 35.096085 \n", + "1 [0.0015008126, -0.014029495, -0.015295635, 0.0... 35.392975 \n", + "2 [0.0018837166, -0.018152414, -0.015649604, 0.0... 35.864483 \n", + "3 [-0.0011668581, -0.0025222106, -0.016717039, -... 35.896873 \n", + "4 [-0.010115783, -0.017115017, -0.011403508, 0.0... 36.035912 \n", + "5 [0.0040870784, -0.0009994006, -0.018302424, -0... 36.042686 \n", + "6 [0.0036969625, -0.013887798, -0.002804261, -0.... 36.043732 \n", + "7 [0.014438897, -0.005578243, -0.0055169673, -0.... 36.117802 \n", + "8 [-0.022658644, -0.026015628, -0.0083606485, -0... 36.131721 \n", + "9 [0.0018425643, -0.011489441, -0.0052835834, 0.... 36.139870 " ] }, - "metadata": {} + "metadata": {}, + "output_type": "display_data" }, { - "output_type": "display_data", "data": { - "text/plain": [ - " product_id product_name total_orders\n", - "0 46149 Zero Calorie Cola 50" - ], "text/html": [ "\n", "
\n", @@ -2014,38 +1984,17 @@ "\n", "
\n", " \n" + ], + "text/plain": [ + " product_id product_name total_orders\n", + "0 46149 Zero Calorie Cola 50" ] }, - "metadata": {} + "metadata": {}, + "output_type": "display_data" }, { - "output_type": "display_data", "data": { - "text/plain": [ - " product_id product_name \\\n", - "0 26604 Organic Blackberries \n", - "1 27845 Organic Whole Milk \n", - "2 27966 Organic Raspberries \n", - "3 43352 Raspberries \n", - "4 9076 Blueberries \n", - "5 21288 Blackberries \n", - "6 39275 Organic Blueberries \n", - "7 39928 Organic Kiwi \n", - "8 11777 Red Raspberries \n", - "9 21137 Organic Strawberries \n", - "\n", - " vector _distance \n", - "0 [-0.017585486, 0.019628799, 0.0399348, 0.01422... 17.404045 \n", - "1 [-0.050286394, 0.026924692, 0.030701049, -0.02... 17.404305 \n", - "2 [-0.006732653, 0.015266006, 0.018316658, -0.00... 17.867121 \n", - "3 [0.0037516877, 0.013682851, 0.057814274, 0.031... 18.030893 \n", - "4 [0.0029817792, 0.030459687, 0.04528497, 0.0113... 18.135754 \n", - "5 [-0.011553102, -0.010046569, 0.037375, 0.02368... 18.141661 \n", - "6 [0.010543987, 0.006028164, 0.011502461, 0.0004... 18.241520 \n", - "7 [-0.044292357, -0.031322725, -0.00174381, -0.0... 18.414057 \n", - "8 [-0.0067819585, -0.023531102, 0.010277328, -0.... 18.468819 \n", - "9 [0.007023127, 0.0037457773, -0.0061378656, -0.... 18.476973 " - ], "text/html": [ "\n", "
\n", @@ -2355,18 +2304,38 @@ "
\n", " \n", " \n" + ], + "text/plain": [ + " product_id product_name \\\n", + "0 26604 Organic Blackberries \n", + "1 27845 Organic Whole Milk \n", + "2 27966 Organic Raspberries \n", + "3 43352 Raspberries \n", + "4 9076 Blueberries \n", + "5 21288 Blackberries \n", + "6 39275 Organic Blueberries \n", + "7 39928 Organic Kiwi \n", + "8 11777 Red Raspberries \n", + "9 21137 Organic Strawberries \n", + "\n", + " vector _distance \n", + "0 [-0.017585486, 0.019628799, 0.0399348, 0.01422... 17.404045 \n", + "1 [-0.050286394, 0.026924692, 0.030701049, -0.02... 17.404305 \n", + "2 [-0.006732653, 0.015266006, 0.018316658, -0.00... 17.867121 \n", + "3 [0.0037516877, 0.013682851, 0.057814274, 0.031... 18.030893 \n", + "4 [0.0029817792, 0.030459687, 0.04528497, 0.0113... 18.135754 \n", + "5 [-0.011553102, -0.010046569, 0.037375, 0.02368... 18.141661 \n", + "6 [0.010543987, 0.006028164, 0.011502461, 0.0004... 18.241520 \n", + "7 [-0.044292357, -0.031322725, -0.00174381, -0.0... 18.414057 \n", + "8 [-0.0067819585, -0.023531102, 0.010277328, -0.... 18.468819 \n", + "9 [0.007023127, 0.0037457773, -0.0061378656, -0.... 18.476973 " ] }, - "metadata": {} + "metadata": {}, + "output_type": "display_data" }, { - "output_type": "display_data", "data": { - "text/plain": [ - " product_id product_name total_orders\n", - "0 27845 Organic Whole Milk 49\n", - "1 26604 Organic Blackberries 32" - ], "text/html": [ "\n", "
\n", @@ -2617,9 +2586,15 @@ "
\n", " \n", " \n" + ], + "text/plain": [ + " product_id product_name total_orders\n", + "0 27845 Organic Whole Milk 49\n", + "1 26604 Organic Blackberries 32" ] }, - "metadata": {} + "metadata": {}, + "output_type": "display_data" } ], "source": [ @@ -2662,32 +2637,25 @@ }, "widgets": { "application/vnd.jupyter.widget-state+json": { - "2c0101b0a3574a14b2a37fc431eb2908": { + "000f9e8fd1db4bc0a7aceeb822ca2b2e": { "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", "state": { - "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", + "_model_name": "DescriptionStyleModel", "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_31c3c90fa42f489796fba11d57799089", - "IPY_MODEL_e13993dda2da40ff806d6e31a6e987d3", - "IPY_MODEL_0bff70b647f3404fa15690ec9f3d0c78" - ], - "layout": "IPY_MODEL_674cf2d29d044cada59480813e0e8e58" + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" } }, - "31c3c90fa42f489796fba11d57799089": { + "0bff70b647f3404fa15690ec9f3d0c78": { "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", "model_module_version": "1.5.0", + "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", @@ -2699,40 +2667,38 @@ "_view_name": "HTMLView", "description": "", "description_tooltip": null, - "layout": "IPY_MODEL_bfd4ff099ed14ab1bd79233beea7f402", + "layout": "IPY_MODEL_bf95fd811f79425bb2248525aeab7da0", "placeholder": "​", - "style": "IPY_MODEL_000f9e8fd1db4bc0a7aceeb822ca2b2e", - "value": "100%" + "style": "IPY_MODEL_46fb5083adf24ce4ae3fd4ea9aa4772e", + "value": " 50/50 [17:28<00:00, 20.73s/it]" } }, - 
"e13993dda2da40ff806d6e31a6e987d3": { + "2c0101b0a3574a14b2a37fc431eb2908": { "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", "model_module_version": "1.5.0", + "model_name": "HBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", + "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_75b270d981de425ba1fd9a790b2a68ff", - "max": 50, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_baafe1d810594384af1a5ffa4f2f5cb4", - "value": 50 + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_31c3c90fa42f489796fba11d57799089", + "IPY_MODEL_e13993dda2da40ff806d6e31a6e987d3", + "IPY_MODEL_0bff70b647f3404fa15690ec9f3d0c78" + ], + "layout": "IPY_MODEL_674cf2d29d044cada59480813e0e8e58" } }, - "0bff70b647f3404fa15690ec9f3d0c78": { + "31c3c90fa42f489796fba11d57799089": { "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", "model_module_version": "1.5.0", + "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", @@ -2744,16 +2710,31 @@ "_view_name": "HTMLView", "description": "", "description_tooltip": null, - "layout": "IPY_MODEL_bf95fd811f79425bb2248525aeab7da0", + "layout": "IPY_MODEL_bfd4ff099ed14ab1bd79233beea7f402", "placeholder": "​", - "style": "IPY_MODEL_46fb5083adf24ce4ae3fd4ea9aa4772e", - "value": " 50/50 [17:28<00:00, 20.73s/it]" + "style": "IPY_MODEL_000f9e8fd1db4bc0a7aceeb822ca2b2e", + "value": "100%" } }, - "674cf2d29d044cada59480813e0e8e58": { + "46fb5083adf24ce4ae3fd4ea9aa4772e": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + 
"_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "4b0298a9ecf84b509fbf379d43339b9c": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -2802,10 +2783,10 @@ "width": null } }, - "bfd4ff099ed14ab1bd79233beea7f402": { + "4b20ad4b356645bbbfb94929160943f2": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -2854,25 +2835,48 @@ "width": null } }, - "000f9e8fd1db4bc0a7aceeb822ca2b2e": { + "5b98b7b242994c999064688c9210c61b": { "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", + "model_name": "HBoxModel", "state": { + "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_d5b1eb34ddc949aebd25b3744b93b726", + "IPY_MODEL_752d37b9a68b42d284493645962f3782", + "IPY_MODEL_f0def002c7ca41f6a70e9dba1bc605c7" + ], + "layout": "IPY_MODEL_4b0298a9ecf84b509fbf379d43339b9c" + } + }, + "63b8646c732246988f566d0442a070e8": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + 
"_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", + "bar_color": null, "description_width": "" } }, - "75b270d981de425ba1fd9a790b2a68ff": { + "674cf2d29d044cada59480813e0e8e58": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -2921,26 +2925,34 @@ "width": null } }, - "baafe1d810594384af1a5ffa4f2f5cb4": { + "752d37b9a68b42d284493645962f3782": { "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", "state": { + "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", + "_model_name": "FloatProgressModel", "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_4b20ad4b356645bbbfb94929160943f2", + "max": 192802, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_63b8646c732246988f566d0442a070e8", + "value": 192802 } }, - "bf95fd811f79425bb2248525aeab7da0": { + "75b270d981de425ba1fd9a790b2a68ff": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -2989,113 +3001,10 @@ "width": null } }, - "46fb5083adf24ce4ae3fd4ea9aa4772e": { - "model_module": "@jupyter-widgets/controls", - "model_name": 
"DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "5b98b7b242994c999064688c9210c61b": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_d5b1eb34ddc949aebd25b3744b93b726", - "IPY_MODEL_752d37b9a68b42d284493645962f3782", - "IPY_MODEL_f0def002c7ca41f6a70e9dba1bc605c7" - ], - "layout": "IPY_MODEL_4b0298a9ecf84b509fbf379d43339b9c" - } - }, - "d5b1eb34ddc949aebd25b3744b93b726": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_a37be209d5bb44e18f32c0259073d2c8", - "placeholder": "​", - "style": "IPY_MODEL_b35984b48d8847eea119ee5eda049b9d", - "value": "100%" - } - }, - "752d37b9a68b42d284493645962f3782": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": 
"FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_4b20ad4b356645bbbfb94929160943f2", - "max": 192802, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_63b8646c732246988f566d0442a070e8", - "value": 192802 - } - }, - "f0def002c7ca41f6a70e9dba1bc605c7": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_ae8581ec76314304b2078759e1dbdd7e", - "placeholder": "​", - "style": "IPY_MODEL_d0e90066f1ec42afa5f1c02551d3889e", - "value": " 192802/192802 [02:11<00:00, 1657.77it/s]" - } - }, - "4b0298a9ecf84b509fbf379d43339b9c": { + "a37be209d5bb44e18f32c0259073d2c8": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -3144,10 +3053,10 @@ "width": null } }, - "a37be209d5bb44e18f32c0259073d2c8": { + "ae8581ec76314304b2078759e1dbdd7e": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -3198,8 +3107,8 @@ }, "b35984b48d8847eea119ee5eda049b9d": { "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", 
"state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", @@ -3211,10 +3120,26 @@ "description_width": "" } }, - "4b20ad4b356645bbbfb94929160943f2": { + "baafe1d810594384af1a5ffa4f2f5cb4": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "bf95fd811f79425bb2248525aeab7da0": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -3263,26 +3188,10 @@ "width": null } }, - "63b8646c732246988f566d0442a070e8": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "ae8581ec76314304b2078759e1dbdd7e": { + "bfd4ff099ed14ab1bd79233beea7f402": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -3333,8 +3242,8 @@ }, "d0e90066f1ec42afa5f1c02551d3889e": { "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", "state": { 
"_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", @@ -3345,6 +3254,72 @@ "_view_name": "StyleView", "description_width": "" } + }, + "d5b1eb34ddc949aebd25b3744b93b726": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_a37be209d5bb44e18f32c0259073d2c8", + "placeholder": "​", + "style": "IPY_MODEL_b35984b48d8847eea119ee5eda049b9d", + "value": "100%" + } + }, + "e13993dda2da40ff806d6e31a6e987d3": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_75b270d981de425ba1fd9a790b2a68ff", + "max": 50, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_baafe1d810594384af1a5ffa4f2f5cb4", + "value": 50 + } + }, + "f0def002c7ca41f6a70e9dba1bc605c7": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + 
"description_tooltip": null, + "layout": "IPY_MODEL_ae8581ec76314304b2078759e1dbdd7e", + "placeholder": "​", + "style": "IPY_MODEL_d0e90066f1ec42afa5f1c02551d3889e", + "value": " 192802/192802 [02:11<00:00, 1657.77it/s]" + } } } } diff --git a/examples/product-recommender/main.ipynb b/examples/product-recommender/main.ipynb index 66c5c688..48ee5b92 100644 --- a/examples/product-recommender/main.ipynb +++ b/examples/product-recommender/main.ipynb @@ -1,2979 +1,3132 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "YmdWGrw4t5G2" - }, - "source": [ - "# Product Recommender using Collaborative Filtering and LanceDB\n", - "\n", - "We are going to use **LanceDB** and **Collaborative Filtering** to recommend products based on a user's past buying history. We used the **Instacart dataset** as our data for this example.\n", - "\n", - "![picture](https://daxg39y63pxwu.cloudfront.net/images/blog/product-recommendation-system-projects/Product_Recommendation_System_Project_Ideas_and_Examples.png)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "lXd46ecEt5G7" - }, - "source": [ - "To run this example, you must first create a Kaggle account. Then, go to the 'Account' tab of your user profile and select 'Create New Token'. This will trigger the download of kaggle.json, a file containing your API credentials.\n", - "\n", - "Add Kaggle credentials to `~/.kaggle/kaggle.json` on Linux, OSX, and other UNIX-based operating systems or `C:\\Users\\\\.kaggle\\kaggle.json` for Window's users.\n", - "\n", - "In Google Colab, run the snippet below." 
- ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": { - "id": "l6TTPIF_omEy", - "outputId": "d2cf1685-103e-4b62-bae3-a16d171a928f", - "colab": { - "base_uri": "https://localhost:8080/" - } - }, - "outputs": [ + "cells": [ { - "output_type": "stream", - "name": "stdout", - "text": [ - "Kaggle API key file created and moved successfully.\n" - ] - } - ], - "source": [ - "import json\n", - "import os\n", - "\n", - "# Set the file path\n", - "kaggle_json_path = \"/content/kaggle.json\"\n", - "\n", - "# Write Kaggle API key to the file\n", - "with open(kaggle_json_path, \"w\") as fp:\n", - " json.dump({\"username\": \"\", \"key\": \"\"}, fp)\n", - "\n", - "# Move the file to the correct location\n", - "os.system(\"mkdir -p ~/.kaggle\")\n", - "os.system(f\"mv {kaggle_json_path} ~/.kaggle/kaggle.json\")\n", - "\n", - "# Set permissions\n", - "os.system(\"chmod 600 ~/.kaggle/kaggle.json\")\n", - "\n", - "print(\"Kaggle API key file created and moved successfully.\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "c6G45HrUqNx5" - }, - "source": [ - "### Install dependencies" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" + "cell_type": "markdown", + "metadata": { + "id": "YmdWGrw4t5G2" + }, + "source": [ + "# Product Recommender using Collaborative Filtering and LanceDB\n", + "\n", + "We are going to use **LanceDB** and **Collaborative Filtering** to recommend products based on a user's past buying history. 
We used the **Instacart dataset** as our data for this example.\n", + "\n", + "![picture](https://daxg39y63pxwu.cloudfront.net/images/blog/product-recommendation-system-projects/Product_Recommendation_System_Project_Ideas_and_Examples.png)" + ] }, - "id": "R3_Hq2VC4_zT", - "outputId": "ee47bbd5-d1c3-4900-894e-2530190e17e7" - }, - "outputs": [ { - "output_type": "stream", - "name": "stdout", - "text": [ - "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (1.23.5)\n", - "Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (1.5.3)\n", - "Requirement already satisfied: scipy in /usr/local/lib/python3.10/dist-packages (1.11.4)\n", - "Requirement already satisfied: kaggle in /usr/local/lib/python3.10/dist-packages (1.5.16)\n", - "Collecting implicit\n", - " Downloading implicit-0.7.2-cp310-cp310-manylinux2014_x86_64.whl (8.9 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m8.9/8.9 MB\u001b[0m \u001b[31m18.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: torch in /usr/local/lib/python3.10/dist-packages (2.1.0+cu121)\n", - "Collecting lancedb\n", - " Downloading lancedb-0.5.0-py3-none-any.whl (87 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m87.4/87.4 kB\u001b[0m \u001b[31m13.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: python-dateutil>=2.8.1 in /usr/local/lib/python3.10/dist-packages (from pandas) (2.8.2)\n", - "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas) (2023.3.post1)\n", - "Requirement already satisfied: six>=1.10 in /usr/local/lib/python3.10/dist-packages (from kaggle) (1.16.0)\n", - "Requirement already satisfied: certifi in /usr/local/lib/python3.10/dist-packages (from kaggle) (2023.11.17)\n", - "Requirement already satisfied: requests in 
/usr/local/lib/python3.10/dist-packages (from kaggle) (2.31.0)\n", - "Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from kaggle) (4.66.1)\n", - "Requirement already satisfied: python-slugify in /usr/local/lib/python3.10/dist-packages (from kaggle) (8.0.1)\n", - "Requirement already satisfied: urllib3 in /usr/local/lib/python3.10/dist-packages (from kaggle) (2.0.7)\n", - "Requirement already satisfied: bleach in /usr/local/lib/python3.10/dist-packages (from kaggle) (6.1.0)\n", - "Requirement already satisfied: threadpoolctl in /usr/local/lib/python3.10/dist-packages (from implicit) (3.2.0)\n", - "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch) (3.13.1)\n", - "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.10/dist-packages (from torch) (4.5.0)\n", - "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch) (1.12)\n", - "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch) (3.2.1)\n", - "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch) (3.1.3)\n", - "Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from torch) (2023.6.0)\n", - "Requirement already satisfied: triton==2.1.0 in /usr/local/lib/python3.10/dist-packages (from torch) (2.1.0)\n", - "Collecting deprecation (from lancedb)\n", - " Downloading deprecation-2.1.0-py2.py3-none-any.whl (11 kB)\n", - "Collecting pylance==0.9.6 (from lancedb)\n", - " Downloading pylance-0.9.6-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (18.6 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m18.6/18.6 MB\u001b[0m \u001b[31m58.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hCollecting ratelimiter~=1.0 (from lancedb)\n", - " Downloading ratelimiter-1.2.0.post0-py3-none-any.whl (6.6 kB)\n", - "Collecting 
retry>=0.9.2 (from lancedb)\n", - " Downloading retry-0.9.2-py2.py3-none-any.whl (8.0 kB)\n", - "Requirement already satisfied: pydantic>=1.10 in /usr/local/lib/python3.10/dist-packages (from lancedb) (1.10.13)\n", - "Requirement already satisfied: attrs>=21.3.0 in /usr/local/lib/python3.10/dist-packages (from lancedb) (23.2.0)\n", - "Collecting semver>=3.0 (from lancedb)\n", - " Downloading semver-3.0.2-py3-none-any.whl (17 kB)\n", - "Requirement already satisfied: cachetools in /usr/local/lib/python3.10/dist-packages (from lancedb) (5.3.2)\n", - "Requirement already satisfied: pyyaml>=6.0 in /usr/local/lib/python3.10/dist-packages (from lancedb) (6.0.1)\n", - "Requirement already satisfied: click>=8.1.7 in /usr/local/lib/python3.10/dist-packages (from lancedb) (8.1.7)\n", - "Collecting overrides>=0.7 (from lancedb)\n", - " Downloading overrides-7.6.0-py3-none-any.whl (17 kB)\n", - "Collecting pyarrow>=12 (from pylance==0.9.6->lancedb)\n", - " Downloading pyarrow-15.0.0-cp310-cp310-manylinux_2_28_x86_64.whl (38.3 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m38.3/38.3 MB\u001b[0m \u001b[31m13.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->kaggle) (3.3.2)\n", - "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->kaggle) (3.6)\n", - "Requirement already satisfied: decorator>=3.4.2 in /usr/local/lib/python3.10/dist-packages (from retry>=0.9.2->lancedb) (4.4.2)\n", - "Collecting py<2.0.0,>=1.4.26 (from retry>=0.9.2->lancedb)\n", - " Downloading py-1.11.0-py2.py3-none-any.whl (98 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m98.7/98.7 kB\u001b[0m \u001b[31m14.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: webencodings in 
/usr/local/lib/python3.10/dist-packages (from bleach->kaggle) (0.5.1)\n", - "Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from deprecation->lancedb) (23.2)\n", - "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch) (2.1.3)\n", - "Requirement already satisfied: text-unidecode>=1.3 in /usr/local/lib/python3.10/dist-packages (from python-slugify->kaggle) (1.3)\n", - "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch) (1.3.0)\n", - "Installing collected packages: ratelimiter, semver, pyarrow, py, overrides, deprecation, retry, pylance, implicit, lancedb\n", - " Attempting uninstall: pyarrow\n", - " Found existing installation: pyarrow 10.0.1\n", - " Uninstalling pyarrow-10.0.1:\n", - " Successfully uninstalled pyarrow-10.0.1\n", - "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. 
This behaviour is the source of the following dependency conflicts.\n", - "ibis-framework 7.1.0 requires pyarrow<15,>=2, but you have pyarrow 15.0.0 which is incompatible.\u001b[0m\u001b[31m\n", - "\u001b[0mSuccessfully installed deprecation-2.1.0 implicit-0.7.2 lancedb-0.5.0 overrides-7.6.0 py-1.11.0 pyarrow-15.0.0 pylance-0.9.6 ratelimiter-1.2.0.post0 retry-0.9.2 semver-3.0.2\n" - ] - } - ], - "source": [ - "!pip install numpy pandas scipy kaggle implicit torch lancedb" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "i_eatRhaIGIz" - }, - "source": [ - "### Importing libraries" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": { - "id": "emp_MSXZt5G8" - }, - "outputs": [], - "source": [ - "import zipfile\n", - "import numpy as np\n", - "import pandas as pd\n", - "import scipy.sparse\n", - "import torch\n", - "import implicit\n", - "from implicit import evaluation\n", - "import pydantic\n", - "import lancedb\n", - "from lancedb.pydantic import pydantic_to_schema, vector" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "bUGkN85V4_zY" - }, - "source": [ - "### Load the dataset\n", - "Now we can download the dataset. You will need to accept the rules of the `instacart-market-basket-analysis` competition, which you can do so [here](https://www.kaggle.com/competitions/instacart-market-basket-analysis/rules)." - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" + "cell_type": "markdown", + "metadata": { + "id": "lXd46ecEt5G7" + }, + "source": [ + "To download the dataset used in this example, you must have a Kaggle account.\n", + "\n", + "To get your Kaggle API credentials:\n", + "\n", + "Go to Your Profile -> Settings -> Create Token\n", + "\n", + "This will download `kaggle.json`, a file containing your API credentials.\n", + "\n", + "Upload your `kaggle.json` credentials file to Google Colab, then run the snippet below." 
+ ] }, - "id": "09gdQyBu4_zY", - "outputId": "bb92fb9e-df75-47a5-b50d-290ed0555ef4" - }, - "outputs": [ { - "output_type": "stream", - "name": "stdout", - "text": [ - "Downloading instacart-market-basket-analysis.zip to /content\n", - " 92% 181M/196M [00:01<00:00, 81.3MB/s]\n", - "100% 196M/196M [00:01<00:00, 105MB/s] \n" - ] - } - ], - "source": [ - "!kaggle competitions download -c instacart-market-basket-analysis" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "K4Q4cOX-4_zY" - }, - "source": [ - "We must now extract the zip files." - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": { - "id": "f3g296nL4_zZ" - }, - "outputs": [], - "source": [ - "files = [\n", - " \"instacart-market-basket-analysis.zip\",\n", - " \"order_products__train.csv.zip\",\n", - " \"order_products__prior.csv.zip\",\n", - " \"products.csv.zip\",\n", - " \"orders.csv.zip\",\n", - "]\n", - "\n", - "for filename in files:\n", - " with zipfile.ZipFile(filename, \"r\") as zip_ref:\n", - " zip_ref.extractall(\"./\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "oLgkRIfq4_zZ" - }, - "source": [ - "Now we can move on to loading the dataset. We'll first read the csv files and create dataframes." - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": { - "id": "cBbbR7Rut5G_" - }, - "outputs": [], - "source": [ - "products = pd.read_csv(\"products.csv\")\n", - "orders = pd.read_csv(\"orders.csv\")\n", - "order_products = pd.concat(\n", - " [pd.read_csv(\"order_products__train.csv\"), pd.read_csv(\"order_products__prior.csv\")]\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "5FV_GGjst5HA" - }, - "source": [ - "Since there isn't a user rating attribute, we'll gather \"confidence\" data by looking at the frequency of each item purchased by a user, and store this in the `data` dataframe." 
- ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "YNgjd2nnqNx7" - }, - "source": [ - "### Data Manipulation" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": { - "id": "ZjRh7RYpt5HB" - }, - "outputs": [], - "source": [ - "customer_order_products = pd.merge(orders, order_products, how=\"inner\", on=\"order_id\")\n", - "\n", - "# create confidence table\n", - "data = (\n", - " customer_order_products.groupby([\"user_id\", \"product_id\"])[[\"order_id\"]]\n", - " .count()\n", - " .reset_index()\n", - ")\n", - "data.columns = [\"user_id\", \"product_id\", \"total_orders\"]\n", - "data.product_id = data.product_id.astype(\"int64\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "77lvwm0St5HC" - }, - "source": [ - "Let's create a couple of test users to examine the recommendations later:\n", - "- 1st test user: buys 50 sodas: **Zero Calorie Cola**\n", - "- 2nd test user: buys organic produce: **Organic Whole Milk** and **Organic Blackberries**" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 206 + "cell_type": "code", + "source": [ + "! pip install kaggle\n", + "! mkdir ~/.kaggle\n", + "! cp kaggle.json ~/.kaggle/\n", + "! 
chmod 600 ~/.kaggle/kaggle.json" + ], + "metadata": { + "id": "N3WSkW3kmjyF", + "outputId": "26294f7b-350e-41f9-afe0-e34c9dac3b9e", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "execution_count": 1, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Requirement already satisfied: kaggle in /usr/local/lib/python3.10/dist-packages (1.5.16)\n", + "Requirement already satisfied: six>=1.10 in /usr/local/lib/python3.10/dist-packages (from kaggle) (1.16.0)\n", + "Requirement already satisfied: certifi in /usr/local/lib/python3.10/dist-packages (from kaggle) (2024.2.2)\n", + "Requirement already satisfied: python-dateutil in /usr/local/lib/python3.10/dist-packages (from kaggle) (2.8.2)\n", + "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from kaggle) (2.31.0)\n", + "Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from kaggle) (4.66.2)\n", + "Requirement already satisfied: python-slugify in /usr/local/lib/python3.10/dist-packages (from kaggle) (8.0.4)\n", + "Requirement already satisfied: urllib3 in /usr/local/lib/python3.10/dist-packages (from kaggle) (2.0.7)\n", + "Requirement already satisfied: bleach in /usr/local/lib/python3.10/dist-packages (from kaggle) (6.1.0)\n", + "Requirement already satisfied: webencodings in /usr/local/lib/python3.10/dist-packages (from bleach->kaggle) (0.5.1)\n", + "Requirement already satisfied: text-unidecode>=1.3 in /usr/local/lib/python3.10/dist-packages (from python-slugify->kaggle) (1.3)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->kaggle) (3.3.2)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->kaggle) (3.6)\n" + ] + } + ] }, - "id": "A06EfAf-t5HC", - "outputId": "95a1f51f-ced1-437a-8b62-569bb915262c" - }, - "outputs": [ { - "output_type": "execute_result", - "data": { - "text/plain": 
[ - " user_id product_id total_orders\n", - "13863744 206209 48697 1\n", - "13863745 206209 48742 2\n", - "13863746 206210 46149 50\n", - "13863747 206211 27845 49\n", - "13863748 206211 26604 32" + "cell_type": "markdown", + "metadata": { + "id": "c6G45HrUqNx5" + }, + "source": [ + "### Install dependencies" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "R3_Hq2VC4_zT", + "outputId": "752f8e45-ea8b-4b57-8a2b-0c7cb77f5f6c" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (1.25.2)\n", + "Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (1.5.3)\n", + "Requirement already satisfied: scipy in /usr/local/lib/python3.10/dist-packages (1.11.4)\n", + "Collecting implicit\n", + " Downloading implicit-0.7.2-cp310-cp310-manylinux2014_x86_64.whl (8.9 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m8.9/8.9 MB\u001b[0m \u001b[31m15.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: torch in /usr/local/lib/python3.10/dist-packages (2.1.0+cu121)\n", + "Collecting lancedb\n", + " Downloading lancedb-0.6.1-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (21.3 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m21.3/21.3 MB\u001b[0m \u001b[31m20.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: python-dateutil>=2.8.1 in /usr/local/lib/python3.10/dist-packages (from pandas) (2.8.2)\n", + "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas) (2023.4)\n", + "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.10/dist-packages (from implicit) (4.66.2)\n", + "Requirement already satisfied: threadpoolctl in 
/usr/local/lib/python3.10/dist-packages (from implicit) (3.3.0)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch) (3.13.1)\n", + "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.10/dist-packages (from torch) (4.10.0)\n", + "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch) (1.12)\n", + "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch) (3.2.1)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch) (3.1.3)\n", + "Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from torch) (2023.6.0)\n", + "Requirement already satisfied: triton==2.1.0 in /usr/local/lib/python3.10/dist-packages (from torch) (2.1.0)\n", + "Collecting deprecation (from lancedb)\n", + " Downloading deprecation-2.1.0-py2.py3-none-any.whl (11 kB)\n", + "Collecting pylance==0.10.1 (from lancedb)\n", + " Downloading pylance-0.10.1-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (21.5 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m21.5/21.5 MB\u001b[0m \u001b[31m28.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting ratelimiter~=1.0 (from lancedb)\n", + " Downloading ratelimiter-1.2.0.post0-py3-none-any.whl (6.6 kB)\n", + "Collecting retry>=0.9.2 (from lancedb)\n", + " Downloading retry-0.9.2-py2.py3-none-any.whl (8.0 kB)\n", + "Requirement already satisfied: pydantic>=1.10 in /usr/local/lib/python3.10/dist-packages (from lancedb) (2.6.3)\n", + "Requirement already satisfied: attrs>=21.3.0 in /usr/local/lib/python3.10/dist-packages (from lancedb) (23.2.0)\n", + "Collecting semver>=3.0 (from lancedb)\n", + " Downloading semver-3.0.2-py3-none-any.whl (17 kB)\n", + "Requirement already satisfied: cachetools in /usr/local/lib/python3.10/dist-packages (from lancedb) (5.3.3)\n", + "Requirement 
already satisfied: pyyaml>=6.0 in /usr/local/lib/python3.10/dist-packages (from lancedb) (6.0.1)\n", + "Requirement already satisfied: click>=8.1.7 in /usr/local/lib/python3.10/dist-packages (from lancedb) (8.1.7)\n", + "Requirement already satisfied: requests>=2.31.0 in /usr/local/lib/python3.10/dist-packages (from lancedb) (2.31.0)\n", + "Collecting overrides>=0.7 (from lancedb)\n", + " Downloading overrides-7.7.0-py3-none-any.whl (17 kB)\n", + "Requirement already satisfied: pyarrow>=12 in /usr/local/lib/python3.10/dist-packages (from pylance==0.10.1->lancedb) (14.0.2)\n", + "Requirement already satisfied: annotated-types>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from pydantic>=1.10->lancedb) (0.6.0)\n", + "Requirement already satisfied: pydantic-core==2.16.3 in /usr/local/lib/python3.10/dist-packages (from pydantic>=1.10->lancedb) (2.16.3)\n", + "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.1->pandas) (1.16.0)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests>=2.31.0->lancedb) (3.3.2)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests>=2.31.0->lancedb) (3.6)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests>=2.31.0->lancedb) (2.0.7)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests>=2.31.0->lancedb) (2024.2.2)\n", + "Requirement already satisfied: decorator>=3.4.2 in /usr/local/lib/python3.10/dist-packages (from retry>=0.9.2->lancedb) (4.4.2)\n", + "Collecting py<2.0.0,>=1.4.26 (from retry>=0.9.2->lancedb)\n", + " Downloading py-1.11.0-py2.py3-none-any.whl (98 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m98.7/98.7 kB\u001b[0m \u001b[31m13.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 
+ "\u001b[?25hRequirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from deprecation->lancedb) (23.2)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch) (2.1.5)\n", + "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch) (1.3.0)\n", + "Installing collected packages: ratelimiter, semver, py, overrides, deprecation, retry, pylance, implicit, lancedb\n", + "Successfully installed deprecation-2.1.0 implicit-0.7.2 lancedb-0.6.1 overrides-7.7.0 py-1.11.0 pylance-0.10.1 ratelimiter-1.2.0.post0 retry-0.9.2 semver-3.0.2\n" + ] + } ], - "text/html": [ - "\n", - "
\n", - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
user_idproduct_idtotal_orders
13863744206209486971
13863745206209487422
138637462062104614950
138637472062112784549
138637482062112660432
\n", - "
\n", - "
\n", - "\n", - "
\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "
\n", - "\n", - "\n", - "
\n", - " \n", - "\n", - "\n", - "\n", - " \n", - "
\n", - "
\n", - "
\n" + "source": [ + "!pip install numpy pandas scipy implicit torch lancedb" ] - }, - "metadata": {}, - "execution_count": 15 - } - ], - "source": [ - "data_new = pd.DataFrame(\n", - " [\n", - " [data.user_id.max() + 1, 46149, 50],\n", - " [data.user_id.max() + 2, 27845, 49],\n", - " [data.user_id.max() + 2, 26604, 32],\n", - " ],\n", - " columns=[\"user_id\", \"product_id\", \"total_orders\"],\n", - ")\n", - "data = pd.concat([data, data_new]).reset_index(drop=True)\n", - "data.tail()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "xBC-8PFTt5HD" - }, - "source": [ - "In the next step, we will extract user and product unique ids, in order to create a `CSR (Compressed Sparse Row)` matrix. This will allow us to perform collaborative filtering.\n" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": { - "id": "v2_2R7zmt5HE" - }, - "outputs": [], - "source": [ - "# extract unique user and product ids\n", - "unique_users = list(np.sort(data.user_id.unique()))\n", - "unique_products = list(np.sort(products.product_id.unique()))\n", - "purchases = list(data.total_orders)\n", - "\n", - "# create zero-based index position <-> user/item ID mappings\n", - "index_to_user = pd.Series(unique_users)\n", - "\n", - "# create reverse mappings from user/item ID to index positions\n", - "user_to_index = pd.Series(data=index_to_user.index + 1, index=index_to_user.values)\n", - "\n", - "# create row and column for user and product ids\n", - "users_rows = data.user_id.astype(int)\n", - "products_cols = data.product_id.astype(int)\n", - "\n", - "# create CSR matrix\n", - "matrix = scipy.sparse.csr_matrix(\n", - " (purchases, (users_rows, products_cols)),\n", - " shape=(len(unique_users) + 1, len(unique_products) + 1),\n", - ")\n", - "matrix.data = np.nan_to_num(matrix.data, copy=False)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "II6wOH96t5HF" - }, - "source": [ - "Let's now create a recommender model using the **implicit** 
library. The recommendation model is based off the algorithms described in the paper [Collaborative Filtering for Implicit Feedback Datasets](https://www.researchgate.net/publication/220765111_Collaborative_Filtering_for_Implicit_Feedback_Datasets) with performance optimizations described in [Applications of the Conjugate Gradient Method for Implicit Feedback Collaborative Filtering](https://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.379.6473&rep=rep1&type=pdf).\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "JDwIxGMnqNx8" - }, - "source": [ - "# Difference between colloborative and content filtering\n", - "\n", - "![picture](https://miro.medium.com/v2/resize:fit:1400/0*R8qw_CXxCc4600bQ.png)" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 104, - "referenced_widgets": [ - "51febb09c3d54a1a9cf5dd896f3a24f6", - "91b083fde4f14c39bbafb6fd099d44bd", - "84fca55b676b4ef2add284492c8f4c3c", - "bb2c985a09564562b6f040e31d817f07", - "cc06b425a9364b6eb07ef77c4ff6fc48", - "e2e92925bbb442f8a77e2d55886bfbfa", - "bc7f6859319f455da1f552b66a6cf026", - "66396eb857864cc8af94d7e2ced3102c", - "38ddb81c475a472d8439dcf72261b727", - "c095ad1b03a34c4e8b2077e373c82a5b", - "692c702c31904e058c809ae772f1579a" - ] }, - "id": "k0GW99kxt5HF", - "outputId": "548c2514-6194-43e4-dd24-6861f1808f5b" - }, - "outputs": [ { - "output_type": "stream", - "name": "stderr", - "text": [ - "/usr/local/lib/python3.10/dist-packages/implicit/cpu/als.py:95: RuntimeWarning: OpenBLAS is configured to use 2 threads. It is highly recommended to disable its internal threadpool by setting the environment variable 'OPENBLAS_NUM_THREADS=1' or by calling 'threadpoolctl.threadpool_limits(1, \"blas\")'. 
Having OpenBLAS use a threadpool can lead to severe performance issues here.\n", - " check_blas_config()\n" - ] + "cell_type": "markdown", + "metadata": { + "id": "i_eatRhaIGIz" + }, + "source": [ + "### Importing libraries" + ] }, { - "output_type": "display_data", - "data": { - "text/plain": [ - " 0%| | 0/50 [00:00\n", - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
product_idproduct_namevector_distance
046149Zero Calorie Cola[-0.014371638, -0.016776536, -0.026950998, -0....36.209068
1196Soda[-0.031917833, -0.050772455, 0.013827451, -0.0...36.464764
240939Drinking Water[-0.013426425, 0.0053616967, -0.01992105, -0.0...36.504112
322802Mineral Water[-0.0062663523, -0.00076926383, -0.013624842, ...36.615498
437710Trail Mix[-0.01988333, -0.014069387, -0.021995109, -0.0...36.650448
542500Orange & Lemon Flavor Variety Pack Sparkling F...[-0.009584657, -0.023491196, -0.033104196, -0....36.696648
611759Organic Simply Naked Pita Chips[-0.009341286, -0.014609524, -0.0064758006, -0...36.705814
741400Crunchy Oats 'n Honey Granola Bars[-0.013461881, -0.021371827, -0.02064814, -0.0...36.709579
846061Popcorn[0.0019679032, 0.00719048, -0.01262015, -0.005...36.714954
926348Mixed Fruit Fruit Snacks[-0.0017672281, 0.0020188452, 0.012172974, -0....36.716858
\n", - "
\n", - "
\n", - "\n", - "
\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "
\n", - "\n", - "\n", - "
\n", - " \n", - "\n", - "\n", - "\n", - " \n", - "
\n", - "
\n", - " \n" + "cell_type": "code", + "execution_count": 6, + "metadata": { + "id": "cBbbR7Rut5G_" + }, + "outputs": [], + "source": [ + "products = pd.read_csv(\"products.csv\")\n", + "orders = pd.read_csv(\"orders.csv\")\n", + "order_products = pd.concat(\n", + " [pd.read_csv(\"order_products__train.csv\"), pd.read_csv(\"order_products__prior.csv\")]\n", + ")" ] - }, - "metadata": {} }, { - "output_type": "display_data", - "data": { - "text/plain": [ - " product_id product_name total_orders\n", - "0 46149 Zero Calorie Cola 50" - ], - "text/html": [ - "\n", - "
\n", - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
product_idproduct_nametotal_orders
046149Zero Calorie Cola50
\n", - "
\n", - "
\n", - "\n", - "
\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "
\n", - "\n", - "
\n", - "
\n" + "cell_type": "markdown", + "metadata": { + "id": "5FV_GGjst5HA" + }, + "source": [ + "Since there isn't a user rating attribute, we'll gather \"confidence\" data by looking at the frequency of each item purchased by a user, and store this in the `data` dataframe." ] - }, - "metadata": {} }, { - "output_type": "display_data", - "data": { - "text/plain": [ - " product_id product_name \\\n", - "0 26604 Organic Blackberries \n", - "1 43352 Raspberries \n", - "2 27845 Organic Whole Milk \n", - "3 21288 Blackberries \n", - "4 27966 Organic Raspberries \n", - "5 9076 Blueberries \n", - "6 11777 Red Raspberries \n", - "7 39275 Organic Blueberries \n", - "8 21137 Organic Strawberries \n", - "9 13176 Bag of Organic Bananas \n", - "\n", - " vector _distance \n", - "0 [0.045252558, 0.04258531, 0.011869884, -0.0111... 17.445852 \n", - "1 [0.059606433, 0.014409931, 0.008712215, -0.007... 17.617174 \n", - "2 [-0.03977351, 0.012210161, 0.024828656, 0.0155... 17.692816 \n", - "3 [0.030181486, 0.049021076, 0.003293778, -0.038... 17.696075 \n", - "4 [0.020116415, 0.045062356, 0.00675044, 0.01640... 17.872534 \n", - "5 [0.0482006, 0.06329333, -0.015093377, 0.000180... 17.879623 \n", - "6 [0.05492493, 0.008120705, 0.020613482, 0.00779... 17.931437 \n", - "7 [0.005109854, 0.032895964, -0.013481544, 0.010... 17.970798 \n", - "8 [0.0017651353, 0.033547334, -0.005775958, 0.02... 17.986570 \n", - "9 [0.004607136, 0.02749164, -0.006206838, 0.0187... 18.092993 " - ], - "text/html": [ - "\n", - "
\n", - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
product_idproduct_namevector_distance
026604Organic Blackberries[0.045252558, 0.04258531, 0.011869884, -0.0111...17.445852
143352Raspberries[0.059606433, 0.014409931, 0.008712215, -0.007...17.617174
227845Organic Whole Milk[-0.03977351, 0.012210161, 0.024828656, 0.0155...17.692816
321288Blackberries[0.030181486, 0.049021076, 0.003293778, -0.038...17.696075
427966Organic Raspberries[0.020116415, 0.045062356, 0.00675044, 0.01640...17.872534
59076Blueberries[0.0482006, 0.06329333, -0.015093377, 0.000180...17.879623
611777Red Raspberries[0.05492493, 0.008120705, 0.020613482, 0.00779...17.931437
739275Organic Blueberries[0.005109854, 0.032895964, -0.013481544, 0.010...17.970798
821137Organic Strawberries[0.0017651353, 0.033547334, -0.005775958, 0.02...17.986570
913176Bag of Organic Bananas[0.004607136, 0.02749164, -0.006206838, 0.0187...18.092993
\n", - "
\n", - "
\n", - "\n", - "
\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "
\n", - "\n", - "\n", - "
\n", - " \n", - "\n", - "\n", - "\n", - " \n", - "
\n", - "
\n", - "
\n" + "cell_type": "markdown", + "metadata": { + "id": "YNgjd2nnqNx7" + }, + "source": [ + "### Data Manipulation" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "id": "ZjRh7RYpt5HB" + }, + "outputs": [], + "source": [ + "customer_order_products = pd.merge(orders, order_products, how=\"inner\", on=\"order_id\")\n", + "\n", + "# create confidence table\n", + "data = (\n", + " customer_order_products.groupby([\"user_id\", \"product_id\"])[[\"order_id\"]]\n", + " .count()\n", + " .reset_index()\n", + ")\n", + "data.columns = [\"user_id\", \"product_id\", \"total_orders\"]\n", + "data.product_id = data.product_id.astype(\"int64\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "77lvwm0St5HC" + }, + "source": [ + "Let's create a couple of test users to examine the recommendations later:\n", + "- 1st test user: buys 50 sodas: **Zero Calorie Cola**\n", + "- 2nd test user: buys organic produce: **Organic Whole Milk** and **Organic Blackberries**" ] - }, - "metadata": {} }, { - "output_type": "display_data", - "data": { - "text/plain": [ - " product_id product_name total_orders\n", - "0 27845 Organic Whole Milk 49\n", - "1 26604 Organic Blackberries 32" + "cell_type": "code", + "execution_count": 8, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 206 + }, + "id": "A06EfAf-t5HC", + "outputId": "48ef0f5d-7c7a-4087-fd4b-8d3fa5ebaca1" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " user_id product_id total_orders\n", + "13863744 206209 48697 1\n", + "13863745 206209 48742 2\n", + "13863746 206210 46149 50\n", + "13863747 206211 27845 49\n", + "13863748 206211 26604 32" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
user_idproduct_idtotal_orders
13863744206209486971
13863745206209487422
138637462062104614950
138637472062112784549
138637482062112660432
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "summary": "{\n \"name\": \"data\",\n \"rows\": 5,\n \"fields\": [\n {\n \"column\": \"user_id\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1,\n \"min\": 206209,\n \"max\": 206211,\n \"num_unique_values\": 3,\n \"samples\": [\n 206209,\n 206210,\n 206211\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"product_id\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 11361,\n \"min\": 26604,\n \"max\": 48742,\n \"num_unique_values\": 5,\n \"samples\": [\n 48742,\n 26604,\n 46149\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"total_orders\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 24,\n \"min\": 1,\n \"max\": 50,\n \"num_unique_values\": 5,\n \"samples\": [\n 2,\n 32,\n 50\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" + } + }, + "metadata": {}, + "execution_count": 8 + } ], - "text/html": [ - "\n", - "
\n", - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
product_idproduct_nametotal_orders
027845Organic Whole Milk49
126604Organic Blackberries32
\n", - "
\n", - "
\n", - "\n", - "
\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "
\n", - "\n", - "\n", - "
\n", - " \n", - "\n", - "\n", - "\n", - " \n", - "
\n", - "
\n", - "
\n" + "source": [ + "data_new = pd.DataFrame(\n", + " [\n", + " [data.user_id.max() + 1, 46149, 50],\n", + " [data.user_id.max() + 2, 27845, 49],\n", + " [data.user_id.max() + 2, 26604, 32],\n", + " ],\n", + " columns=[\"user_id\", \"product_id\", \"total_orders\"],\n", + ")\n", + "data = pd.concat([data, data_new]).reset_index(drop=True)\n", + "data.tail()" ] - }, - "metadata": {} - } - ], - "source": [ - "# Query by user factors\n", - "test_user_embeddings = test_user_factors.tolist()\n", - "for embedding, id in zip(test_user_embeddings, test_user_ids):\n", - " results = tbl.search(embedding).limit(10).to_pandas()\n", - " display(results)\n", - " display(products_bought_by_user_in_the_past(id, top=15))" - ] - }, - { - "cell_type": "code", - "source": [], - "metadata": { - "id": "-kWR644v1ZJp" - }, - "execution_count": null, - "outputs": [] - } - ], - "metadata": { - "accelerator": "GPU", - "colab": { - "gpuType": "T4", - "provenance": [] - }, - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.1" - }, - "vscode": { - "interpreter": { - "hash": "5fe10bf018ef3e697f9035d60bf60847932a12bface18908407fd371fe880db9" - } - }, - "widgets": { - "application/vnd.jupyter.widget-state+json": { - "51febb09c3d54a1a9cf5dd896f3a24f6": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - 
"IPY_MODEL_91b083fde4f14c39bbafb6fd099d44bd", - "IPY_MODEL_84fca55b676b4ef2add284492c8f4c3c", - "IPY_MODEL_bb2c985a09564562b6f040e31d817f07" + }, + { + "cell_type": "markdown", + "metadata": { + "id": "xBC-8PFTt5HD" + }, + "source": [ + "In the next step, we will extract user and product unique ids, in order to create a `CSR (Compressed Sparse Row)` matrix. This will allow us to perform collaborative filtering.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "id": "v2_2R7zmt5HE" + }, + "outputs": [], + "source": [ + "# extract unique user and product ids\n", + "unique_users = list(np.sort(data.user_id.unique()))\n", + "unique_products = list(np.sort(products.product_id.unique()))\n", + "purchases = list(data.total_orders)\n", + "\n", + "# create zero-based index position <-> user/item ID mappings\n", + "index_to_user = pd.Series(unique_users)\n", + "\n", + "# create reverse mappings from user/item ID to index positions\n", + "user_to_index = pd.Series(data=index_to_user.index + 1, index=index_to_user.values)\n", + "\n", + "# create row and column for user and product ids\n", + "users_rows = data.user_id.astype(int)\n", + "products_cols = data.product_id.astype(int)\n", + "\n", + "# create CSR matrix\n", + "matrix = scipy.sparse.csr_matrix(\n", + " (purchases, (users_rows, products_cols)),\n", + " shape=(len(unique_users) + 1, len(unique_products) + 1),\n", + ")\n", + "matrix.data = np.nan_to_num(matrix.data, copy=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "II6wOH96t5HF" + }, + "source": [ + "Let's now create a recommender model using the **implicit** library. 
The recommendation model is based on the algorithms described in the paper [Collaborative Filtering for Implicit Feedback Datasets](https://www.researchgate.net/publication/220765111_Collaborative_Filtering_for_Implicit_Feedback_Datasets) with performance optimizations described in [Applications of the Conjugate Gradient Method for Implicit Feedback Collaborative Filtering](https://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.379.6473&rep=rep1&type=pdf).\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "JDwIxGMnqNx8" + }, + "source": [ + "# Difference between collaborative and content filtering\n", + "\n", + "![picture](https://miro.medium.com/v2/resize:fit:1400/0*R8qw_CXxCc4600bQ.png)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 104, + "referenced_widgets": [ + "c159a1c6fc204d239b5ff7713d3c68fe", + "6e3b621f67554d6cbcaa50717008821f", + "1e5f629b939247c088b275a72310cfe0", + "cfde2bc68d9c448b823c690e15c4a169", + "8668f98cebeb4b548e87f2c4e68c9cbf", + "7ebca3dced8e4c029398db02169b868e", + "28400c62e971452b865e70af4e410afc", + "c45f8ded7dc84c18b479c3c427c29463", + "301f4f324d594ff2a63dc2f43ba4391f", + "0e3594636fbf4263b32d195f31fd29c0", + "adf0848d8d8440f18dbd001572772fce" + ] + }, + "id": "k0GW99kxt5HF", + "outputId": "fd9c03c5-c668-4ddd-8fea-1b3e737b8ad6" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.10/dist-packages/implicit/cpu/als.py:95: RuntimeWarning: OpenBLAS is configured to use 2 threads. It is highly recommended to disable its internal threadpool by setting the environment variable 'OPENBLAS_NUM_THREADS=1' or by calling 'threadpoolctl.threadpool_limits(1, \"blas\")'. Having OpenBLAS use a threadpool can lead to severe performance issues here.\n", + " check_blas_config()\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + " 0%| | 0/50 [00:00\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
product_idproduct_namevector_distance
046149Zero Calorie Cola[0.037515923, -0.030325921, 0.004221245, -0.00...38.190578
1196Soda[0.04531822, -0.04450815, -0.0022076364, -0.02...38.340080
222802Mineral Water[0.030236538, -0.0041136313, 0.015683502, -0.0...38.593525
340939Drinking Water[0.03287196, -0.017454194, 0.009911481, -0.004...38.606468
431651Extra Fancy Unsalted Mixed Nuts[0.037796307, -0.009871203, -0.0020715303, -0....38.642967
537710Trail Mix[0.05062829, -0.017916694, 0.0027849572, 0.001...38.668938
641400Crunchy Oats 'n Honey Granola Bars[0.028622035, -0.013106515, -0.0072577046, -0....38.703171
726348Mixed Fruit Fruit Snacks[0.011525251, -0.032522, -0.021976499, 0.01198...38.709934
846061Popcorn[0.039293304, -0.016017294, -0.0010792917, 0.0...38.713402
939657Milk Chocolate Almonds[0.030015469, -0.00927157, 0.0061932686, 0.000...38.748997
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + " \n", + " \n", + " \n", + "
\n", + "\n", + "
\n", + " \n" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "variable_name": "results", + "summary": "{\n \"name\": \"results\",\n \"rows\": 10,\n \"fields\": [\n {\n \"column\": \"product_id\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 14019,\n \"min\": 196,\n \"max\": 46149,\n \"num_unique_values\": 10,\n \"samples\": [\n 46061,\n 196,\n 37710\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"product_name\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 10,\n \"samples\": [\n \"Popcorn\",\n \"Soda\",\n \"Trail Mix\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"vector\",\n \"properties\": {\n \"dtype\": \"object\",\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"_distance\",\n \"properties\": {\n \"dtype\": \"float32\",\n \"num_unique_values\": 10,\n \"samples\": [\n 38.713401794433594,\n 38.34008026123047,\n 38.66893768310547\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" + } + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + " product_id product_name total_orders\n", + "0 46149 Zero Calorie Cola 50" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
product_idproduct_nametotal_orders
046149Zero Calorie Cola50
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + "
\n" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "summary": "{\n \"name\": \" display(products_bought_by_user_in_the_past(id, top=15))\",\n \"rows\": 1,\n \"fields\": [\n {\n \"column\": \"product_id\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": null,\n \"min\": 46149,\n \"max\": 46149,\n \"num_unique_values\": 1,\n \"samples\": [\n 46149\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"product_name\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 1,\n \"samples\": [\n \"Zero Calorie Cola\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"total_orders\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": null,\n \"min\": 50,\n \"max\": 50,\n \"num_unique_values\": 1,\n \"samples\": [\n 50\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" + } + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + " product_id product_name \\\n", + "0 26604 Organic Blackberries \n", + "1 27845 Organic Whole Milk \n", + "2 27966 Organic Raspberries \n", + "3 43352 Raspberries \n", + "4 21288 Blackberries \n", + "5 39275 Organic Blueberries \n", + "6 11777 Red Raspberries \n", + "7 9076 Blueberries \n", + "8 21137 Organic Strawberries \n", + "9 11422 Plain Greek Yogurt \n", + "\n", + " vector _distance \n", + "0 [0.019478824, 0.007443799, 0.004226536, 0.0283... 16.314867 \n", + "1 [-0.03417227, -0.053161107, 0.03893201, 0.0150... 16.432335 \n", + "2 [0.024305355, -0.0063351737, 0.029324768, 0.02... 16.577738 \n", + "3 [0.020642506, 0.025494106, 0.0050161625, 0.003... 16.588812 \n", + "4 [-0.00844225, 0.01996236, -0.0148576135, 0.012... 16.672234 \n", + "5 [0.035410225, -0.0029810749, 0.014112177, 0.00... 16.684757 \n", + "6 [0.020807281, -0.015660688, 0.010914551, 0.028... 16.746056 \n", + "7 [0.033343736, 0.0068411743, 0.0028535812, 0.00... 
16.765997 \n", + "8 [0.018478896, -0.0014569649, 0.01558258, 0.009... 16.883642 \n", + "9 [0.003926732, -0.02004065, 0.059874147, 0.0318... 17.008499 " + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
product_idproduct_namevector_distance
026604Organic Blackberries[0.019478824, 0.007443799, 0.004226536, 0.0283...16.314867
127845Organic Whole Milk[-0.03417227, -0.053161107, 0.03893201, 0.0150...16.432335
227966Organic Raspberries[0.024305355, -0.0063351737, 0.029324768, 0.02...16.577738
343352Raspberries[0.020642506, 0.025494106, 0.0050161625, 0.003...16.588812
421288Blackberries[-0.00844225, 0.01996236, -0.0148576135, 0.012...16.672234
539275Organic Blueberries[0.035410225, -0.0029810749, 0.014112177, 0.00...16.684757
611777Red Raspberries[0.020807281, -0.015660688, 0.010914551, 0.028...16.746056
79076Blueberries[0.033343736, 0.0068411743, 0.0028535812, 0.00...16.765997
821137Organic Strawberries[0.018478896, -0.0014569649, 0.01558258, 0.009...16.883642
911422Plain Greek Yogurt[0.003926732, -0.02004065, 0.059874147, 0.0318...17.008499
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + " \n", + " \n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "variable_name": "results", + "summary": "{\n \"name\": \"results\",\n \"rows\": 10,\n \"fields\": [\n {\n \"column\": \"product_id\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 11503,\n \"min\": 9076,\n \"max\": 43352,\n \"num_unique_values\": 10,\n \"samples\": [\n 21137,\n 27845,\n 39275\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"product_name\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 10,\n \"samples\": [\n \"Organic Strawberries\",\n \"Organic Whole Milk\",\n \"Organic Blueberries\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"vector\",\n \"properties\": {\n \"dtype\": \"object\",\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"_distance\",\n \"properties\": {\n \"dtype\": \"float32\",\n \"num_unique_values\": 10,\n \"samples\": [\n 16.883642196655273,\n 16.432334899902344,\n 16.684757232666016\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" + } + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + " product_id product_name total_orders\n", + "0 27845 Organic Whole Milk 49\n", + "1 26604 Organic Blackberries 32" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
product_idproduct_nametotal_orders
027845Organic Whole Milk49
126604Organic Blackberries32
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "summary": "{\n \"name\": \" display(products_bought_by_user_in_the_past(id, top=15))\",\n \"rows\": 2,\n \"fields\": [\n {\n \"column\": \"product_id\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 877,\n \"min\": 26604,\n \"max\": 27845,\n \"num_unique_values\": 2,\n \"samples\": [\n 26604,\n 27845\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"product_name\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"Organic Blackberries\",\n \"Organic Whole Milk\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"total_orders\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 12,\n \"min\": 32,\n \"max\": 49,\n \"num_unique_values\": 2,\n \"samples\": [\n 32,\n 49\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" + } + }, + "metadata": {} + } + ], + "source": [ + "# Query by user factors\n", + "test_user_embeddings = test_user_factors.tolist()\n", + "for embedding, id in zip(test_user_embeddings, test_user_ids):\n", + " results = tbl.search(embedding).limit(10).to_pandas()\n", + " display(results)\n", + " display(products_bought_by_user_in_the_past(id, top=15))" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "gpuType": "T4", + "provenance": [] }, - "2782769e3daa491385bcc8ae34f24f3b": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - 
"grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" }, - "5d41569b941445bea2497c89d3c8e6cb": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.1" }, - "5e7dd2740d174064ac2d1cbc75cb5909": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": 
null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } + "vscode": { + "interpreter": { + "hash": "5fe10bf018ef3e697f9035d60bf60847932a12bface18908407fd371fe880db9" + } }, - "a67972dc3f264b3699816257f1ad9ed7": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "c159a1c6fc204d239b5ff7713d3c68fe": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_6e3b621f67554d6cbcaa50717008821f", + "IPY_MODEL_1e5f629b939247c088b275a72310cfe0", + 
"IPY_MODEL_cfde2bc68d9c448b823c690e15c4a169" + ], + "layout": "IPY_MODEL_8668f98cebeb4b548e87f2c4e68c9cbf" + } + }, + "6e3b621f67554d6cbcaa50717008821f": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_7ebca3dced8e4c029398db02169b868e", + "placeholder": "​", + "style": "IPY_MODEL_28400c62e971452b865e70af4e410afc", + "value": "100%" + } + }, + "1e5f629b939247c088b275a72310cfe0": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_c45f8ded7dc84c18b479c3c427c29463", + "max": 50, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_301f4f324d594ff2a63dc2f43ba4391f", + "value": 50 + } + }, + "cfde2bc68d9c448b823c690e15c4a169": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": 
"IPY_MODEL_0e3594636fbf4263b32d195f31fd29c0", + "placeholder": "​", + "style": "IPY_MODEL_adf0848d8d8440f18dbd001572772fce", + "value": " 50/50 [17:12<00:00, 20.75s/it]" + } + }, + "8668f98cebeb4b548e87f2c4e68c9cbf": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "7ebca3dced8e4c029398db02169b868e": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + 
"grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "28400c62e971452b865e70af4e410afc": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "c45f8ded7dc84c18b479c3c427c29463": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + 
"justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "301f4f324d594ff2a63dc2f43ba4391f": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "0e3594636fbf4263b32d195f31fd29c0": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": 
null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "adf0848d8d8440f18dbd001572772fce": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "142121b5c098477985d3bf5eb9560ad4": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_49f9dab3bf2748a2a0811a7057c32ff7", + "IPY_MODEL_3ea9a47313cd496694180de85b51decf", + "IPY_MODEL_1cd7d3c410ed449eb88cc8d78e49e10d" + ], + "layout": "IPY_MODEL_e66f741c3e794c69a328c715cc9b56a2" + } + }, + "49f9dab3bf2748a2a0811a7057c32ff7": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_4a785b8e4b0d43eca0cf41c2b1cb2f35", + "placeholder": "​", + "style": "IPY_MODEL_05369b050a61407f8cd0c657afb9a6bd", + "value": "100%" + } + }, + "3ea9a47313cd496694180de85b51decf": { + "model_module": 
"@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_9ffbed3caaf84e1db7bde609b6cc06a7", + "max": 192999, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_6142e63dd35c46839b9b8cd520750844", + "value": 192999 + } + }, + "1cd7d3c410ed449eb88cc8d78e49e10d": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_cb770a9f4224470bba0a7488b76a24c0", + "placeholder": "​", + "style": "IPY_MODEL_2dea74cc01b04e548bb7a77bd31a2fd2", + "value": " 192999/192999 [02:18<00:00, 1522.55it/s]" + } + }, + "e66f741c3e794c69a328c715cc9b56a2": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": 
null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "4a785b8e4b0d43eca0cf41c2b1cb2f35": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "05369b050a61407f8cd0c657afb9a6bd": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + 
"model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "9ffbed3caaf84e1db7bde609b6cc06a7": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "6142e63dd35c46839b9b8cd520750844": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": 
"StyleView", + "bar_color": null, + "description_width": "" + } + }, + "cb770a9f4224470bba0a7488b76a24c0": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "2dea74cc01b04e548bb7a77bd31a2fd2": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + } + } } - } - } - }, - "nbformat": 4, - "nbformat_minor": 0 + }, + "nbformat": 4, + "nbformat_minor": 0 } \ No newline at end of file