diff --git a/README.md b/README.md
index bb5609ca..4a74c7f6 100644
--- a/README.md
+++ b/README.md
@@ -52,7 +52,7 @@ If you're looking for in-depth tutorial-like examples, checkout the [tutorials](
| [RAG Fusion](/examples/RAG_Fusion/) | [![LLM](https://img.shields.io/badge/openai-api-white)](#)|
| [Hybrid search BM25 & lancedb ](./examples/Hybrid_search_bm25_lancedb/) | [![LLM](https://img.shields.io/badge/openai-api-white)](#)|[![Medium](https://img.shields.io/badge/Medium-12100E?style=for-the-badge&logo=medium&logoColor=white)](https://blog.lancedb.com/hybrid-search-combining-bm25-and-semantic-search-for-better-results-with-lan-1358038fe7e6)|
| [Evaluating Prompts with Prompttools](/examples/prompttools-eval-prompts/) | [![LLM](https://img.shields.io/badge/openai-api-white)](#) [![local LLM](https://img.shields.io/badge/local-llm-green)](#)| |
-| [NER powered with Semantic Search](/tutorials/NER-powered-Semantic-Search/) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/lancedb/vectordb-recipes/blob/main/tutorials/NER-powered-Semantic-Search/NER_powered_Semantic_Search_with_LanceDB.ipynb)[![local LLM](https://img.shields.io/badge/local-llm-green)](#) | [![Medium](https://img.shields.io/badge/Medium-12100E?style=for-the-badge&logo=medium&logoColor=white)](https://blog.lancedb.com/ner-powered-semantic-search-using-lancedb-51051dc3e493)|
+| [NER powered with Semantic Search](/tutorials/NER-powered-Semantic-Search/) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/lancedb/vectordb-recipes/blob/main/tutorials/NER-powered-Semantic-Search/NER_powered_Semantic_Search_with_LanceDB.ipynb) [![local LLM](https://img.shields.io/badge/local-llm-green)](#) | [![Medium](https://img.shields.io/badge/Medium-12100E?style=for-the-badge&logo=medium&logoColor=white)](https://blog.lancedb.com/ner-powered-semantic-search-using-lancedb-51051dc3e493)|
[Sentiment Analysis : Analysing Hotel Reviews](/examples/Sentiment-Analysis-Analyse-Hotel-Reviews/) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/lancedb/vectordb-recipes/blob/main/examples/Sentiment-Analysis-Analyse-Hotel-Reviews/Sentiment_Analysis_using_LanceDB.ipynb) [![local LLM](https://img.shields.io/badge/local-llm-green)](#) | [![Medium](https://img.shields.io/badge/Medium-12100E?style=for-the-badge&logo=medium&logoColor=white)](https://blog.lancedb.com/sentiment-analysis-using-lancedb-2da3cb1e3fa6)|
| [Facial Recognition](./examples/facial_recognition) | |
| [Accelerate Vector Search Applications Using OpenVINO](/tutorials/Accelerate-Vector-Search-Applications-Using-OpenVINO/) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/lancedb/vectordb-recipes/blob/main/tutorials/Accelerate-Vector-Search-Applications-Using-OpenVINO/clip_text_image_search.ipynb) [![local LLM](https://img.shields.io/badge/local-llm-green)](#)| [![Medium](https://img.shields.io/badge/Medium-12100E?style=for-the-badge&logo=medium&logoColor=white)](https://blog.lancedb.com/accelerate-vector-search-applications-using-openvino-51366eabf866)|
@@ -87,15 +87,15 @@ Looking to get started with LLMs, vectorDBs, and the world of Generative AI? The
| Tutorial | Interactive Environment | Blog Link |
| --------- | -------------------------- | ----------- |
| | | |
-| [Corrective RAG with Langgraph](./tutorials/Corrective-RAG-with_Langgraph/) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/lancedb/vectordb-recipes/blob/main/tutorials/Corrective-RAG-with_Langgraph/CRAG_with_Langgraph.ipynb) | |
+| [Corrective RAG with Langgraph](./tutorials/Corrective-RAG-with_Langgraph/) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/lancedb/vectordb-recipes/blob/main/tutorials/Corrective-RAG-with_Langgraph/CRAG_with_Langgraph.ipynb) [![LLM](https://img.shields.io/badge/openai-api-white)](#) | |
| [Product Quantization: Compress High Dimensional Vectors](https://blog.lancedb.com/product-quantization-compress-high-dimensional-vectors-dfcba98fab47) | | [![Medium](https://img.shields.io/badge/Medium-12100E?style=for-the-badge&logo=medium&logoColor=white)](https://blog.lancedb.com/product-quantization-compress-high-dimensional-vectors-dfcba98fab47) |
| [LLMs, RAG, & the missing storage layer for AI](https://medium.com/etoai/llms-rag-the-missing-storage-layer-for-ai-28ded35fa984) | | [![Medium](https://img.shields.io/badge/Medium-12100E?style=for-the-badge&logo=medium&logoColor=white)](https://medium.com/etoai/llms-rag-the-missing-storage-layer-for-ai-28ded35fa984) |
-| [Fine-Tuning LLM using PEFT & QLoRA](./tutorials/fine-tuning_LLM_with_PEFT_QLoRA) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/lancedb/vectordb-recipes/blob/main/tutorials/fine-tuning_LLM_with_PEFT_QLoRA/main.ipynb) | [![Medium](https://img.shields.io/badge/Medium-12100E?style=for-the-badge&logo=medium&logoColor=white)](https://blog.lancedb.com/optimizing-llms-a-step-by-step-guide-to-fine-tuning-with-peft-and-qlora-22eddd13d25b) |
-| [Context-Aware Chatbot using Llama 2 & LanceDB](./tutorials/chatbot_using_Llama2_&_lanceDB) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/lancedb/vectordb-recipes/blob/main/tutorials/chatbot_using_Llama2_&_lanceDB/main.ipynb) | [![Medium](https://img.shields.io/badge/Medium-12100E?style=for-the-badge&logo=medium&logoColor=white)](https://blog.lancedb.com/context-aware-chatbot-using-llama-2-lancedb-as-vector-database-4d771d95c755) |
+| [Fine-Tuning LLM using PEFT & QLoRA](./tutorials/fine-tuning_LLM_with_PEFT_QLoRA) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/lancedb/vectordb-recipes/blob/main/tutorials/fine-tuning_LLM_with_PEFT_QLoRA/main.ipynb) [![local LLM](https://img.shields.io/badge/local-llm-green)](#)| [![Medium](https://img.shields.io/badge/Medium-12100E?style=for-the-badge&logo=medium&logoColor=white)](https://blog.lancedb.com/optimizing-llms-a-step-by-step-guide-to-fine-tuning-with-peft-and-qlora-22eddd13d25b) |
+| [Context-Aware Chatbot using Llama 2 & LanceDB](./tutorials/chatbot_using_Llama2_&_lanceDB) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/lancedb/vectordb-recipes/blob/main/tutorials/chatbot_using_Llama2_&_lanceDB/main.ipynb) [![local LLM](https://img.shields.io/badge/local-llm-green)](#)| [![Medium](https://img.shields.io/badge/Medium-12100E?style=for-the-badge&logo=medium&logoColor=white)](https://blog.lancedb.com/context-aware-chatbot-using-llama-2-lancedb-as-vector-database-4d771d95c755) |
| [A Primer on Text Chunking and its Types](./tutorials/different-types-text-chunking-in-RAG) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/lancedb/vectordb-recipes/blob/main/tutorials/different-types-text-chunking-in-RAG/Text_Chunking_on_RAG_application_with_LanceDB.ipynb) | [![Medium](https://img.shields.io/badge/Medium-12100E?style=for-the-badge&logo=medium&logoColor=white)](https://blog.lancedb.com/a-primer-on-text-chunking-and-its-types-a420efc96a13) |
-| [NER powered Semantic Search](./tutorials/NER-powered-Semantic-Search) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/lancedb/vectordb-recipes/blob/main/tutorials/NER-powered-Semantic-Search/NER_powered_Semantic_Search_with_LanceDB.ipynb) | [![Medium](https://img.shields.io/badge/Medium-12100E?style=for-the-badge&logo=medium&logoColor=white)](https://medium.com/etoai/ner-powered-semantic-search-using-lancedb-51051dc3e493) |
-| [Better RAG with FLARE](./tutorials/better-rag-FLAIR) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/lancedb/vectordb-recipes/blob/main/tutorials/better-rag-FLAIR/main.ipynb) |[![Medium](https://img.shields.io/badge/Medium-12100E?style=for-the-badge&logo=medium&logoColor=white)](https://medium.com/@aksdesai1998/better-rag-enhancing-ai-with-active-retrieval-augmented-generation-flare-3b66646e2a9f) |
-| [Accelerate Vector Search Applications Using OpenVINO](./tutorials/Sentiment-Analysis-using-LanceDB) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/lancedb/vectordb-recipes/blob/main/tutorials/Accelerate-Vector-Search-Applications-Using-OpenVINO/clip_text_image_search.ipynb) | [![Medium](https://img.shields.io/badge/Medium-12100E?style=for-the-badge&logo=medium&logoColor=white)](https://blog.lancedb.com/accelerate-vector-search-applications-using-openvino-51366eabf866)|
+| [NER powered Semantic Search](./tutorials/NER-powered-Semantic-Search) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/lancedb/vectordb-recipes/blob/main/tutorials/NER-powered-Semantic-Search/NER_powered_Semantic_Search_with_LanceDB.ipynb) [![local LLM](https://img.shields.io/badge/local-llm-green)](#)| [![Medium](https://img.shields.io/badge/Medium-12100E?style=for-the-badge&logo=medium&logoColor=white)](https://medium.com/etoai/ner-powered-semantic-search-using-lancedb-51051dc3e493) |
+| [Better RAG with FLARE](./tutorials/better-rag-FLAIR) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/lancedb/vectordb-recipes/blob/main/tutorials/better-rag-FLAIR/main.ipynb) [![local LLM](https://img.shields.io/badge/local-llm-green)](#) [![LLM](https://img.shields.io/badge/openai-api-white)](#)|[![Medium](https://img.shields.io/badge/Medium-12100E?style=for-the-badge&logo=medium&logoColor=white)](https://medium.com/@aksdesai1998/better-rag-enhancing-ai-with-active-retrieval-augmented-generation-flare-3b66646e2a9f) |
+| [Accelerate Vector Search Applications Using OpenVINO](./tutorials/Accelerate-Vector-Search-Applications-Using-OpenVINO) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/lancedb/vectordb-recipes/blob/main/tutorials/Accelerate-Vector-Search-Applications-Using-OpenVINO/clip_text_image_search.ipynb) [![local LLM](https://img.shields.io/badge/local-llm-green)](#)| [![Medium](https://img.shields.io/badge/Medium-12100E?style=for-the-badge&logo=medium&logoColor=white)](https://blog.lancedb.com/accelerate-vector-search-applications-using-openvino-51366eabf866)|
diff --git a/examples/RAG_re_ranking/lancedb_cloud/README.md b/examples/RAG_re_ranking/lancedb_cloud/README.md
index 7ea69462..96a2d95b 100644
--- a/examples/RAG_re_ranking/lancedb_cloud/README.md
+++ b/examples/RAG_re_ranking/lancedb_cloud/README.md
@@ -12,6 +12,13 @@ if you would like to set api key through an environment variable:
```
export LANCEDB_API_KEY="sk_..."
```
+or set it from Python:
+```python
+import os
+import getpass
+
+os.environ["LANCEDB_API_KEY"] = getpass.getpass("Enter Your LANCEDB API Key:")
+```
replace the following lines in main.py with your project slug and api key"
```
diff --git a/examples/RAG_re_ranking/lancedb_cloud/main.ipynb b/examples/RAG_re_ranking/lancedb_cloud/main.ipynb
index 21b6364b..8d1999e7 100644
--- a/examples/RAG_re_ranking/lancedb_cloud/main.ipynb
+++ b/examples/RAG_re_ranking/lancedb_cloud/main.ipynb
@@ -39,71 +39,46 @@
},
{
"cell_type": "markdown",
+ "metadata": {
+ "id": "wgPbKbpumkhH"
+ },
"source": [
"## Credentials\n",
"\n",
"Copy and paste the project name and the api key from your project page.\n",
"These will be used later to [connect to LanceDB Cloud](#scroll-to=5q8m6GMD7sGu)"
- ],
- "metadata": {
- "id": "wgPbKbpumkhH"
- }
+ ]
},
{
"cell_type": "code",
- "source": [
- "project_slug = \"your-project-slug\" # @param {type:\"string\"}"
- ],
+ "execution_count": 1,
"metadata": {
"id": "rqEXT5-fmofw"
},
- "execution_count": 1,
- "outputs": []
+ "outputs": [],
+ "source": [
+ "project_slug = \"your-project-slug\" # @param {type:\"string\"}"
+ ]
},
{
"cell_type": "code",
- "source": [
- "api_key = \"sk_...\" # @param {type:\"string\"}"
- ],
+ "execution_count": 2,
"metadata": {
"id": "5LYmBomPmswi"
},
- "execution_count": 2,
- "outputs": []
+ "outputs": [],
+ "source": [
+ "api_key = \"sk_...\" # @param {type:\"string\"}"
+ ]
},
{
"cell_type": "markdown",
- "source": [
- "You can also set the LANCEDB_API_KEY as an environment variable with one of the options below"
- ],
"metadata": {
"id": "Xs6tr6CMnBrr"
- }
- },
- {
- "cell_type": "code",
- "source": [
- "!export LANCEDB_API_KEY=\"sk_...\""
- ],
- "metadata": {
- "id": "RtjJHRNHnEHE"
},
- "execution_count": null,
- "outputs": []
- },
- {
- "cell_type": "code",
"source": [
- "import os\n",
- "import getpass\n",
- "\n",
- "os.environ[\"LANCEDB_API_KEY\"] = getpass.getpass(\"Enter Your LANCEDB API Key:\")"
- ],
- "metadata": {
- "id": "Qsk18VNxnGIg"
- },
- "execution_count": null,
- "outputs": []
+ "You can also set the LANCEDB_API_KEY as an environment variable. More details can be found in this example's README.md."
+ ]
},
{
"cell_type": "markdown",
@@ -126,8 +101,8 @@
},
"outputs": [
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m87.4/87.4 kB\u001b[0m \u001b[31m1.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m8.4/8.4 MB\u001b[0m \u001b[31m18.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
@@ -335,8 +310,8 @@
},
"outputs": [
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_token.py:88: UserWarning: \n",
"The secret `HF_TOKEN` does not exist in your Colab secrets.\n",
@@ -347,172 +322,172 @@
]
},
{
- "output_type": "display_data",
"data": {
- "text/plain": [
- "tokenizer_config.json: 0%| | 0.00/396 [00:00, ?B/s]"
- ],
"application/vnd.jupyter.widget-view+json": {
+ "model_id": "25222b6616e1489eb531950c958c5fdf",
"version_major": 2,
- "version_minor": 0,
- "model_id": "25222b6616e1489eb531950c958c5fdf"
- }
+ "version_minor": 0
+ },
+ "text/plain": [
+ "tokenizer_config.json: 0%| | 0.00/396 [00:00, ?B/s]"
+ ]
},
- "metadata": {}
+ "metadata": {},
+ "output_type": "display_data"
},
{
- "output_type": "display_data",
"data": {
- "text/plain": [
- "vocab.txt: 0%| | 0.00/232k [00:00, ?B/s]"
- ],
"application/vnd.jupyter.widget-view+json": {
+ "model_id": "80d917f992794502aa6828ed7d01af98",
"version_major": 2,
- "version_minor": 0,
- "model_id": "80d917f992794502aa6828ed7d01af98"
- }
+ "version_minor": 0
+ },
+ "text/plain": [
+ "vocab.txt: 0%| | 0.00/232k [00:00, ?B/s]"
+ ]
},
- "metadata": {}
+ "metadata": {},
+ "output_type": "display_data"
},
{
- "output_type": "display_data",
"data": {
- "text/plain": [
- "tokenizer.json: 0%| | 0.00/712k [00:00, ?B/s]"
- ],
"application/vnd.jupyter.widget-view+json": {
+ "model_id": "db68c63dcd244ca9b8b391559f8abfd1",
"version_major": 2,
- "version_minor": 0,
- "model_id": "db68c63dcd244ca9b8b391559f8abfd1"
- }
+ "version_minor": 0
+ },
+ "text/plain": [
+ "tokenizer.json: 0%| | 0.00/712k [00:00, ?B/s]"
+ ]
},
- "metadata": {}
+ "metadata": {},
+ "output_type": "display_data"
},
{
- "output_type": "display_data",
"data": {
- "text/plain": [
- "special_tokens_map.json: 0%| | 0.00/125 [00:00, ?B/s]"
- ],
"application/vnd.jupyter.widget-view+json": {
+ "model_id": "2f98cd9e8e5f4466a7a2cf88b087ce55",
"version_major": 2,
- "version_minor": 0,
- "model_id": "2f98cd9e8e5f4466a7a2cf88b087ce55"
- }
+ "version_minor": 0
+ },
+ "text/plain": [
+ "special_tokens_map.json: 0%| | 0.00/125 [00:00, ?B/s]"
+ ]
},
- "metadata": {}
+ "metadata": {},
+ "output_type": "display_data"
},
{
- "output_type": "display_data",
"data": {
- "text/plain": [
- "config.json: 0%| | 0.00/731 [00:00, ?B/s]"
- ],
"application/vnd.jupyter.widget-view+json": {
+ "model_id": "4d174874270f4e0f88eb27a16aa0f11c",
"version_major": 2,
- "version_minor": 0,
- "model_id": "4d174874270f4e0f88eb27a16aa0f11c"
- }
+ "version_minor": 0
+ },
+ "text/plain": [
+ "config.json: 0%| | 0.00/731 [00:00, ?B/s]"
+ ]
},
- "metadata": {}
+ "metadata": {},
+ "output_type": "display_data"
},
{
- "output_type": "display_data",
"data": {
- "text/plain": [
- "model.safetensors: 0%| | 0.00/438M [00:00, ?B/s]"
- ],
"application/vnd.jupyter.widget-view+json": {
+ "model_id": "8a12ffea544146c29541b3b4a1c6db2b",
"version_major": 2,
- "version_minor": 0,
- "model_id": "8a12ffea544146c29541b3b4a1c6db2b"
- }
+ "version_minor": 0
+ },
+ "text/plain": [
+ "model.safetensors: 0%| | 0.00/438M [00:00, ?B/s]"
+ ]
},
- "metadata": {}
+ "metadata": {},
+ "output_type": "display_data"
},
{
- "output_type": "display_data",
"data": {
- "text/plain": [
- "tokenizer_config.json: 0%| | 0.00/443 [00:00, ?B/s]"
- ],
"application/vnd.jupyter.widget-view+json": {
+ "model_id": "9003eadfcead41aaaadabf18a706200f",
"version_major": 2,
- "version_minor": 0,
- "model_id": "9003eadfcead41aaaadabf18a706200f"
- }
+ "version_minor": 0
+ },
+ "text/plain": [
+ "tokenizer_config.json: 0%| | 0.00/443 [00:00, ?B/s]"
+ ]
},
- "metadata": {}
+ "metadata": {},
+ "output_type": "display_data"
},
{
- "output_type": "display_data",
"data": {
- "text/plain": [
- "sentencepiece.bpe.model: 0%| | 0.00/5.07M [00:00, ?B/s]"
- ],
"application/vnd.jupyter.widget-view+json": {
+ "model_id": "2be215a977184227abd5983e7c81b3ff",
"version_major": 2,
- "version_minor": 0,
- "model_id": "2be215a977184227abd5983e7c81b3ff"
- }
+ "version_minor": 0
+ },
+ "text/plain": [
+ "sentencepiece.bpe.model: 0%| | 0.00/5.07M [00:00, ?B/s]"
+ ]
},
- "metadata": {}
+ "metadata": {},
+ "output_type": "display_data"
},
{
- "output_type": "display_data",
"data": {
- "text/plain": [
- "tokenizer.json: 0%| | 0.00/17.1M [00:00, ?B/s]"
- ],
"application/vnd.jupyter.widget-view+json": {
+ "model_id": "373fe974f094461d87f5b40ad6aa4e91",
"version_major": 2,
- "version_minor": 0,
- "model_id": "373fe974f094461d87f5b40ad6aa4e91"
- }
+ "version_minor": 0
+ },
+ "text/plain": [
+ "tokenizer.json: 0%| | 0.00/17.1M [00:00, ?B/s]"
+ ]
},
- "metadata": {}
+ "metadata": {},
+ "output_type": "display_data"
},
{
- "output_type": "display_data",
"data": {
- "text/plain": [
- "special_tokens_map.json: 0%| | 0.00/279 [00:00, ?B/s]"
- ],
"application/vnd.jupyter.widget-view+json": {
+ "model_id": "2bfe8c958a9d44f781453b529255e01f",
"version_major": 2,
- "version_minor": 0,
- "model_id": "2bfe8c958a9d44f781453b529255e01f"
- }
+ "version_minor": 0
+ },
+ "text/plain": [
+ "special_tokens_map.json: 0%| | 0.00/279 [00:00, ?B/s]"
+ ]
},
- "metadata": {}
+ "metadata": {},
+ "output_type": "display_data"
},
{
- "output_type": "display_data",
"data": {
- "text/plain": [
- "config.json: 0%| | 0.00/799 [00:00, ?B/s]"
- ],
"application/vnd.jupyter.widget-view+json": {
+ "model_id": "3ab4015a66824bb3a2374d5a090e4e35",
"version_major": 2,
- "version_minor": 0,
- "model_id": "3ab4015a66824bb3a2374d5a090e4e35"
- }
+ "version_minor": 0
+ },
+ "text/plain": [
+ "config.json: 0%| | 0.00/799 [00:00, ?B/s]"
+ ]
},
- "metadata": {}
+ "metadata": {},
+ "output_type": "display_data"
},
{
- "output_type": "display_data",
"data": {
- "text/plain": [
- "model.safetensors: 0%| | 0.00/1.11G [00:00, ?B/s]"
- ],
"application/vnd.jupyter.widget-view+json": {
+ "model_id": "3e3303826e33485ca844fc82a1035b61",
"version_major": 2,
- "version_minor": 0,
- "model_id": "3e3303826e33485ca844fc82a1035b61"
- }
+ "version_minor": 0
+ },
+ "text/plain": [
+ "model.safetensors: 0%| | 0.00/1.11G [00:00, ?B/s]"
+ ]
},
- "metadata": {}
+ "metadata": {},
+ "output_type": "display_data"
}
],
"source": [
@@ -621,80 +596,63 @@
},
"outputs": [
{
- "output_type": "display_data",
"data": {
- "text/plain": [
- "Downloading data: 0%| | 0.00/93.3k [00:00, ?B/s]"
- ],
"application/vnd.jupyter.widget-view+json": {
+ "model_id": "4f46a6443368428193090a6a4ddf3473",
"version_major": 2,
- "version_minor": 0,
- "model_id": "4f46a6443368428193090a6a4ddf3473"
- }
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Downloading data: 0%| | 0.00/93.3k [00:00, ?B/s]"
+ ]
},
- "metadata": {}
+ "metadata": {},
+ "output_type": "display_data"
},
{
- "output_type": "display_data",
"data": {
- "text/plain": [
- "Generating queries split: 0%| | 0/1000 [00:00, ? examples/s]"
- ],
"application/vnd.jupyter.widget-view+json": {
+ "model_id": "8a78b64d3f6b4ffdae7e79266d798635",
"version_major": 2,
- "version_minor": 0,
- "model_id": "8a78b64d3f6b4ffdae7e79266d798635"
- }
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Generating queries split: 0%| | 0/1000 [00:00, ? examples/s]"
+ ]
},
- "metadata": {}
+ "metadata": {},
+ "output_type": "display_data"
},
{
- "output_type": "display_data",
"data": {
- "text/plain": [
- "Downloading data: 0%| | 0.00/19.0M [00:00, ?B/s]"
- ],
"application/vnd.jupyter.widget-view+json": {
+ "model_id": "a3eb172e9d324da1bd7d8914e66d2106",
"version_major": 2,
- "version_minor": 0,
- "model_id": "a3eb172e9d324da1bd7d8914e66d2106"
- }
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Downloading data: 0%| | 0.00/19.0M [00:00, ?B/s]"
+ ]
},
- "metadata": {}
+ "metadata": {},
+ "output_type": "display_data"
},
{
- "output_type": "display_data",
"data": {
- "text/plain": [
- "Generating corpus split: 0%| | 0/25657 [00:00, ? examples/s]"
- ],
"application/vnd.jupyter.widget-view+json": {
+ "model_id": "501f90bcf1ff4efe81cd377df249415e",
"version_major": 2,
- "version_minor": 0,
- "model_id": "501f90bcf1ff4efe81cd377df249415e"
- }
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Generating corpus split: 0%| | 0/25657 [00:00, ? examples/s]"
+ ]
},
- "metadata": {}
+ "metadata": {},
+ "output_type": "display_data"
},
{
- "output_type": "execute_result",
"data": {
- "text/plain": [
- " _id \\\n",
- "13181 6df3dc585e32f3b1cb49228d94a5469c30d79d2b \n",
- "18678 784376563c94e231241fbcf71d4d2774aec4b935 \n",
- "4444 19751e0f81a103658bbac2506f5d5c8e06a1c06a \n",
- "\n",
- " title \\\n",
- "13181 High Performance Computer Acoustic Data Accele... \n",
- "18678 A Comparison over Focused Web Crawling Strategies \n",
- "4444 STDP-based spiking deep convolutional neural n... \n",
- "\n",
- " text \n",
- "13181 This paper presents a new software model desig... \n",
- "18678 In this paper we review and compare focused cr... \n",
- "4444 Previous studies have shown that spike-timing-... "
- ],
"text/html": [
"\n",
"
\n",
@@ -951,10 +909,27 @@
"
\n",
" \n",
" \n"
+ ],
+ "text/plain": [
+ " _id \\\n",
+ "13181 6df3dc585e32f3b1cb49228d94a5469c30d79d2b \n",
+ "18678 784376563c94e231241fbcf71d4d2774aec4b935 \n",
+ "4444 19751e0f81a103658bbac2506f5d5c8e06a1c06a \n",
+ "\n",
+ " title \\\n",
+ "13181 High Performance Computer Acoustic Data Accele... \n",
+ "18678 A Comparison over Focused Web Crawling Strategies \n",
+ "4444 STDP-based spiking deep convolutional neural n... \n",
+ "\n",
+ " text \n",
+ "13181 This paper presents a new software model desig... \n",
+ "18678 In this paper we review and compare focused cr... \n",
+ "4444 Previous studies have shown that spike-timing-... "
]
},
+ "execution_count": 5,
"metadata": {},
- "execution_count": 5
+ "output_type": "execute_result"
}
],
"source": [
@@ -1003,18 +978,18 @@
},
"outputs": [
{
- "output_type": "display_data",
"data": {
- "text/plain": [
- " 0%| | 0/79 [00:00, ?it/s]"
- ],
"application/vnd.jupyter.widget-view+json": {
+ "model_id": "f6b5cd5ff9704a58986eff2c9c88db4c",
"version_major": 2,
- "version_minor": 0,
- "model_id": "f6b5cd5ff9704a58986eff2c9c88db4c"
- }
+ "version_minor": 0
+ },
+ "text/plain": [
+ " 0%| | 0/79 [00:00, ?it/s]"
+ ]
},
- "metadata": {}
+ "metadata": {},
+ "output_type": "display_data"
}
],
"source": [
@@ -1060,64 +1035,14 @@
},
"outputs": [
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"QUERY:-> Classification of human activity by using a Stacked Autoencoder\n"
]
},
{
- "output_type": "execute_result",
"data": {
- "text/plain": [
- " _id \\\n",
- "0 83d323a5bb26b706d4f6d24eb27411a7e7ff57e6 \n",
- "1 a3345798b1faf238e8d805bbe9124b0b8e0c869f \n",
- "2 e0534bfb477c5a82e98d0cb386ae3eb31d349c91 \n",
- "3 c65945c08b7fd77ffd2c53369e8928699c3993e7 \n",
- "4 1af2e075903a3cc5ad5a192921a0b4fb67645dc1 \n",
- "5 3979cf5a013063e98ad0caf2e7110c2686cf1640 \n",
- "6 0fb926cae217b70c97c74eb70b2a6b8c47574812 \n",
- "7 488257dcbc7bcb56836f10a410e69c2c283989e5 \n",
- "8 65f415c6d88aca139867702fc64aa179781b8e65 \n",
- "9 244fc78ce607812edb90290727dab4d33377e986 \n",
- "\n",
- " title \\\n",
- "0 Protective action of green tea catechins in ne... \n",
- "1 Autophagy as a regulated pathway of cellular d... \n",
- "2 Cellular and molecular mechanisms of hepatocel... \n",
- "3 Comparing Alzheimer’s and Parkinson’s diseases... \n",
- "4 Mathematical models of cancer metabolism. \n",
- "5 Basic local alignment search tool. \n",
- "6 Network biology: understanding the cell's func... \n",
- "7 mTOR Signaling in Growth Control and Disease \n",
- "8 PID: the Pathway Interaction Database \n",
- "9 Transfer of mitochondria via tunneling nanotub... \n",
- "\n",
- " text \\\n",
- "0 Mitochondria are central players in the regula... \n",
- "1 Macroautophagy is a dynamic process involving ... \n",
- "2 Hepatocellular carcinoma (HCC) is the most com... \n",
- "3 Recent advances in large datasets analysis off... \n",
- "4 Metabolism is essential for life, and its alte... \n",
- "5 A new approach to rapid sequence comparison, b... \n",
- "6 A key aim of postgenomic biomedical research i... \n",
- "7 The mechanistic target of rapamycin (mTOR) sig... \n",
- "8 The Pathway Interaction Database (PID, http://... \n",
- "9 Tunneling nanotubes (TNTs) are F-actin-based m... \n",
- "\n",
- " vector _distance \n",
- "0 [-0.014866754, 0.0028244434, -0.023141732, 0.0... 0.281554 \n",
- "1 [-0.042504933, 0.00053501845, -0.016986104, 0.... 0.312909 \n",
- "2 [0.03984485, 0.01583628, -0.00934351, -0.02993... 0.366526 \n",
- "3 [-0.004613025, -0.0044279257, -0.013920496, 0.... 0.369777 \n",
- "4 [-0.0037386382, 0.011562068, -0.022479024, 0.0... 0.370503 \n",
- "5 [-0.006935188, 0.020925103, -0.051218845, 0.00... 0.372769 \n",
- "6 [0.012990677, 0.028128441, -0.006426807, -0.02... 0.376812 \n",
- "7 [0.0006567143, 0.0053487234, -0.0010087299, -0... 0.376821 \n",
- "8 [-0.007852315, 0.014019204, -0.026789214, -0.0... 0.378377 \n",
- "9 [-0.0063375738, 0.006348416, -0.034239322, 0.0... 0.380112 "
- ],
"text/html": [
"\n",
" \n",
@@ -1438,15 +1363,65 @@
"
\n",
" \n",
" \n"
- ]
- },
- "metadata": {},
- "execution_count": 8
- }
- ],
- "source": [
- "def search(query, top_k=10):\n",
- " \"\"\"\n",
+ ],
+ "text/plain": [
+ " _id \\\n",
+ "0 83d323a5bb26b706d4f6d24eb27411a7e7ff57e6 \n",
+ "1 a3345798b1faf238e8d805bbe9124b0b8e0c869f \n",
+ "2 e0534bfb477c5a82e98d0cb386ae3eb31d349c91 \n",
+ "3 c65945c08b7fd77ffd2c53369e8928699c3993e7 \n",
+ "4 1af2e075903a3cc5ad5a192921a0b4fb67645dc1 \n",
+ "5 3979cf5a013063e98ad0caf2e7110c2686cf1640 \n",
+ "6 0fb926cae217b70c97c74eb70b2a6b8c47574812 \n",
+ "7 488257dcbc7bcb56836f10a410e69c2c283989e5 \n",
+ "8 65f415c6d88aca139867702fc64aa179781b8e65 \n",
+ "9 244fc78ce607812edb90290727dab4d33377e986 \n",
+ "\n",
+ " title \\\n",
+ "0 Protective action of green tea catechins in ne... \n",
+ "1 Autophagy as a regulated pathway of cellular d... \n",
+ "2 Cellular and molecular mechanisms of hepatocel... \n",
+ "3 Comparing Alzheimer’s and Parkinson’s diseases... \n",
+ "4 Mathematical models of cancer metabolism. \n",
+ "5 Basic local alignment search tool. \n",
+ "6 Network biology: understanding the cell's func... \n",
+ "7 mTOR Signaling in Growth Control and Disease \n",
+ "8 PID: the Pathway Interaction Database \n",
+ "9 Transfer of mitochondria via tunneling nanotub... \n",
+ "\n",
+ " text \\\n",
+ "0 Mitochondria are central players in the regula... \n",
+ "1 Macroautophagy is a dynamic process involving ... \n",
+ "2 Hepatocellular carcinoma (HCC) is the most com... \n",
+ "3 Recent advances in large datasets analysis off... \n",
+ "4 Metabolism is essential for life, and its alte... \n",
+ "5 A new approach to rapid sequence comparison, b... \n",
+ "6 A key aim of postgenomic biomedical research i... \n",
+ "7 The mechanistic target of rapamycin (mTOR) sig... \n",
+ "8 The Pathway Interaction Database (PID, http://... \n",
+ "9 Tunneling nanotubes (TNTs) are F-actin-based m... \n",
+ "\n",
+ " vector _distance \n",
+ "0 [-0.014866754, 0.0028244434, -0.023141732, 0.0... 0.281554 \n",
+ "1 [-0.042504933, 0.00053501845, -0.016986104, 0.... 0.312909 \n",
+ "2 [0.03984485, 0.01583628, -0.00934351, -0.02993... 0.366526 \n",
+ "3 [-0.004613025, -0.0044279257, -0.013920496, 0.... 0.369777 \n",
+ "4 [-0.0037386382, 0.011562068, -0.022479024, 0.0... 0.370503 \n",
+ "5 [-0.006935188, 0.020925103, -0.051218845, 0.00... 0.372769 \n",
+ "6 [0.012990677, 0.028128441, -0.006426807, -0.02... 0.376812 \n",
+ "7 [0.0006567143, 0.0053487234, -0.0010087299, -0... 0.376821 \n",
+ "8 [-0.007852315, 0.014019204, -0.026789214, -0.0... 0.378377 \n",
+ "9 [-0.0063375738, 0.006348416, -0.034239322, 0.0... 0.380112 "
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "def search(query, top_k=10):\n",
+ " \"\"\"\n",
" Search a query from the table\n",
" \"\"\"\n",
" query_vector = embed_model.encode_queries(\n",
@@ -1494,76 +1469,14 @@
},
"outputs": [
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"QUERY:-> Classification of human activity by using a Stacked Autoencoder\n"
]
},
{
- "output_type": "execute_result",
"data": {
- "text/plain": [
- " _id \\\n",
- "0 a3345798b1faf238e8d805bbe9124b0b8e0c869f \n",
- "1 3979cf5a013063e98ad0caf2e7110c2686cf1640 \n",
- "2 83d323a5bb26b706d4f6d24eb27411a7e7ff57e6 \n",
- "3 65f415c6d88aca139867702fc64aa179781b8e65 \n",
- "4 0fb926cae217b70c97c74eb70b2a6b8c47574812 \n",
- "5 1af2e075903a3cc5ad5a192921a0b4fb67645dc1 \n",
- "6 c65945c08b7fd77ffd2c53369e8928699c3993e7 \n",
- "7 488257dcbc7bcb56836f10a410e69c2c283989e5 \n",
- "8 244fc78ce607812edb90290727dab4d33377e986 \n",
- "9 e0534bfb477c5a82e98d0cb386ae3eb31d349c91 \n",
- "\n",
- " title \\\n",
- "0 Autophagy as a regulated pathway of cellular d... \n",
- "1 Basic local alignment search tool. \n",
- "2 Protective action of green tea catechins in ne... \n",
- "3 PID: the Pathway Interaction Database \n",
- "4 Network biology: understanding the cell's func... \n",
- "5 Mathematical models of cancer metabolism. \n",
- "6 Comparing Alzheimer’s and Parkinson’s diseases... \n",
- "7 mTOR Signaling in Growth Control and Disease \n",
- "8 Transfer of mitochondria via tunneling nanotub... \n",
- "9 Cellular and molecular mechanisms of hepatocel... \n",
- "\n",
- " text \\\n",
- "0 Macroautophagy is a dynamic process involving ... \n",
- "1 A new approach to rapid sequence comparison, b... \n",
- "2 Mitochondria are central players in the regula... \n",
- "3 The Pathway Interaction Database (PID, http://... \n",
- "4 A key aim of postgenomic biomedical research i... \n",
- "5 Metabolism is essential for life, and its alte... \n",
- "6 Recent advances in large datasets analysis off... \n",
- "7 The mechanistic target of rapamycin (mTOR) sig... \n",
- "8 Tunneling nanotubes (TNTs) are F-actin-based m... \n",
- "9 Hepatocellular carcinoma (HCC) is the most com... \n",
- "\n",
- " vector _distance \\\n",
- "0 [-0.042504933, 0.00053501845, -0.016986104, 0.... 0.312909 \n",
- "1 [-0.006935188, 0.020925103, -0.051218845, 0.00... 0.372769 \n",
- "2 [-0.014866754, 0.0028244434, -0.023141732, 0.0... 0.281554 \n",
- "3 [-0.007852315, 0.014019204, -0.026789214, -0.0... 0.378377 \n",
- "4 [0.012990677, 0.028128441, -0.006426807, -0.02... 0.376812 \n",
- "5 [-0.0037386382, 0.011562068, -0.022479024, 0.0... 0.370503 \n",
- "6 [-0.004613025, -0.0044279257, -0.013920496, 0.... 0.369777 \n",
- "7 [0.0006567143, 0.0053487234, -0.0010087299, -0... 0.376821 \n",
- "8 [-0.0063375738, 0.006348416, -0.034239322, 0.0... 0.380112 \n",
- "9 [0.03984485, 0.01583628, -0.00934351, -0.02993... 0.366526 \n",
- "\n",
- " old_similarity_rank new_scores \n",
- "0 2 -3.949219 \n",
- "1 6 -5.410156 \n",
- "2 1 -6.652344 \n",
- "3 9 -7.402344 \n",
- "4 7 -7.824219 \n",
- "5 5 -8.070312 \n",
- "6 4 -9.007812 \n",
- "7 8 -9.507812 \n",
- "8 10 -9.593750 \n",
- "9 3 -10.195312 "
- ],
"text/html": [
"\n",
" \n",
@@ -1906,10 +1819,72 @@
"
\n",
" \n",
" \n"
+ ],
+ "text/plain": [
+ " _id \\\n",
+ "0 a3345798b1faf238e8d805bbe9124b0b8e0c869f \n",
+ "1 3979cf5a013063e98ad0caf2e7110c2686cf1640 \n",
+ "2 83d323a5bb26b706d4f6d24eb27411a7e7ff57e6 \n",
+ "3 65f415c6d88aca139867702fc64aa179781b8e65 \n",
+ "4 0fb926cae217b70c97c74eb70b2a6b8c47574812 \n",
+ "5 1af2e075903a3cc5ad5a192921a0b4fb67645dc1 \n",
+ "6 c65945c08b7fd77ffd2c53369e8928699c3993e7 \n",
+ "7 488257dcbc7bcb56836f10a410e69c2c283989e5 \n",
+ "8 244fc78ce607812edb90290727dab4d33377e986 \n",
+ "9 e0534bfb477c5a82e98d0cb386ae3eb31d349c91 \n",
+ "\n",
+ " title \\\n",
+ "0 Autophagy as a regulated pathway of cellular d... \n",
+ "1 Basic local alignment search tool. \n",
+ "2 Protective action of green tea catechins in ne... \n",
+ "3 PID: the Pathway Interaction Database \n",
+ "4 Network biology: understanding the cell's func... \n",
+ "5 Mathematical models of cancer metabolism. \n",
+ "6 Comparing Alzheimer’s and Parkinson’s diseases... \n",
+ "7 mTOR Signaling in Growth Control and Disease \n",
+ "8 Transfer of mitochondria via tunneling nanotub... \n",
+ "9 Cellular and molecular mechanisms of hepatocel... \n",
+ "\n",
+ " text \\\n",
+ "0 Macroautophagy is a dynamic process involving ... \n",
+ "1 A new approach to rapid sequence comparison, b... \n",
+ "2 Mitochondria are central players in the regula... \n",
+ "3 The Pathway Interaction Database (PID, http://... \n",
+ "4 A key aim of postgenomic biomedical research i... \n",
+ "5 Metabolism is essential for life, and its alte... \n",
+ "6 Recent advances in large datasets analysis off... \n",
+ "7 The mechanistic target of rapamycin (mTOR) sig... \n",
+ "8 Tunneling nanotubes (TNTs) are F-actin-based m... \n",
+ "9 Hepatocellular carcinoma (HCC) is the most com... \n",
+ "\n",
+ " vector _distance \\\n",
+ "0 [-0.042504933, 0.00053501845, -0.016986104, 0.... 0.312909 \n",
+ "1 [-0.006935188, 0.020925103, -0.051218845, 0.00... 0.372769 \n",
+ "2 [-0.014866754, 0.0028244434, -0.023141732, 0.0... 0.281554 \n",
+ "3 [-0.007852315, 0.014019204, -0.026789214, -0.0... 0.378377 \n",
+ "4 [0.012990677, 0.028128441, -0.006426807, -0.02... 0.376812 \n",
+ "5 [-0.0037386382, 0.011562068, -0.022479024, 0.0... 0.370503 \n",
+ "6 [-0.004613025, -0.0044279257, -0.013920496, 0.... 0.369777 \n",
+ "7 [0.0006567143, 0.0053487234, -0.0010087299, -0... 0.376821 \n",
+ "8 [-0.0063375738, 0.006348416, -0.034239322, 0.0... 0.380112 \n",
+ "9 [0.03984485, 0.01583628, -0.00934351, -0.02993... 0.366526 \n",
+ "\n",
+ " old_similarity_rank new_scores \n",
+ "0 2 -3.949219 \n",
+ "1 6 -5.410156 \n",
+ "2 1 -6.652344 \n",
+ "3 9 -7.402344 \n",
+ "4 7 -7.824219 \n",
+ "5 5 -8.070312 \n",
+ "6 4 -9.007812 \n",
+ "7 8 -9.507812 \n",
+ "8 10 -9.593750 \n",
+ "9 3 -10.195312 "
]
},
+ "execution_count": 9,
"metadata": {},
- "execution_count": 9
+ "output_type": "execute_result"
}
],
"source": [
@@ -1958,98 +1933,93 @@
},
"widgets": {
"application/vnd.jupyter.widget-state+json": {
- "25222b6616e1489eb531950c958c5fdf": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HBoxModel",
- "model_module_version": "1.5.0",
+ "00a0f8d7b04c495b91b6decf446c50d5": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
"_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_6b6cd8dbe43440b29bf705ebc04fede7",
- "IPY_MODEL_7b8c414f7aad49fe9f98760489d42ed1",
- "IPY_MODEL_886c86257b4645a2a929eb39f20ab8a3"
- ],
- "layout": "IPY_MODEL_f918256ef4874941a1ec098ea5050f6a"
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
}
},
- "6b6cd8dbe43440b29bf705ebc04fede7": {
+ "01a1a33b691d427bb5cccce1f4b79693": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
"model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
"state": {
- "_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
+ "_model_name": "ProgressStyleModel",
"_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_187dee520d434a1eaab95c4b17723d35",
- "placeholder": "",
- "style": "IPY_MODEL_af680e36244e4f9691bb156d01c3b3b8",
- "value": "tokenizer_config.json: 100%"
- }
- },
- "7b8c414f7aad49fe9f98760489d42ed1": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "FloatProgressModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_413e5e69d5f246df9d530bcb797286d9",
- "max": 396,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_ada52c81a40444eca27763305e25ef92",
- "value": 396
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
}
},
- "886c86257b4645a2a929eb39f20ab8a3": {
+ "077036d733a84d2881bd0f4d486277b4": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
"model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
"state": {
- "_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
+ "_model_name": "DescriptionStyleModel",
"_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_3f5273f0ab8645368d73148784759cf1",
- "placeholder": "",
- "style": "IPY_MODEL_b72560a9b60348e1a2764f24a33188fa",
- "value": " 396/396 [00:00<00:00, 21.3kB/s]"
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
}
},
- "f918256ef4874941a1ec098ea5050f6a": {
+ "0cb30ddb214540f8b74219a9fc77127b": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -2098,77 +2068,83 @@
"width": null
}
},
- "187dee520d434a1eaab95c4b17723d35": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
+ "0d7c92de0c384d72aafada73e685aa08": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
"state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
+ "_view_name": "StyleView",
+ "description_width": ""
}
},
- "af680e36244e4f9691bb156d01c3b3b8": {
+ "0dfe271be6914892b327d306e669f4aa": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
"state": {
+ "_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_0cb30ddb214540f8b74219a9fc77127b",
+ "placeholder": "",
+ "style": "IPY_MODEL_a6b63191503c43f691f28878fcd39b26",
+ "value": " 125/125 [00:00<00:00, 8.81kB/s]"
+ }
+ },
+ "109a9da70a6a4f6789fa397bf2a81fa4": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_c2115e12648343a4b0ca23455c46f9a5",
+ "placeholder": "",
+ "style": "IPY_MODEL_a7c7e8bbdbbf44649af4e40be262a959",
+ "value": " 232k/232k [00:00<00:00, 1.86MB/s]"
+ }
+ },
+ "10d4116aace649bbae035c02e13828d8": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
+ "bar_color": null,
"description_width": ""
}
},
- "413e5e69d5f246df9d530bcb797286d9": {
+ "11a0fb71fd0e486982215656adcd2bdc": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -2217,26 +2193,10 @@
"width": null
}
},
- "ada52c81a40444eca27763305e25ef92": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "ProgressStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "bar_color": null,
- "description_width": ""
- }
- },
- "3f5273f0ab8645368d73148784759cf1": {
+ "11e884ac25d64b0bba935a296b17d5d9": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -2285,47 +2245,47 @@
"width": null
}
},
- "b72560a9b60348e1a2764f24a33188fa": {
+ "13e60c9491d542099f1a881330ee1c04": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
+ "_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
+ "bar_color": null,
"description_width": ""
}
},
- "80d917f992794502aa6828ed7d01af98": {
+ "158d4dd4e4f7495e9d2d6f360c29bf02": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "HBoxModel",
"model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
+ "_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_83c646c3a2f543949e2f02138e59e982",
- "IPY_MODEL_89706a0fb9e34f97b3ff6db95e2e87b5",
- "IPY_MODEL_109a9da70a6a4f6789fa397bf2a81fa4"
- ],
- "layout": "IPY_MODEL_8d58def01f5d412589983675059926ac"
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_11e884ac25d64b0bba935a296b17d5d9",
+ "placeholder": "",
+ "style": "IPY_MODEL_eb6fa6ee9e74440bb4ce2a92ee4548c7",
+ "value": "tokenizer.json: 100%"
}
},
- "83c646c3a2f543949e2f02138e59e982": {
+ "15b01052fc6140e2be8fae7c2d2928fa": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
"model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
@@ -2337,61 +2297,61 @@
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
- "layout": "IPY_MODEL_2fcb6704693b4842b7c4a224e2d916fd",
+ "layout": "IPY_MODEL_11a0fb71fd0e486982215656adcd2bdc",
"placeholder": "",
- "style": "IPY_MODEL_72c8de55a48d4fa8ae79b9289cbee1d4",
- "value": "vocab.txt: 100%"
+ "style": "IPY_MODEL_5be1ec5880cb459fb7a88ae7c1f2394f",
+ "value": " 25657/25657 [00:00<00:00, 77019.91 examples/s]"
}
},
- "89706a0fb9e34f97b3ff6db95e2e87b5": {
+ "166beb1aa15b4927a9c27fda4a8d6de1": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
+ "_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
+ "_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
- "layout": "IPY_MODEL_fb03d4b1113a42d1914557a26058d82f",
- "max": 231508,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_d6b75db679df4a849b362c77df481e30",
- "value": 231508
+ "layout": "IPY_MODEL_83474dc942a44919a4e48ee36b65f8f6",
+ "placeholder": "",
+ "style": "IPY_MODEL_77002ce5084c44b8b06987bee947f099",
+ "value": "Downloading data: 100%"
}
},
- "109a9da70a6a4f6789fa397bf2a81fa4": {
+ "16dd8588f2464d4281c0dde85cc28c6d": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
"model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
+ "_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
"description": "",
"description_tooltip": null,
- "layout": "IPY_MODEL_c2115e12648343a4b0ca23455c46f9a5",
- "placeholder": "",
- "style": "IPY_MODEL_a7c7e8bbdbbf44649af4e40be262a959",
- "value": " 232k/232k [00:00<00:00, 1.86MB/s]"
+ "layout": "IPY_MODEL_9027c073a81a4360adbe90ed3bd9c099",
+ "max": 711649,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_5fe367a2cb2b4f949b4b667d1d47e49c",
+ "value": 711649
}
},
- "8d58def01f5d412589983675059926ac": {
+ "187dee520d434a1eaab95c4b17723d35": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -2440,10 +2400,10 @@
"width": null
}
},
- "2fcb6704693b4842b7c4a224e2d916fd": {
+ "1897e98d30ea4d3896c3c2a2b9b2c23e": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -2492,10 +2452,25 @@
"width": null
}
},
- "72c8de55a48d4fa8ae79b9289cbee1d4": {
+ "18df3037afae470e8ac9d297f93fd9ce": {
"model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
"model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "199779c3a63c4632be9c8fa65b7f33d8": {
+ "model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
@@ -2507,10 +2482,10 @@
"description_width": ""
}
},
- "fb03d4b1113a42d1914557a26058d82f": {
+ "1d5908be44944e41a0b81875afb14411": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -2559,26 +2534,25 @@
"width": null
}
},
- "d6b75db679df4a849b362c77df481e30": {
+ "1d616b7a849f4a83a0f1d8dd446b96f7": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
+ "_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
- "bar_color": null,
"description_width": ""
}
},
- "c2115e12648343a4b0ca23455c46f9a5": {
+ "1e011635dca34cae8ac44614bdfdf88c": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -2627,10 +2601,10 @@
"width": null
}
},
- "a7c7e8bbdbbf44649af4e40be262a959": {
+ "1fbd0891d5a24a54ae54656b0d8a6247": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
@@ -2642,98 +2616,10 @@
"description_width": ""
}
},
- "db68c63dcd244ca9b8b391559f8abfd1": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HBoxModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_158d4dd4e4f7495e9d2d6f360c29bf02",
- "IPY_MODEL_16dd8588f2464d4281c0dde85cc28c6d",
- "IPY_MODEL_ad2279ff1d4d47068d037ca698005140"
- ],
- "layout": "IPY_MODEL_ca7870dc84ec48c681f6411595f321ad"
- }
- },
- "158d4dd4e4f7495e9d2d6f360c29bf02": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_11e884ac25d64b0bba935a296b17d5d9",
- "placeholder": "",
- "style": "IPY_MODEL_eb6fa6ee9e74440bb4ce2a92ee4548c7",
- "value": "tokenizer.json: 100%"
- }
- },
- "16dd8588f2464d4281c0dde85cc28c6d": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "FloatProgressModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_9027c073a81a4360adbe90ed3bd9c099",
- "max": 711649,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_5fe367a2cb2b4f949b4b667d1d47e49c",
- "value": 711649
- }
- },
- "ad2279ff1d4d47068d037ca698005140": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_52e6dbcd90824ad096c1b610123df935",
- "placeholder": "",
- "style": "IPY_MODEL_62450a7fb0e540688ee9ad510a290609",
- "value": " 712k/712k [00:00<00:00, 3.65MB/s]"
- }
- },
- "ca7870dc84ec48c681f6411595f321ad": {
+ "20f4ea917f4143cd9349fe3afa9c040d": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -2782,10 +2668,118 @@
"width": null
}
},
- "11e884ac25d64b0bba935a296b17d5d9": {
+ "21b3538c53cd4ff5895a817782884101": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "22c4bb69842a43aa83fcba4eb6c1406c": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_f22ae0abe3cb40a5b97559c7216400e7",
+ "max": 443,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_abc7b65193fb49f7899b10820992e163",
+ "value": 443
+ }
+ },
+ "25222b6616e1489eb531950c958c5fdf": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_6b6cd8dbe43440b29bf705ebc04fede7",
+ "IPY_MODEL_7b8c414f7aad49fe9f98760489d42ed1",
+ "IPY_MODEL_886c86257b4645a2a929eb39f20ab8a3"
+ ],
+ "layout": "IPY_MODEL_f918256ef4874941a1ec098ea5050f6a"
+ }
+ },
+ "27b6a73af53d4cf1946ae2ece8c499e2": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "2887f8a70d8a45d5b633ec2106865a45": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "28a64ca9341341449b9e778d73db6321": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "2ae210dcb6bd47d584b980d478b254a2": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -2834,25 +2828,113 @@
"width": null
}
},
- "eb6fa6ee9e74440bb4ce2a92ee4548c7": {
+ "2be215a977184227abd5983e7c81b3ff": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
"state": {
+ "_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_6fb5d22a5b744b98907c7d36ad675e37",
+ "IPY_MODEL_4eca6411f44a4a638423a51289957959",
+ "IPY_MODEL_b99e200f694d4e2a83360752c6b5441c"
+ ],
+ "layout": "IPY_MODEL_c61d80f6b40a4716b18ab7555fe604d1"
+ }
+ },
+ "2bea46a2708d4e61910dc8138f99426b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
+ "bar_color": null,
"description_width": ""
}
},
- "9027c073a81a4360adbe90ed3bd9c099": {
+ "2bfe8c958a9d44f781453b529255e01f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_e7a255788ec94998924142f4255ce409",
+ "IPY_MODEL_e409114ff67443ca92cb46ffb0697b58",
+ "IPY_MODEL_3e13281360324914921f135ba80e9672"
+ ],
+ "layout": "IPY_MODEL_1d5908be44944e41a0b81875afb14411"
+ }
+ },
+ "2eed217c58f34c83b5b15bf2f955d9a0": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_3fef4607ccd943008c272f66f9bf08b8",
+ "placeholder": "",
+ "style": "IPY_MODEL_0d7c92de0c384d72aafada73e685aa08",
+ "value": "config.json: 100%"
+ }
+ },
+ "2f98cd9e8e5f4466a7a2cf88b087ce55": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_5c07f16b1bb04a55bdcf0ae2476b78dd",
+ "IPY_MODEL_4e3741de8a1e4021a49e8abfb925c563",
+ "IPY_MODEL_0dfe271be6914892b327d306e669f4aa"
+ ],
+ "layout": "IPY_MODEL_d88597e336ad4297a4aa6bd3d7fdc5dd"
+ }
+ },
+ "2fcb6704693b4842b7c4a224e2d916fd": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -2901,10 +2983,10 @@
"width": null
}
},
- "5fe367a2cb2b4f949b4b667d1d47e49c": {
+ "32b556bd8ddc4ef196eba4d1fd6b6b62": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
@@ -2917,10 +2999,10 @@
"description_width": ""
}
},
- "52e6dbcd90824ad096c1b610123df935": {
+ "33ba91ddcbd74a9e870ed3c2ba9f86d0": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -2969,10 +3051,10 @@
"width": null
}
},
- "62450a7fb0e540688ee9ad510a290609": {
+ "344dc2f380944602b0d5a712dad8473c": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
@@ -2984,10 +3066,10 @@
"description_width": ""
}
},
- "2f98cd9e8e5f4466a7a2cf88b087ce55": {
+ "373fe974f094461d87f5b40ad6aa4e91": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "HBoxModel",
"model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
@@ -2999,83 +3081,82 @@
"_view_name": "HBoxView",
"box_style": "",
"children": [
- "IPY_MODEL_5c07f16b1bb04a55bdcf0ae2476b78dd",
- "IPY_MODEL_4e3741de8a1e4021a49e8abfb925c563",
- "IPY_MODEL_0dfe271be6914892b327d306e669f4aa"
+ "IPY_MODEL_f1dd6f744bf34a3aa1ecd72115f63155",
+ "IPY_MODEL_e5486dfd2ecd411eb60f3e3a89b64660",
+ "IPY_MODEL_5865b1aead9641d1bed54d9d166945f8"
],
- "layout": "IPY_MODEL_d88597e336ad4297a4aa6bd3d7fdc5dd"
+ "layout": "IPY_MODEL_41de8969ac714df5a4f4132f440af675"
}
},
- "5c07f16b1bb04a55bdcf0ae2476b78dd": {
+ "3ab4015a66824bb3a2374d5a090e4e35": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
"model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
+ "_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_1e011635dca34cae8ac44614bdfdf88c",
- "placeholder": "",
- "style": "IPY_MODEL_ba01a8212e0741c48d0d49095cfb5c17",
- "value": "special_tokens_map.json: 100%"
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_63a499ae50bb4c12b4cf90dca53d0a07",
+ "IPY_MODEL_9d04d5b70be34666848a0347437cb7ea",
+ "IPY_MODEL_ab924099c7cc4d31a60afec68a0ff0d1"
+ ],
+ "layout": "IPY_MODEL_92bbac1ac3da4331abc4c0afe7fffbd6"
}
},
- "4e3741de8a1e4021a49e8abfb925c563": {
+ "3e13281360324914921f135ba80e9672": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
+ "_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
+ "_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
- "layout": "IPY_MODEL_fd80d3b519124a759df834da4af06967",
- "max": 125,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_5f419e6126ad421ea2efa5b73b38aef5",
- "value": 125
+ "layout": "IPY_MODEL_4c51bcbf31774cda86465a3ec707831d",
+ "placeholder": "",
+ "style": "IPY_MODEL_8bb2cf9274a84b84ac8d20d4d38aaecc",
+ "value": " 279/279 [00:00<00:00, 13.5kB/s]"
}
},
- "0dfe271be6914892b327d306e669f4aa": {
+ "3e3303826e33485ca844fc82a1035b61": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
"model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
+ "_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_0cb30ddb214540f8b74219a9fc77127b",
- "placeholder": "",
- "style": "IPY_MODEL_a6b63191503c43f691f28878fcd39b26",
- "value": " 125/125 [00:00<00:00, 8.81kB/s]"
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_c32fbe6f9b39480985e3bff59f6fcccd",
+ "IPY_MODEL_ad77277175bb456a9a6ce15af4aa5868",
+ "IPY_MODEL_a6b0d04284b748adb9e74530d25589e0"
+ ],
+ "layout": "IPY_MODEL_2ae210dcb6bd47d584b980d478b254a2"
}
},
- "d88597e336ad4297a4aa6bd3d7fdc5dd": {
+ "3f5273f0ab8645368d73148784759cf1": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -3124,10 +3205,10 @@
"width": null
}
},
- "1e011635dca34cae8ac44614bdfdf88c": {
+ "3fef4607ccd943008c272f66f9bf08b8": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -3176,25 +3257,26 @@
"width": null
}
},
- "ba01a8212e0741c48d0d49095cfb5c17": {
+ "4015861324f64d4ab2b1a7a4153266ff": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
+ "_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
+ "bar_color": null,
"description_width": ""
}
},
- "fd80d3b519124a759df834da4af06967": {
+ "407cf90293e44890903a4d89ee08008a": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -3243,26 +3325,10 @@
"width": null
}
},
- "5f419e6126ad421ea2efa5b73b38aef5": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "ProgressStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "bar_color": null,
- "description_width": ""
- }
- },
- "0cb30ddb214540f8b74219a9fc77127b": {
+ "413e5e69d5f246df9d530bcb797286d9": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -3311,117 +3377,14 @@
"width": null
}
},
- "a6b63191503c43f691f28878fcd39b26": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
+ "41de8969ac714df5a4f4132f440af675": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "4d174874270f4e0f88eb27a16aa0f11c": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HBoxModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_2eed217c58f34c83b5b15bf2f955d9a0",
- "IPY_MODEL_63a17bb1290a428da71bfe76a08e04db",
- "IPY_MODEL_a35d2f0d849d474f82cbd3dc6879b12d"
- ],
- "layout": "IPY_MODEL_f2f00b1f73954d95b95889fa1a34c5ae"
- }
- },
- "2eed217c58f34c83b5b15bf2f955d9a0": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_3fef4607ccd943008c272f66f9bf08b8",
- "placeholder": "",
- "style": "IPY_MODEL_0d7c92de0c384d72aafada73e685aa08",
- "value": "config.json: 100%"
- }
- },
- "63a17bb1290a428da71bfe76a08e04db": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "FloatProgressModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_c8ebd32170a44ab6bfedee79ea5509ec",
- "max": 731,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_13e60c9491d542099f1a881330ee1c04",
- "value": 731
- }
- },
- "a35d2f0d849d474f82cbd3dc6879b12d": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_1897e98d30ea4d3896c3c2a2b9b2c23e",
- "placeholder": "",
- "style": "IPY_MODEL_9019bbd2898447fc8c692163e223b4b1",
- "value": " 731/731 [00:00<00:00, 52.6kB/s]"
- }
- },
- "f2f00b1f73954d95b95889fa1a34c5ae": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
@@ -3466,10 +3429,34 @@
"width": null
}
},
- "3fef4607ccd943008c272f66f9bf08b8": {
+ "434d6bee7bfc4aa5bab6c2a080a193e5": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_a938e445c6674332b9d6253358d0e1e0",
+ "max": 93304,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_01a1a33b691d427bb5cccce1f4b79693",
+ "value": 93304
+ }
+ },
+ "453f440ade9946a5b12bfe6cea948368": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -3518,25 +3505,31 @@
"width": null
}
},
- "0d7c92de0c384d72aafada73e685aa08": {
+ "471ea20537984f18a07b3a198750c3e0": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
"state": {
+ "_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
+ "_model_name": "HTMLModel",
"_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_fc2403f083124228befe690caad6dd3d",
+ "placeholder": "",
+ "style": "IPY_MODEL_a60c75bb501b49e48d09dd50cb645bdd",
+ "value": " 1000/1000 [00:00<00:00, 25314.62 examples/s]"
}
},
- "c8ebd32170a44ab6bfedee79ea5509ec": {
+ "498ff77afafa440cb0f6afb39627ec1d": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -3585,26 +3578,31 @@
"width": null
}
},
- "13e60c9491d542099f1a881330ee1c04": {
+ "4a0c748f613a4a979f7eaee825878044": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
"state": {
+ "_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
+ "_model_name": "HTMLModel",
"_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "bar_color": null,
- "description_width": ""
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_bc9715c03c4d41f7b13cdc3fbca26b1b",
+ "placeholder": "",
+ "style": "IPY_MODEL_55efb72d3ac94e85b0259ade8318a84a",
+ "value": "model.safetensors: 100%"
}
},
- "1897e98d30ea4d3896c3c2a2b9b2c23e": {
+ "4c51bcbf31774cda86465a3ec707831d": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -3653,25 +3651,26 @@
"width": null
}
},
- "9019bbd2898447fc8c692163e223b4b1": {
+ "4c955a42756d47eea9a00a87c4b5f0f0": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
+ "_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
+ "bar_color": null,
"description_width": ""
}
},
- "8a12ffea544146c29541b3b4a1c6db2b": {
+ "4d174874270f4e0f88eb27a16aa0f11c": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "HBoxModel",
"model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
@@ -3683,38 +3682,41 @@
"_view_name": "HBoxView",
"box_style": "",
"children": [
- "IPY_MODEL_4a0c748f613a4a979f7eaee825878044",
- "IPY_MODEL_c3be2b813b664e0cb9443c2aa0707afc",
- "IPY_MODEL_9c594928a30a4b038c9779c23eaf9fb6"
+ "IPY_MODEL_2eed217c58f34c83b5b15bf2f955d9a0",
+ "IPY_MODEL_63a17bb1290a428da71bfe76a08e04db",
+ "IPY_MODEL_a35d2f0d849d474f82cbd3dc6879b12d"
],
- "layout": "IPY_MODEL_aa7efcd26d21435a9b951a872d122c25"
+ "layout": "IPY_MODEL_f2f00b1f73954d95b95889fa1a34c5ae"
}
},
- "4a0c748f613a4a979f7eaee825878044": {
+ "4e3741de8a1e4021a49e8abfb925c563": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
"model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
+ "_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
"description": "",
"description_tooltip": null,
- "layout": "IPY_MODEL_bc9715c03c4d41f7b13cdc3fbca26b1b",
- "placeholder": "",
- "style": "IPY_MODEL_55efb72d3ac94e85b0259ade8318a84a",
- "value": "model.safetensors: 100%"
+ "layout": "IPY_MODEL_fd80d3b519124a759df834da4af06967",
+ "max": 125,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_5f419e6126ad421ea2efa5b73b38aef5",
+ "value": 125
}
},
- "c3be2b813b664e0cb9443c2aa0707afc": {
+ "4eca6411f44a4a638423a51289957959": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
@@ -3727,54 +3729,77 @@
"bar_style": "success",
"description": "",
"description_tooltip": null,
- "layout": "IPY_MODEL_8bd10565e96a46c2882d635a524593f2",
- "max": 437955512,
+ "layout": "IPY_MODEL_850b2174e35d49579060721becfe4287",
+ "max": 5069051,
"min": 0,
"orientation": "horizontal",
- "style": "IPY_MODEL_10d4116aace649bbae035c02e13828d8",
- "value": 437955512
+ "style": "IPY_MODEL_2bea46a2708d4e61910dc8138f99426b",
+ "value": 5069051
}
},
- "9c594928a30a4b038c9779c23eaf9fb6": {
+ "4f46a6443368428193090a6a4ddf3473": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
"model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
+ "_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_6ebe20cd50074cafad52f37818123cff",
- "placeholder": "",
- "style": "IPY_MODEL_e37f90735fd94698a4172e0292da7c9f",
- "value": " 438M/438M [00:03<00:00, 121MB/s]"
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_b2c7ed0a7fcd45229194b5422b7801d3",
+ "IPY_MODEL_434d6bee7bfc4aa5bab6c2a080a193e5",
+ "IPY_MODEL_7e9a93af505a4b1a96c947363bbae0b9"
+ ],
+ "layout": "IPY_MODEL_453f440ade9946a5b12bfe6cea948368"
}
},
- "aa7efcd26d21435a9b951a872d122c25": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
+ "501f90bcf1ff4efe81cd377df249415e": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
"state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
"_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_aa29504327084ed5816d90ca3f9e9f16",
+ "IPY_MODEL_71c123550e2a4166955eef2f142170fb",
+ "IPY_MODEL_15b01052fc6140e2be8fae7c2d2928fa"
+ ],
+ "layout": "IPY_MODEL_8badd98fa463404aa80f604b45f4a912"
+ }
+ },
+ "52e6dbcd90824ad096c1b610123df935": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
@@ -3808,10 +3833,77 @@
"width": null
}
},
- "bc9715c03c4d41f7b13cdc3fbca26b1b": {
+ "55efb72d3ac94e85b0259ade8318a84a": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "584e80ac477d4a01b23405a5fa29f092": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "5865b1aead9641d1bed54d9d166945f8": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_20f4ea917f4143cd9349fe3afa9c040d",
+ "placeholder": "",
+ "style": "IPY_MODEL_344dc2f380944602b0d5a712dad8473c",
+ "value": " 17.1M/17.1M [00:00<00:00, 87.8MB/s]"
+ }
+ },
+ "5913015323b349ac83f29dc3419ee468": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "5a8b708edb414013a6915a9cbbe95f0a": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -3860,25 +3952,10 @@
"width": null
}
},
- "55efb72d3ac94e85b0259ade8318a84a": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "8bd10565e96a46c2882d635a524593f2": {
+ "5aae76de0c3b42fb81642990d8bbdf93": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -3927,10 +4004,10 @@
"width": null
}
},
- "10d4116aace649bbae035c02e13828d8": {
+ "5baf674c98a846e1a79fda9c8ee77e78": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
@@ -3943,10 +4020,46 @@
"description_width": ""
}
},
- "6ebe20cd50074cafad52f37818123cff": {
+ "5be1ec5880cb459fb7a88ae7c1f2394f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "5c07f16b1bb04a55bdcf0ae2476b78dd": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_1e011635dca34cae8ac44614bdfdf88c",
+ "placeholder": "",
+ "style": "IPY_MODEL_ba01a8212e0741c48d0d49095cfb5c17",
+ "value": "special_tokens_map.json: 100%"
+ }
+ },
+ "5cd148642750417abd38cbf483ccf1f9": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -3995,68 +4108,57 @@
"width": null
}
},
- "e37f90735fd94698a4172e0292da7c9f": {
+ "5f419e6126ad421ea2efa5b73b38aef5": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
+ "_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
+ "bar_color": null,
"description_width": ""
}
},
- "9003eadfcead41aaaadabf18a706200f": {
+ "5fe367a2cb2b4f949b4b667d1d47e49c": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "HBoxModel",
"model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
"state": {
- "_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
+ "_model_name": "ProgressStyleModel",
"_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_9e8a582cb44a4b618edc8d7844956a93",
- "IPY_MODEL_22c4bb69842a43aa83fcba4eb6c1406c",
- "IPY_MODEL_e191dbd0809c47c7ba28d3f6a0fcb1c5"
- ],
- "layout": "IPY_MODEL_b7d5f50552744a528b99873318ee1bfc"
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
}
},
- "9e8a582cb44a4b618edc8d7844956a93": {
+ "62450a7fb0e540688ee9ad510a290609": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
"model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
"state": {
- "_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
+ "_model_name": "DescriptionStyleModel",
"_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_8b4cefc11b4a403eaafeafefdb0cd763",
- "placeholder": "",
- "style": "IPY_MODEL_077036d733a84d2881bd0f4d486277b4",
- "value": "tokenizer_config.json: 100%"
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
}
},
- "22c4bb69842a43aa83fcba4eb6c1406c": {
+ "63a17bb1290a428da71bfe76a08e04db": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
@@ -4069,18 +4171,18 @@
"bar_style": "success",
"description": "",
"description_tooltip": null,
- "layout": "IPY_MODEL_f22ae0abe3cb40a5b97559c7216400e7",
- "max": 443,
+ "layout": "IPY_MODEL_c8ebd32170a44ab6bfedee79ea5509ec",
+ "max": 731,
"min": 0,
"orientation": "horizontal",
- "style": "IPY_MODEL_abc7b65193fb49f7899b10820992e163",
- "value": 443
+ "style": "IPY_MODEL_13e60c9491d542099f1a881330ee1c04",
+ "value": 731
}
},
- "e191dbd0809c47c7ba28d3f6a0fcb1c5": {
+ "63a499ae50bb4c12b4cf90dca53d0a07": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
"model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
@@ -4092,29 +4194,101 @@
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
- "layout": "IPY_MODEL_7410dd99fca940159ba8d13c9c52bae3",
+ "layout": "IPY_MODEL_bc01b443cb204014bcbcf7cb0fea4c86",
"placeholder": "",
- "style": "IPY_MODEL_6710ae95aea9445ab998afb5d0bb3241",
- "value": " 443/443 [00:00<00:00, 35.6kB/s]"
+ "style": "IPY_MODEL_28a64ca9341341449b9e778d73db6321",
+ "value": "config.json: 100%"
}
},
- "b7d5f50552744a528b99873318ee1bfc": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
+ "6710ae95aea9445ab998afb5d0bb3241": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
"state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "691424012c59434f8cc17f3d6aa001f3": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_8cad72468680488aa62c33186cedf084",
+ "placeholder": "",
+ "style": "IPY_MODEL_21b3538c53cd4ff5895a817782884101",
+ "value": "100%"
+ }
+ },
+ "6b6cd8dbe43440b29bf705ebc04fede7": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_187dee520d434a1eaab95c4b17723d35",
+ "placeholder": "",
+ "style": "IPY_MODEL_af680e36244e4f9691bb156d01c3b3b8",
+ "value": "tokenizer_config.json: 100%"
+ }
+ },
+ "6dcfc1e9c851445e95b862f30dfc8dee": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "6ebe20cd50074cafad52f37818123cff": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
@@ -4150,10 +4324,70 @@
"width": null
}
},
- "8b4cefc11b4a403eaafeafefdb0cd763": {
+ "6fb5d22a5b744b98907c7d36ad675e37": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_b7dfac1fc6f047d8a611ad711e18bbe7",
+ "placeholder": "",
+ "style": "IPY_MODEL_5913015323b349ac83f29dc3419ee468",
+ "value": "sentencepiece.bpe.model: 100%"
+ }
+ },
+ "71c123550e2a4166955eef2f142170fb": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_9c0796729bb0455d92a4f418e86fa38a",
+ "max": 25657,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_5baf674c98a846e1a79fda9c8ee77e78",
+ "value": 25657
+ }
+ },
+ "72c8de55a48d4fa8ae79b9289cbee1d4": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "7410dd99fca940159ba8d13c9c52bae3": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -4202,10 +4436,10 @@
"width": null
}
},
- "077036d733a84d2881bd0f4d486277b4": {
+ "77002ce5084c44b8b06987bee947f099": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
@@ -4217,10 +4451,34 @@
"description_width": ""
}
},
- "f22ae0abe3cb40a5b97559c7216400e7": {
+ "7b8c414f7aad49fe9f98760489d42ed1": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_413e5e69d5f246df9d530bcb797286d9",
+ "max": 396,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_ada52c81a40444eca27763305e25ef92",
+ "value": 396
+ }
+ },
+ "7dd04c9ff2b34590ade55890b2f47b88": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -4269,26 +4527,89 @@
"width": null
}
},
- "abc7b65193fb49f7899b10820992e163": {
+ "7e9a93af505a4b1a96c947363bbae0b9": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
"state": {
+ "_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_33ba91ddcbd74a9e870ed3c2ba9f86d0",
+ "placeholder": "",
+ "style": "IPY_MODEL_803e1962e31b4a918c28e9ff20732313",
+ "value": " 93.3k/93.3k [00:00<00:00, 528kB/s]"
+ }
+ },
+ "7fd4ee74216249ae806b5d4045da9523": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_bbbc8e741a0b44ef835e10fee58bbadf",
+ "placeholder": "",
+ "style": "IPY_MODEL_1fbd0891d5a24a54ae54656b0d8a6247",
+ "value": " 19.0M/19.0M [00:02<00:00, 7.08MB/s]"
+ }
+ },
+ "803e1962e31b4a918c28e9ff20732313": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
- "bar_color": null,
"description_width": ""
}
},
- "7410dd99fca940159ba8d13c9c52bae3": {
+ "80d917f992794502aa6828ed7d01af98": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_83c646c3a2f543949e2f02138e59e982",
+ "IPY_MODEL_89706a0fb9e34f97b3ff6db95e2e87b5",
+ "IPY_MODEL_109a9da70a6a4f6789fa397bf2a81fa4"
+ ],
+ "layout": "IPY_MODEL_8d58def01f5d412589983675059926ac"
+ }
+ },
+ "83474dc942a44919a4e48ee36b65f8f6": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -4337,47 +4658,83 @@
"width": null
}
},
- "6710ae95aea9445ab998afb5d0bb3241": {
+ "83c646c3a2f543949e2f02138e59e982": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
"state": {
+ "_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
+ "_model_name": "HTMLModel",
"_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_2fcb6704693b4842b7c4a224e2d916fd",
+ "placeholder": "",
+ "style": "IPY_MODEL_72c8de55a48d4fa8ae79b9289cbee1d4",
+ "value": "vocab.txt: 100%"
}
},
- "2be215a977184227abd5983e7c81b3ff": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HBoxModel",
- "model_module_version": "1.5.0",
+ "850b2174e35d49579060721becfe4287": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
"_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_6fb5d22a5b744b98907c7d36ad675e37",
- "IPY_MODEL_4eca6411f44a4a638423a51289957959",
- "IPY_MODEL_b99e200f694d4e2a83360752c6b5441c"
- ],
- "layout": "IPY_MODEL_c61d80f6b40a4716b18ab7555fe604d1"
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
}
},
- "6fb5d22a5b744b98907c7d36ad675e37": {
+ "886c86257b4645a2a929eb39f20ab8a3": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
"model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
@@ -4389,16 +4746,16 @@
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
- "layout": "IPY_MODEL_b7dfac1fc6f047d8a611ad711e18bbe7",
+ "layout": "IPY_MODEL_3f5273f0ab8645368d73148784759cf1",
"placeholder": "",
- "style": "IPY_MODEL_5913015323b349ac83f29dc3419ee468",
- "value": "sentencepiece.bpe.model: 100%"
+ "style": "IPY_MODEL_b72560a9b60348e1a2764f24a33188fa",
+ "value": " 396/396 [00:00<00:00, 21.3kB/s]"
}
},
- "4eca6411f44a4a638423a51289957959": {
+ "89706a0fb9e34f97b3ff6db95e2e87b5": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
@@ -4411,39 +4768,62 @@
"bar_style": "success",
"description": "",
"description_tooltip": null,
- "layout": "IPY_MODEL_850b2174e35d49579060721becfe4287",
- "max": 5069051,
+ "layout": "IPY_MODEL_fb03d4b1113a42d1914557a26058d82f",
+ "max": 231508,
"min": 0,
"orientation": "horizontal",
- "style": "IPY_MODEL_2bea46a2708d4e61910dc8138f99426b",
- "value": 5069051
+ "style": "IPY_MODEL_d6b75db679df4a849b362c77df481e30",
+ "value": 231508
}
},
- "b99e200f694d4e2a83360752c6b5441c": {
+ "8a12ffea544146c29541b3b4a1c6db2b": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
"model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
+ "_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_5a8b708edb414013a6915a9cbbe95f0a",
- "placeholder": "",
- "style": "IPY_MODEL_f451f6f10dfc45049f07c44e12b04836",
- "value": " 5.07M/5.07M [00:01<00:00, 3.74MB/s]"
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_4a0c748f613a4a979f7eaee825878044",
+ "IPY_MODEL_c3be2b813b664e0cb9443c2aa0707afc",
+ "IPY_MODEL_9c594928a30a4b038c9779c23eaf9fb6"
+ ],
+ "layout": "IPY_MODEL_aa7efcd26d21435a9b951a872d122c25"
}
},
- "c61d80f6b40a4716b18ab7555fe604d1": {
+ "8a78b64d3f6b4ffdae7e79266d798635": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_bb9b8200e6be4e18a8cf38b14b03e4ce",
+ "IPY_MODEL_a83bd13e5c6e4cbb96c2c2f7acfe8423",
+ "IPY_MODEL_471ea20537984f18a07b3a198750c3e0"
+ ],
+ "layout": "IPY_MODEL_f79797ffd6a649b1a0edae63eee91bea"
+ }
+ },
+ "8b4cefc11b4a403eaafeafefdb0cd763": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -4492,10 +4872,10 @@
"width": null
}
},
- "b7dfac1fc6f047d8a611ad711e18bbe7": {
+ "8badd98fa463404aa80f604b45f4a912": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -4544,10 +4924,10 @@
"width": null
}
},
- "5913015323b349ac83f29dc3419ee468": {
+ "8bb2cf9274a84b84ac8d20d4d38aaecc": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
@@ -4559,10 +4939,10 @@
"description_width": ""
}
},
- "850b2174e35d49579060721becfe4287": {
+ "8bd10565e96a46c2882d635a524593f2": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -4611,26 +4991,62 @@
"width": null
}
},
- "2bea46a2708d4e61910dc8138f99426b": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "ProgressStyleModel",
- "model_module_version": "1.5.0",
+ "8cad72468680488aa62c33186cedf084": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "bar_color": null,
- "description_width": ""
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
}
},
- "5a8b708edb414013a6915a9cbbe95f0a": {
+ "8d58def01f5d412589983675059926ac": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -4679,25 +5095,10 @@
"width": null
}
},
- "f451f6f10dfc45049f07c44e12b04836": {
+ "9003eadfcead41aaaadabf18a706200f": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "373fe974f094461d87f5b40ad6aa4e91": {
- "model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
- "model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
@@ -4709,83 +5110,32 @@
"_view_name": "HBoxView",
"box_style": "",
"children": [
- "IPY_MODEL_f1dd6f744bf34a3aa1ecd72115f63155",
- "IPY_MODEL_e5486dfd2ecd411eb60f3e3a89b64660",
- "IPY_MODEL_5865b1aead9641d1bed54d9d166945f8"
+ "IPY_MODEL_9e8a582cb44a4b618edc8d7844956a93",
+ "IPY_MODEL_22c4bb69842a43aa83fcba4eb6c1406c",
+ "IPY_MODEL_e191dbd0809c47c7ba28d3f6a0fcb1c5"
],
- "layout": "IPY_MODEL_41de8969ac714df5a4f4132f440af675"
+ "layout": "IPY_MODEL_b7d5f50552744a528b99873318ee1bfc"
}
},
- "f1dd6f744bf34a3aa1ecd72115f63155": {
+ "9019bbd2898447fc8c692163e223b4b1": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
"model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
"state": {
- "_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
+ "_model_name": "DescriptionStyleModel",
"_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_5aae76de0c3b42fb81642990d8bbdf93",
- "placeholder": "",
- "style": "IPY_MODEL_cef20c5f499646e291329b580cf3800f",
- "value": "tokenizer.json: 100%"
- }
- },
- "e5486dfd2ecd411eb60f3e3a89b64660": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "FloatProgressModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_90eac81b19a04354ad842f3fbe87e694",
- "max": 17098107,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_cc666236572240f8b1015f187a2f66d9",
- "value": 17098107
- }
- },
- "5865b1aead9641d1bed54d9d166945f8": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_20f4ea917f4143cd9349fe3afa9c040d",
- "placeholder": "",
- "style": "IPY_MODEL_344dc2f380944602b0d5a712dad8473c",
- "value": " 17.1M/17.1M [00:00<00:00, 87.8MB/s]"
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
}
},
- "41de8969ac714df5a4f4132f440af675": {
+ "9027c073a81a4360adbe90ed3bd9c099": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -4834,10 +5184,10 @@
"width": null
}
},
- "5aae76de0c3b42fb81642990d8bbdf93": {
+ "90eac81b19a04354ad842f3fbe87e694": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -4886,25 +5236,10 @@
"width": null
}
},
- "cef20c5f499646e291329b580cf3800f": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "90eac81b19a04354ad842f3fbe87e694": {
+ "92bbac1ac3da4331abc4c0afe7fffbd6": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -4953,26 +5288,62 @@
"width": null
}
},
- "cc666236572240f8b1015f187a2f66d9": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "ProgressStyleModel",
- "model_module_version": "1.5.0",
+ "944f4a261b5240408ab7fc473c7b0835": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "bar_color": null,
- "description_width": ""
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
}
},
- "20f4ea917f4143cd9349fe3afa9c040d": {
+ "9c0796729bb0455d92a4f418e86fa38a": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -5021,47 +5392,55 @@
"width": null
}
},
- "344dc2f380944602b0d5a712dad8473c": {
+ "9c594928a30a4b038c9779c23eaf9fb6": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
"state": {
+ "_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
+ "_model_name": "HTMLModel",
"_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_6ebe20cd50074cafad52f37818123cff",
+ "placeholder": "",
+ "style": "IPY_MODEL_e37f90735fd94698a4172e0292da7c9f",
+ "value": " 438M/438M [00:03<00:00, 121MB/s]"
}
},
- "2bfe8c958a9d44f781453b529255e01f": {
+ "9d04d5b70be34666848a0347437cb7ea": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "HBoxModel",
"model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
+ "_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_e7a255788ec94998924142f4255ce409",
- "IPY_MODEL_e409114ff67443ca92cb46ffb0697b58",
- "IPY_MODEL_3e13281360324914921f135ba80e9672"
- ],
- "layout": "IPY_MODEL_1d5908be44944e41a0b81875afb14411"
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_ba10edc885244040ab89498be10bd4db",
+ "max": 799,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_584e80ac477d4a01b23405a5fa29f092",
+ "value": 799
}
},
- "e7a255788ec94998924142f4255ce409": {
+ "9e8a582cb44a4b618edc8d7844956a93": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
"model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
@@ -5073,40 +5452,31 @@
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
- "layout": "IPY_MODEL_d30b92db93594fe8b2f83241bb498f78",
+ "layout": "IPY_MODEL_8b4cefc11b4a403eaafeafefdb0cd763",
"placeholder": "",
- "style": "IPY_MODEL_9f679d0c8c5e4bf59111200f955ae8d7",
- "value": "special_tokens_map.json: 100%"
+ "style": "IPY_MODEL_077036d733a84d2881bd0f4d486277b4",
+ "value": "tokenizer_config.json: 100%"
}
},
- "e409114ff67443ca92cb46ffb0697b58": {
+ "9f679d0c8c5e4bf59111200f955ae8d7": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
"state": {
- "_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
+ "_model_name": "DescriptionStyleModel",
"_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_944f4a261b5240408ab7fc473c7b0835",
- "max": 279,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_32b556bd8ddc4ef196eba4d1fd6b6b62",
- "value": 279
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
}
},
- "3e13281360324914921f135ba80e9672": {
+ "a35d2f0d849d474f82cbd3dc6879b12d": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
"model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
@@ -5118,120 +5488,89 @@
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
- "layout": "IPY_MODEL_4c51bcbf31774cda86465a3ec707831d",
+ "layout": "IPY_MODEL_1897e98d30ea4d3896c3c2a2b9b2c23e",
"placeholder": "",
- "style": "IPY_MODEL_8bb2cf9274a84b84ac8d20d4d38aaecc",
- "value": " 279/279 [00:00<00:00, 13.5kB/s]"
+ "style": "IPY_MODEL_9019bbd2898447fc8c692163e223b4b1",
+ "value": " 731/731 [00:00<00:00, 52.6kB/s]"
}
},
- "1d5908be44944e41a0b81875afb14411": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
+ "a3eb172e9d324da1bd7d8914e66d2106": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
"state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
"_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_166beb1aa15b4927a9c27fda4a8d6de1",
+ "IPY_MODEL_f2667dbbb4c5462986c9cad904767540",
+ "IPY_MODEL_7fd4ee74216249ae806b5d4045da9523"
+ ],
+ "layout": "IPY_MODEL_b5fd41d1dba0476491cb311bd4d47741"
}
},
- "d30b92db93594fe8b2f83241bb498f78": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
+ "a60c75bb501b49e48d09dd50cb645bdd": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
"state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
+ "_view_name": "StyleView",
+ "description_width": ""
}
},
- "9f679d0c8c5e4bf59111200f955ae8d7": {
+ "a6b0d04284b748adb9e74530d25589e0": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_7dd04c9ff2b34590ade55890b2f47b88",
+ "placeholder": "",
+ "style": "IPY_MODEL_c33d33427cc245eebe6e190684304904",
+ "value": " 1.11G/1.11G [00:08<00:00, 175MB/s]"
+ }
+ },
+ "a6b63191503c43f691f28878fcd39b26": {
"model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
"model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "a7c7e8bbdbbf44649af4e40be262a959": {
+ "model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
@@ -5243,10 +5582,34 @@
"description_width": ""
}
},
- "944f4a261b5240408ab7fc473c7b0835": {
+ "a83bd13e5c6e4cbb96c2c2f7acfe8423": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_e582de00e2af4948b9f072653c787712",
+ "max": 1000,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_2887f8a70d8a45d5b633ec2106865a45",
+ "value": 1000
+ }
+ },
+ "a938e445c6674332b9d6253358d0e1e0": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -5295,26 +5658,31 @@
"width": null
}
},
- "32b556bd8ddc4ef196eba4d1fd6b6b62": {
+ "aa29504327084ed5816d90ca3f9e9f16": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
"state": {
+ "_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
+ "_model_name": "HTMLModel",
"_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "bar_color": null,
- "description_width": ""
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_407cf90293e44890903a4d89ee08008a",
+ "placeholder": "",
+ "style": "IPY_MODEL_18df3037afae470e8ac9d297f93fd9ce",
+ "value": "Generating corpus split: 100%"
}
},
- "4c51bcbf31774cda86465a3ec707831d": {
+ "aa7efcd26d21435a9b951a872d122c25": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -5363,47 +5731,47 @@
"width": null
}
},
- "8bb2cf9274a84b84ac8d20d4d38aaecc": {
+ "ab924099c7cc4d31a60afec68a0ff0d1": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
"state": {
+ "_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
+ "_model_name": "HTMLModel",
"_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_00a0f8d7b04c495b91b6decf446c50d5",
+ "placeholder": "",
+ "style": "IPY_MODEL_199779c3a63c4632be9c8fa65b7f33d8",
+ "value": " 799/799 [00:00<00:00, 37.0kB/s]"
}
},
- "3ab4015a66824bb3a2374d5a090e4e35": {
+ "abc7b65193fb49f7899b10820992e163": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "HBoxModel",
"model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
"state": {
- "_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
+ "_model_name": "ProgressStyleModel",
"_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_63a499ae50bb4c12b4cf90dca53d0a07",
- "IPY_MODEL_9d04d5b70be34666848a0347437cb7ea",
- "IPY_MODEL_ab924099c7cc4d31a60afec68a0ff0d1"
- ],
- "layout": "IPY_MODEL_92bbac1ac3da4331abc4c0afe7fffbd6"
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
}
},
- "63a499ae50bb4c12b4cf90dca53d0a07": {
+ "ad2279ff1d4d47068d037ca698005140": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
"model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
@@ -5415,16 +5783,16 @@
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
- "layout": "IPY_MODEL_bc01b443cb204014bcbcf7cb0fea4c86",
+ "layout": "IPY_MODEL_52e6dbcd90824ad096c1b610123df935",
"placeholder": "",
- "style": "IPY_MODEL_28a64ca9341341449b9e778d73db6321",
- "value": "config.json: 100%"
+ "style": "IPY_MODEL_62450a7fb0e540688ee9ad510a290609",
+ "value": " 712k/712k [00:00<00:00, 3.65MB/s]"
}
},
- "9d04d5b70be34666848a0347437cb7ea": {
+ "ad77277175bb456a9a6ce15af4aa5868": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
@@ -5437,568 +5805,49 @@
"bar_style": "success",
"description": "",
"description_tooltip": null,
- "layout": "IPY_MODEL_ba10edc885244040ab89498be10bd4db",
- "max": 799,
+ "layout": "IPY_MODEL_498ff77afafa440cb0f6afb39627ec1d",
+ "max": 1112206140,
"min": 0,
"orientation": "horizontal",
- "style": "IPY_MODEL_584e80ac477d4a01b23405a5fa29f092",
- "value": 799
+ "style": "IPY_MODEL_4015861324f64d4ab2b1a7a4153266ff",
+ "value": 1112206140
}
},
- "ab924099c7cc4d31a60afec68a0ff0d1": {
+ "ada52c81a40444eca27763305e25ef92": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
"model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
"state": {
- "_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_00a0f8d7b04c495b91b6decf446c50d5",
- "placeholder": "",
- "style": "IPY_MODEL_199779c3a63c4632be9c8fa65b7f33d8",
- "value": " 799/799 [00:00<00:00, 37.0kB/s]"
- }
- },
- "92bbac1ac3da4331abc4c0afe7fffbd6": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "bc01b443cb204014bcbcf7cb0fea4c86": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "28a64ca9341341449b9e778d73db6321": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "ba10edc885244040ab89498be10bd4db": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "584e80ac477d4a01b23405a5fa29f092": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "ProgressStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "bar_color": null,
- "description_width": ""
- }
- },
- "00a0f8d7b04c495b91b6decf446c50d5": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "199779c3a63c4632be9c8fa65b7f33d8": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "3e3303826e33485ca844fc82a1035b61": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HBoxModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_c32fbe6f9b39480985e3bff59f6fcccd",
- "IPY_MODEL_ad77277175bb456a9a6ce15af4aa5868",
- "IPY_MODEL_a6b0d04284b748adb9e74530d25589e0"
- ],
- "layout": "IPY_MODEL_2ae210dcb6bd47d584b980d478b254a2"
- }
- },
- "c32fbe6f9b39480985e3bff59f6fcccd": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_e95487a1fdac44fdb47b991d8ba87c3c",
- "placeholder": "",
- "style": "IPY_MODEL_dd3eef846edd4b5382b97ed6dce2c6d6",
- "value": "model.safetensors: 100%"
- }
- },
- "ad77277175bb456a9a6ce15af4aa5868": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "FloatProgressModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_498ff77afafa440cb0f6afb39627ec1d",
- "max": 1112206140,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_4015861324f64d4ab2b1a7a4153266ff",
- "value": 1112206140
- }
- },
- "a6b0d04284b748adb9e74530d25589e0": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_7dd04c9ff2b34590ade55890b2f47b88",
- "placeholder": "",
- "style": "IPY_MODEL_c33d33427cc245eebe6e190684304904",
- "value": " 1.11G/1.11G [00:08<00:00, 175MB/s]"
- }
- },
- "2ae210dcb6bd47d584b980d478b254a2": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "e95487a1fdac44fdb47b991d8ba87c3c": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "dd3eef846edd4b5382b97ed6dce2c6d6": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
+ "_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
+ "bar_color": null,
"description_width": ""
}
},
- "498ff77afafa440cb0f6afb39627ec1d": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "4015861324f64d4ab2b1a7a4153266ff": {
+ "af680e36244e4f9691bb156d01c3b3b8": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
+ "_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
- "bar_color": null,
"description_width": ""
}
},
- "7dd04c9ff2b34590ade55890b2f47b88": {
+ "b1140f9f312441a8a32b8c7a9461baac": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -6037,102 +5886,20 @@
"object_fit": null,
"object_position": null,
"order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "c33d33427cc245eebe6e190684304904": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "4f46a6443368428193090a6a4ddf3473": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HBoxModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_b2c7ed0a7fcd45229194b5422b7801d3",
- "IPY_MODEL_434d6bee7bfc4aa5bab6c2a080a193e5",
- "IPY_MODEL_7e9a93af505a4b1a96c947363bbae0b9"
- ],
- "layout": "IPY_MODEL_453f440ade9946a5b12bfe6cea948368"
- }
- },
- "b2c7ed0a7fcd45229194b5422b7801d3": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_c92cde2b50804c18ace89bc605d1d6d1",
- "placeholder": "",
- "style": "IPY_MODEL_1d616b7a849f4a83a0f1d8dd446b96f7",
- "value": "Downloading data: 100%"
- }
- },
- "434d6bee7bfc4aa5bab6c2a080a193e5": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "FloatProgressModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_a938e445c6674332b9d6253358d0e1e0",
- "max": 93304,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_01a1a33b691d427bb5cccce1f4b79693",
- "value": 93304
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
}
},
- "7e9a93af505a4b1a96c947363bbae0b9": {
+ "b2c7ed0a7fcd45229194b5422b7801d3": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
"model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
@@ -6144,16 +5911,16 @@
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
- "layout": "IPY_MODEL_33ba91ddcbd74a9e870ed3c2ba9f86d0",
+ "layout": "IPY_MODEL_c92cde2b50804c18ace89bc605d1d6d1",
"placeholder": "",
- "style": "IPY_MODEL_803e1962e31b4a918c28e9ff20732313",
- "value": " 93.3k/93.3k [00:00<00:00, 528kB/s]"
+ "style": "IPY_MODEL_1d616b7a849f4a83a0f1d8dd446b96f7",
+ "value": "Downloading data: 100%"
}
},
- "453f440ade9946a5b12bfe6cea948368": {
+ "b5fd41d1dba0476491cb311bd4d47741": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -6202,10 +5969,25 @@
"width": null
}
},
- "c92cde2b50804c18ace89bc605d1d6d1": {
+ "b72560a9b60348e1a2764f24a33188fa": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "b7d5f50552744a528b99873318ee1bfc": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -6254,25 +6036,62 @@
"width": null
}
},
- "1d616b7a849f4a83a0f1d8dd446b96f7": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
+ "b7dfac1fc6f047d8a611ad711e18bbe7": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
}
},
- "a938e445c6674332b9d6253358d0e1e0": {
+ "b8f872d00274483c951804adeef7c500": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -6321,26 +6140,46 @@
"width": null
}
},
- "01a1a33b691d427bb5cccce1f4b79693": {
+ "b99e200f694d4e2a83360752c6b5441c": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
"state": {
+ "_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_5a8b708edb414013a6915a9cbbe95f0a",
+ "placeholder": "",
+ "style": "IPY_MODEL_f451f6f10dfc45049f07c44e12b04836",
+ "value": " 5.07M/5.07M [00:01<00:00, 3.74MB/s]"
+ }
+ },
+ "ba01a8212e0741c48d0d49095cfb5c17": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
- "bar_color": null,
"description_width": ""
}
},
- "33ba91ddcbd74a9e870ed3c2ba9f86d0": {
+ "ba10edc885244040ab89498be10bd4db": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -6389,47 +6228,10 @@
"width": null
}
},
- "803e1962e31b4a918c28e9ff20732313": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "8a78b64d3f6b4ffdae7e79266d798635": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HBoxModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_bb9b8200e6be4e18a8cf38b14b03e4ce",
- "IPY_MODEL_a83bd13e5c6e4cbb96c2c2f7acfe8423",
- "IPY_MODEL_471ea20537984f18a07b3a198750c3e0"
- ],
- "layout": "IPY_MODEL_f79797ffd6a649b1a0edae63eee91bea"
- }
- },
"bb9b8200e6be4e18a8cf38b14b03e4ce": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
"model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
@@ -6447,55 +6249,62 @@
"value": "Generating queries split: 100%"
}
},
- "a83bd13e5c6e4cbb96c2c2f7acfe8423": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "FloatProgressModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_e582de00e2af4948b9f072653c787712",
- "max": 1000,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_2887f8a70d8a45d5b633ec2106865a45",
- "value": 1000
- }
- },
- "471ea20537984f18a07b3a198750c3e0": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
+ "bbbc8e741a0b44ef835e10fee58bbadf": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
"_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_fc2403f083124228befe690caad6dd3d",
- "placeholder": "",
- "style": "IPY_MODEL_a60c75bb501b49e48d09dd50cb645bdd",
- "value": " 1000/1000 [00:00<00:00, 25314.62 examples/s]"
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
}
},
- "f79797ffd6a649b1a0edae63eee91bea": {
+ "bc01b443cb204014bcbcf7cb0fea4c86": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -6544,10 +6353,10 @@
"width": null
}
},
- "ea704757587d4b09af079e555d6f57d1": {
+ "bc9715c03c4d41f7b13cdc3fbca26b1b": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -6596,25 +6405,10 @@
"width": null
}
},
- "6dcfc1e9c851445e95b862f30dfc8dee": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "e582de00e2af4948b9f072653c787712": {
+ "c2115e12648343a4b0ca23455c46f9a5": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -6663,26 +6457,70 @@
"width": null
}
},
- "2887f8a70d8a45d5b633ec2106865a45": {
+ "c32fbe6f9b39480985e3bff59f6fcccd": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
"state": {
+ "_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_e95487a1fdac44fdb47b991d8ba87c3c",
+ "placeholder": "",
+ "style": "IPY_MODEL_dd3eef846edd4b5382b97ed6dce2c6d6",
+ "value": "model.safetensors: 100%"
+ }
+ },
+ "c33d33427cc245eebe6e190684304904": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
- "bar_color": null,
"description_width": ""
}
},
- "fc2403f083124228befe690caad6dd3d": {
+ "c3be2b813b664e0cb9443c2aa0707afc": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_8bd10565e96a46c2882d635a524593f2",
+ "max": 437955512,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_10d4116aace649bbae035c02e13828d8",
+ "value": 437955512
+ }
+ },
+ "c61d80f6b40a4716b18ab7555fe604d1": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -6731,113 +6569,114 @@
"width": null
}
},
- "a60c75bb501b49e48d09dd50cb645bdd": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
+ "c8ebd32170a44ab6bfedee79ea5509ec": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "a3eb172e9d324da1bd7d8914e66d2106": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HBoxModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_166beb1aa15b4927a9c27fda4a8d6de1",
- "IPY_MODEL_f2667dbbb4c5462986c9cad904767540",
- "IPY_MODEL_7fd4ee74216249ae806b5d4045da9523"
- ],
- "layout": "IPY_MODEL_b5fd41d1dba0476491cb311bd4d47741"
- }
- },
- "166beb1aa15b4927a9c27fda4a8d6de1": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_83474dc942a44919a4e48ee36b65f8f6",
- "placeholder": "",
- "style": "IPY_MODEL_77002ce5084c44b8b06987bee947f099",
- "value": "Downloading data: 100%"
- }
- },
- "f2667dbbb4c5462986c9cad904767540": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "FloatProgressModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_dd9ca10fac4447bfaa7bd665a88e1033",
- "max": 19040902,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_27b6a73af53d4cf1946ae2ece8c499e2",
- "value": 19040902
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
}
},
- "7fd4ee74216249ae806b5d4045da9523": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
+ "c92cde2b50804c18ace89bc605d1d6d1": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
"_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_bbbc8e741a0b44ef835e10fee58bbadf",
- "placeholder": "",
- "style": "IPY_MODEL_1fbd0891d5a24a54ae54656b0d8a6247",
- "value": " 19.0M/19.0M [00:02<00:00, 7.08MB/s]"
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
}
},
- "b5fd41d1dba0476491cb311bd4d47741": {
+ "ca7870dc84ec48c681f6411595f321ad": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -6886,10 +6725,56 @@
"width": null
}
},
- "83474dc942a44919a4e48ee36b65f8f6": {
+ "cc666236572240f8b1015f187a2f66d9": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "cc951613d15d49bbb24409d46b06c1b6": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "cef20c5f499646e291329b580cf3800f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "d30b92db93594fe8b2f83241bb498f78": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -6938,25 +6823,26 @@
"width": null
}
},
- "77002ce5084c44b8b06987bee947f099": {
+ "d6b75db679df4a849b362c77df481e30": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
+ "_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
+ "bar_color": null,
"description_width": ""
}
},
- "dd9ca10fac4447bfaa7bd665a88e1033": {
+ "d88597e336ad4297a4aa6bd3d7fdc5dd": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -7005,26 +6891,47 @@
"width": null
}
},
- "27b6a73af53d4cf1946ae2ece8c499e2": {
+ "db68c63dcd244ca9b8b391559f8abfd1": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
"state": {
+ "_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_158d4dd4e4f7495e9d2d6f360c29bf02",
+ "IPY_MODEL_16dd8588f2464d4281c0dde85cc28c6d",
+ "IPY_MODEL_ad2279ff1d4d47068d037ca698005140"
+ ],
+ "layout": "IPY_MODEL_ca7870dc84ec48c681f6411595f321ad"
+ }
+ },
+ "dd3eef846edd4b5382b97ed6dce2c6d6": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
- "bar_color": null,
"description_width": ""
}
},
- "bbbc8e741a0b44ef835e10fee58bbadf": {
+ "dd9ca10fac4447bfaa7bd665a88e1033": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -7073,68 +6980,94 @@
"width": null
}
},
- "1fbd0891d5a24a54ae54656b0d8a6247": {
+ "e191dbd0809c47c7ba28d3f6a0fcb1c5": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
"state": {
+ "_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
+ "_model_name": "HTMLModel",
"_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_7410dd99fca940159ba8d13c9c52bae3",
+ "placeholder": "",
+ "style": "IPY_MODEL_6710ae95aea9445ab998afb5d0bb3241",
+ "value": " 443/443 [00:00<00:00, 35.6kB/s]"
}
},
- "501f90bcf1ff4efe81cd377df249415e": {
+ "e2c52e10ab294bc68bcb1caadaf8d0c7": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "HBoxModel",
"model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
+ "_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_aa29504327084ed5816d90ca3f9e9f16",
- "IPY_MODEL_71c123550e2a4166955eef2f142170fb",
- "IPY_MODEL_15b01052fc6140e2be8fae7c2d2928fa"
- ],
- "layout": "IPY_MODEL_8badd98fa463404aa80f604b45f4a912"
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_b1140f9f312441a8a32b8c7a9461baac",
+ "max": 79,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_4c955a42756d47eea9a00a87c4b5f0f0",
+ "value": 79
}
},
- "aa29504327084ed5816d90ca3f9e9f16": {
+ "e37f90735fd94698a4172e0292da7c9f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "e409114ff67443ca92cb46ffb0697b58": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
"model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
+ "_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
"description": "",
"description_tooltip": null,
- "layout": "IPY_MODEL_407cf90293e44890903a4d89ee08008a",
- "placeholder": "",
- "style": "IPY_MODEL_18df3037afae470e8ac9d297f93fd9ce",
- "value": "Generating corpus split: 100%"
+ "layout": "IPY_MODEL_944f4a261b5240408ab7fc473c7b0835",
+ "max": 279,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_32b556bd8ddc4ef196eba4d1fd6b6b62",
+ "value": 279
}
},
- "71c123550e2a4166955eef2f142170fb": {
+ "e5486dfd2ecd411eb60f3e3a89b64660": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
@@ -7147,18 +7080,70 @@
"bar_style": "success",
"description": "",
"description_tooltip": null,
- "layout": "IPY_MODEL_9c0796729bb0455d92a4f418e86fa38a",
- "max": 25657,
+ "layout": "IPY_MODEL_90eac81b19a04354ad842f3fbe87e694",
+ "max": 17098107,
"min": 0,
"orientation": "horizontal",
- "style": "IPY_MODEL_5baf674c98a846e1a79fda9c8ee77e78",
- "value": 25657
+ "style": "IPY_MODEL_cc666236572240f8b1015f187a2f66d9",
+ "value": 17098107
+ }
+ },
+ "e582de00e2af4948b9f072653c787712": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
}
},
- "15b01052fc6140e2be8fae7c2d2928fa": {
+ "e7a255788ec94998924142f4255ce409": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
"model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
@@ -7170,16 +7155,16 @@
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
- "layout": "IPY_MODEL_11a0fb71fd0e486982215656adcd2bdc",
+ "layout": "IPY_MODEL_d30b92db93594fe8b2f83241bb498f78",
"placeholder": "",
- "style": "IPY_MODEL_5be1ec5880cb459fb7a88ae7c1f2394f",
- "value": " 25657/25657 [00:00<00:00, 77019.91 examples/s]"
+ "style": "IPY_MODEL_9f679d0c8c5e4bf59111200f955ae8d7",
+ "value": "special_tokens_map.json: 100%"
}
},
- "8badd98fa463404aa80f604b45f4a912": {
+ "e95487a1fdac44fdb47b991d8ba87c3c": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -7228,10 +7213,10 @@
"width": null
}
},
- "407cf90293e44890903a4d89ee08008a": {
+ "ea704757587d4b09af079e555d6f57d1": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -7280,10 +7265,10 @@
"width": null
}
},
- "18df3037afae470e8ac9d297f93fd9ce": {
+ "eb6fa6ee9e74440bb4ce2a92ee4548c7": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
@@ -7295,10 +7280,31 @@
"description_width": ""
}
},
- "9c0796729bb0455d92a4f418e86fa38a": {
+ "f1dd6f744bf34a3aa1ecd72115f63155": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_5aae76de0c3b42fb81642990d8bbdf93",
+ "placeholder": "",
+ "style": "IPY_MODEL_cef20c5f499646e291329b580cf3800f",
+ "value": "tokenizer.json: 100%"
+ }
+ },
+ "f22ae0abe3cb40a5b97559c7216400e7": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -7347,26 +7353,34 @@
"width": null
}
},
- "5baf674c98a846e1a79fda9c8ee77e78": {
+ "f2667dbbb4c5462986c9cad904767540": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
"state": {
+ "_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
+ "_model_name": "FloatProgressModel",
"_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "bar_color": null,
- "description_width": ""
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_dd9ca10fac4447bfaa7bd665a88e1033",
+ "max": 19040902,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_27b6a73af53d4cf1946ae2ece8c499e2",
+ "value": 19040902
}
},
- "11a0fb71fd0e486982215656adcd2bdc": {
+ "f2f00b1f73954d95b95889fa1a34c5ae": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -7415,10 +7429,10 @@
"width": null
}
},
- "5be1ec5880cb459fb7a88ae7c1f2394f": {
+ "f451f6f10dfc45049f07c44e12b04836": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
@@ -7432,8 +7446,8 @@
},
"f6b5cd5ff9704a58986eff2c9c88db4c": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "HBoxModel",
"model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
@@ -7452,76 +7466,10 @@
"layout": "IPY_MODEL_5cd148642750417abd38cbf483ccf1f9"
}
},
- "691424012c59434f8cc17f3d6aa001f3": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_8cad72468680488aa62c33186cedf084",
- "placeholder": "",
- "style": "IPY_MODEL_21b3538c53cd4ff5895a817782884101",
- "value": "100%"
- }
- },
- "e2c52e10ab294bc68bcb1caadaf8d0c7": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "FloatProgressModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_b1140f9f312441a8a32b8c7a9461baac",
- "max": 79,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_4c955a42756d47eea9a00a87c4b5f0f0",
- "value": 79
- }
- },
- "ffba63bef7e944b7923b2b29f9495527": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_b8f872d00274483c951804adeef7c500",
- "placeholder": "",
- "style": "IPY_MODEL_cc951613d15d49bbb24409d46b06c1b6",
- "value": " 79/79 [06:46<00:00, 4.08s/it]"
- }
- },
- "5cd148642750417abd38cbf483ccf1f9": {
+ "f79797ffd6a649b1a0edae63eee91bea": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -7570,10 +7518,10 @@
"width": null
}
},
- "8cad72468680488aa62c33186cedf084": {
+ "f918256ef4874941a1ec098ea5050f6a": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -7622,25 +7570,10 @@
"width": null
}
},
- "21b3538c53cd4ff5895a817782884101": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "b1140f9f312441a8a32b8c7a9461baac": {
+ "fb03d4b1113a42d1914557a26058d82f": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -7689,26 +7622,62 @@
"width": null
}
},
- "4c955a42756d47eea9a00a87c4b5f0f0": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "ProgressStyleModel",
- "model_module_version": "1.5.0",
+ "fc2403f083124228befe690caad6dd3d": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "bar_color": null,
- "description_width": ""
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
}
},
- "b8f872d00274483c951804adeef7c500": {
+ "fd80d3b519124a759df834da4af06967": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -7757,19 +7726,25 @@
"width": null
}
},
- "cc951613d15d49bbb24409d46b06c1b6": {
+ "ffba63bef7e944b7923b2b29f9495527": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
"state": {
+ "_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
+ "_model_name": "HTMLModel",
"_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_b8f872d00274483c951804adeef7c500",
+ "placeholder": "",
+ "style": "IPY_MODEL_cc951613d15d49bbb24409d46b06c1b6",
+ "value": " 79/79 [06:46<00:00, 4.08s/it]"
}
}
}
diff --git a/examples/audio_search/main.ipynb b/examples/audio_search/main.ipynb
index 49cab5ce..ce19c4cd 100644
--- a/examples/audio_search/main.ipynb
+++ b/examples/audio_search/main.ipynb
@@ -1,959 +1,951 @@
{
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "3lhhVh6TWRjq"
- },
- "source": [
- "# Audio Similarity Search using Vector Embeddings\n",
- "This notebook demonstrates how to create vector embeddings of audio files to store into the LanceDB vector store, and then to find similar audio files.\n",
- "We will be using [panns_inference package](https://github.com/qiuqiangkong/panns_inference) to tag the audio and create embeddings. We'll also be using this [HuggingFace dataset](https://huggingface.co/datasets/ashraq/esc50) for the audio files. The dataset contains 2,000 sounds and labels."
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Installing dependencies"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 1,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "Pks8RDrdWRjt",
- "outputId": "387f9c04-f6c5-42ec-f7ba-87a3ae654162"
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Collecting datasets\n",
- " Downloading datasets-2.14.6-py3-none-any.whl (493 kB)\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m493.7/493.7 kB\u001b[0m \u001b[31m5.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[?25hRequirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from datasets) (1.23.5)\n",
- "Requirement already satisfied: pyarrow>=8.0.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (9.0.0)\n",
- "Collecting dill<0.3.8,>=0.3.0 (from datasets)\n",
- " Downloading dill-0.3.7-py3-none-any.whl (115 kB)\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m115.3/115.3 kB\u001b[0m \u001b[31m6.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[?25hRequirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from datasets) (1.5.3)\n",
- "Requirement already satisfied: requests>=2.19.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (2.31.0)\n",
- "Requirement already satisfied: tqdm>=4.62.1 in /usr/local/lib/python3.10/dist-packages (from datasets) (4.66.1)\n",
- "Requirement already satisfied: xxhash in /usr/local/lib/python3.10/dist-packages (from datasets) (3.4.1)\n",
- "Collecting multiprocess (from datasets)\n",
- " Downloading multiprocess-0.70.15-py310-none-any.whl (134 kB)\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m10.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[?25hRequirement already satisfied: fsspec[http]<=2023.10.0,>=2023.1.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (2023.6.0)\n",
- "Requirement already satisfied: aiohttp in /usr/local/lib/python3.10/dist-packages (from datasets) (3.8.6)\n",
- "Collecting huggingface-hub<1.0.0,>=0.14.0 (from datasets)\n",
- " Downloading huggingface_hub-0.18.0-py3-none-any.whl (301 kB)\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m302.0/302.0 kB\u001b[0m \u001b[31m11.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[?25hRequirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from datasets) (23.2)\n",
- "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from datasets) (6.0.1)\n",
- "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (23.1.0)\n",
- "Requirement already satisfied: charset-normalizer<4.0,>=2.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (3.3.0)\n",
- "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (6.0.4)\n",
- "Requirement already satisfied: async-timeout<5.0,>=4.0.0a3 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (4.0.3)\n",
- "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.9.2)\n",
- "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.4.0)\n",
- "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.3.1)\n",
- "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0.0,>=0.14.0->datasets) (3.12.4)\n",
- "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0.0,>=0.14.0->datasets) (4.5.0)\n",
- "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->datasets) (3.4)\n",
- "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->datasets) (2.0.7)\n",
- "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->datasets) (2023.7.22)\n",
- "Requirement already satisfied: python-dateutil>=2.8.1 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets) (2.8.2)\n",
- "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets) (2023.3.post1)\n",
- "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.1->pandas->datasets) (1.16.0)\n",
- "Installing collected packages: dill, multiprocess, huggingface-hub, datasets\n",
- "Successfully installed datasets-2.14.6 dill-0.3.7 huggingface-hub-0.18.0 multiprocess-0.70.15\n",
- "Collecting lancedb\n",
- " Downloading lancedb-0.3.1-py3-none-any.whl (60 kB)\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m60.4/60.4 kB\u001b[0m \u001b[31m2.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[?25hCollecting deprecation (from lancedb)\n",
- " Downloading deprecation-2.1.0-py2.py3-none-any.whl (11 kB)\n",
- "Collecting pylance==0.8.3 (from lancedb)\n",
- " Downloading pylance-0.8.3-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (21.3 MB)\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m21.3/21.3 MB\u001b[0m \u001b[31m38.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[?25hCollecting ratelimiter~=1.0 (from lancedb)\n",
- " Downloading ratelimiter-1.2.0.post0-py3-none-any.whl (6.6 kB)\n",
- "Collecting retry>=0.9.2 (from lancedb)\n",
- " Downloading retry-0.9.2-py2.py3-none-any.whl (8.0 kB)\n",
- "Requirement already satisfied: tqdm>=4.1.0 in /usr/local/lib/python3.10/dist-packages (from lancedb) (4.66.1)\n",
- "Requirement already satisfied: aiohttp in /usr/local/lib/python3.10/dist-packages (from lancedb) (3.8.6)\n",
- "Requirement already satisfied: pydantic>=1.10 in /usr/local/lib/python3.10/dist-packages (from lancedb) (1.10.13)\n",
- "Requirement already satisfied: attrs>=21.3.0 in /usr/local/lib/python3.10/dist-packages (from lancedb) (23.1.0)\n",
- "Collecting semver>=3.0 (from lancedb)\n",
- " Downloading semver-3.0.2-py3-none-any.whl (17 kB)\n",
- "Requirement already satisfied: cachetools in /usr/local/lib/python3.10/dist-packages (from lancedb) (5.3.1)\n",
- "Requirement already satisfied: pyyaml>=6.0 in /usr/local/lib/python3.10/dist-packages (from lancedb) (6.0.1)\n",
- "Requirement already satisfied: click>=8.1.7 in /usr/local/lib/python3.10/dist-packages (from lancedb) (8.1.7)\n",
- "Requirement already satisfied: requests>=2.31.0 in /usr/local/lib/python3.10/dist-packages (from lancedb) (2.31.0)\n",
- "Collecting pyarrow>=10 (from pylance==0.8.3->lancedb)\n",
- " Downloading pyarrow-13.0.0-cp310-cp310-manylinux_2_28_x86_64.whl (40.0 MB)\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m40.0/40.0 MB\u001b[0m \u001b[31m16.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[?25hRequirement already satisfied: numpy>=1.22 in /usr/local/lib/python3.10/dist-packages (from pylance==0.8.3->lancedb) (1.23.5)\n",
- "Requirement already satisfied: typing-extensions>=4.2.0 in /usr/local/lib/python3.10/dist-packages (from pydantic>=1.10->lancedb) (4.5.0)\n",
- "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests>=2.31.0->lancedb) (3.3.0)\n",
- "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests>=2.31.0->lancedb) (3.4)\n",
- "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests>=2.31.0->lancedb) (2.0.7)\n",
- "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests>=2.31.0->lancedb) (2023.7.22)\n",
- "Requirement already satisfied: decorator>=3.4.2 in /usr/local/lib/python3.10/dist-packages (from retry>=0.9.2->lancedb) (4.4.2)\n",
- "Collecting py<2.0.0,>=1.4.26 (from retry>=0.9.2->lancedb)\n",
- " Downloading py-1.11.0-py2.py3-none-any.whl (98 kB)\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m98.7/98.7 kB\u001b[0m \u001b[31m15.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[?25hRequirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp->lancedb) (6.0.4)\n",
- "Requirement already satisfied: async-timeout<5.0,>=4.0.0a3 in /usr/local/lib/python3.10/dist-packages (from aiohttp->lancedb) (4.0.3)\n",
- "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->lancedb) (1.9.2)\n",
- "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp->lancedb) (1.4.0)\n",
- "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp->lancedb) (1.3.1)\n",
- "Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from deprecation->lancedb) (23.2)\n",
- "Installing collected packages: ratelimiter, semver, pyarrow, py, deprecation, retry, pylance, lancedb\n",
- " Attempting uninstall: pyarrow\n",
- " Found existing installation: pyarrow 9.0.0\n",
- " Uninstalling pyarrow-9.0.0:\n",
- " Successfully uninstalled pyarrow-9.0.0\n",
- "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
- "ibis-framework 6.2.0 requires pyarrow<13,>=2, but you have pyarrow 13.0.0 which is incompatible.\n",
- "pandas-gbq 0.17.9 requires pyarrow<10.0dev,>=3.0.0, but you have pyarrow 13.0.0 which is incompatible.\u001b[0m\u001b[31m\n",
- "\u001b[0mSuccessfully installed deprecation-2.1.0 lancedb-0.3.1 py-1.11.0 pyarrow-13.0.0 pylance-0.8.3 ratelimiter-1.2.0.post0 retry-0.9.2 semver-3.0.2\n"
- ]
- }
- ],
- "source": [
- "!pip install panns-inference tqdm --q\n",
- "!pip3 install datasets\n",
- "!pip install lancedb"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Importing all the libraries"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 1,
- "metadata": {
- "id": "hToUqkBBWto1"
- },
- "outputs": [],
- "source": [
- "import lancedb"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "fF08IHEDalKU"
- },
- "source": [
- "**NOTE** : if you get any error while importing lancedb just you need to restart runtime"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 2,
- "metadata": {
- "id": "0jIb2Gr8WRju"
- },
- "outputs": [],
- "source": [
- "from datasets import load_dataset\n",
- "from panns_inference import AudioTagging\n",
- "from tqdm import tqdm\n",
- "from IPython.display import Audio, display\n",
- "import numpy as np"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "x6QfsfHlWRju"
- },
- "source": [
- "On devices that have CUDA installed, you may be able to install torch's CUDA supported version.\n",
- "```bash\n",
- "pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118\n",
- "```\n",
- "If you don't have CUDA or a GPU (or different os), you can install torch here: https://pytorch.org/get-started/locally/"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Load data"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 3,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "fyjp-ffQWRjv",
- "outputId": "edb7fdfa-27e7-4b00-fa2d-409bbf1d23b8"
- },
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "Repo card metadata block was not found. Setting CardData to empty.\n",
- "WARNING:huggingface_hub.repocard:Repo card metadata block was not found. Setting CardData to empty.\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Checkpoint path: /root/panns_data/Cnn14_mAP=0.431.pth\n",
- "GPU number: 1\n"
- ]
- }
- ],
- "source": [
- "dataset = load_dataset(\"ashraq/esc50\", split=\"train\")\n",
- "at = AudioTagging(checkpoint_path=None, device=\"cuda\") # device=\"cpu\" for CPU inference"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 4,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "Zm9Qz9WVWRjv",
- "outputId": "4cfd5f6d-3d83-4930-ceaf-9cd4c80eb774"
- },
- "outputs": [
- {
- "data": {
- "text/plain": [
- "Dataset({\n",
- " features: ['filename', 'fold', 'target', 'category', 'esc10', 'src_file', 'take', 'audio'],\n",
- " num_rows: 2000\n",
- "})"
- ]
- },
- "execution_count": 4,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "dataset"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "INND51clWRjv"
- },
- "source": [
- "### Create Embeddings\n",
- "Now, to create the data embeddings! We can start by creating batches of 70 for the data, keeping track of the most important columns: `category` and `audio`."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 5,
- "metadata": {
- "id": "VKflK56YWRjv"
- },
- "outputs": [],
- "source": [
- "batches = [batch[\"audio\"] for batch in dataset.iter(50)]\n",
- "meta_batches = [batch[\"category\"] for batch in dataset.iter(50)]\n",
- "audio_data = [np.array([audio[\"array\"] for audio in batch]) for batch in batches]\n",
- "meta_data = [np.array([meta for meta in batch]) for batch in meta_batches]"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "B4mB3sa2WRjw"
- },
- "source": [
- "We now want to iterate through these batches, and for each audio file, we want to use the AudioTagging embedder to extract the embedding. Then, we can store these embeddings, audio files, and category name into a list of dictionaries. Each dictionary has to contain a `vector` column in order to add to the LanceDB table, if no embedding function is provided."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 6,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "pdt1n8S7WRjw",
- "outputId": "96d4b5c6-b1c2-497f-c35f-d5905548f6f0"
- },
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "100%|██████████| 40/40 [00:19<00:00, 2.06it/s]\n"
- ]
- }
- ],
- "source": [
- "for i in tqdm(range(len(audio_data))):\n",
- " (_, embedding) = at.inference(audio_data[i])\n",
- " data = [\n",
- " {\n",
- " \"audio\": x[0][\"array\"],\n",
- " \"vector\": x[1],\n",
- " \"sampling_rate\": x[0][\"sampling_rate\"],\n",
- " \"category\": meta_data[i][j],\n",
- " }\n",
- " for j, x in enumerate(zip(batches[i], embedding))\n",
- " ]"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "CRpnHjJbWRjw"
- },
- "source": [
- "Once we have this data list, we can create a LanceDB table by first connecting to a certain directory before, and then calling `db.create_table()`. If the table already exists, we open the table and add the data."
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Add the VectorStore"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 7,
- "metadata": {
- "id": "3lh_d6m3WRjw"
- },
- "outputs": [],
- "source": [
- "# Connect to directory at the top of the file\n",
- "db = lancedb.connect(\"data/audio-lancedb\")\n",
- "table_name = \"audio-search\"\n",
- "\n",
- "if table_name not in db.table_names():\n",
- " tbl = db.create_table(table_name, data)\n",
- "else:\n",
- " tbl = db.open_table(table_name)\n",
- " tbl.add(data)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "m7WfeIv8WRjw"
- },
- "source": [
- "We can now combine all of this into a single function:"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Composite function"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 8,
- "metadata": {
- "id": "XnCHvlLsWRjw"
- },
- "outputs": [],
- "source": [
- "def insert_audio():\n",
- " batches = [batch[\"audio\"] for batch in dataset.iter(20)]\n",
- " meta_batches = [batch[\"category\"] for batch in dataset.iter(20)]\n",
- " audio_data = [np.array([audio[\"array\"] for audio in batch]) for batch in batches]\n",
- " meta_data = [np.array([meta for meta in batch]) for batch in meta_batches]\n",
- " print(\"Start\")\n",
- " for i in tqdm(range(len(audio_data))):\n",
- " (_, embedding) = at.inference(audio_data[i])\n",
- " data = [\n",
- " {\n",
- " \"audio\": x[0][\"array\"],\n",
- " \"vector\": x[1],\n",
- " \"sampling_rate\": x[0][\"sampling_rate\"],\n",
- " \"category\": meta_data[i][j],\n",
- " }\n",
- " for j, x in enumerate(zip(batches[i], embedding))\n",
- " ]\n",
- " if table_name not in db.table_names():\n",
- " tbl = db.create_table(table_name, data)\n",
- " else:\n",
- " tbl = db.open_table(table_name)\n",
- " tbl.add(data)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 10,
- "metadata": {
- "id": "UvEhnuLyWRjw"
- },
- "outputs": [],
- "source": [
- "import shutil\n",
- "\n",
- "shutil.rmtree(\"data/audio-lancedb/audio-search.lance\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "TXxGHwZdgZrG"
- },
- "outputs": [],
- "source": [
- "insert_audio()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "vr9LehNiiUNb"
- },
- "source": [
- "NOTE: if you get out of ram .next time simply run all cells & uncomment this lines #insert_audio"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "mPBphF19WRjx"
- },
- "source": [
- "Great! We now have a fully populated table with all the necessary information. The next step would be to query the table and find those similar audio files. We can do this by first opening the table, and then getting the specific audio file we want to search for."
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Query the database"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 9,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 93
- },
- "id": "ZsGYl6YSWRjx",
- "outputId": "8cc83527-0540-47aa-99b5-054530cf5615"
- },
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- " \n",
- " "
- ],
- "text/plain": [
- ""
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "3lhhVh6TWRjq"
+ },
+ "source": [
+ "# Audio Similarity Search using Vector Embeddings\n",
+ "This notebook demonstrates how to create vector embeddings of audio files to store into the LanceDB vector store, and then to find similar audio files.\n",
+ "We will be using [panns_inference package](https://github.com/qiuqiangkong/panns_inference) to tag the audio and create embeddings. We'll also be using this [HuggingFace dataset](https://huggingface.co/datasets/ashraq/esc50) for the audio files. The dataset contains 2,000 sounds and labels."
]
- },
- "metadata": {},
- "output_type": "display_data"
},
{
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Category: water_drops\n"
- ]
- }
- ],
- "source": [
- "tbl = db.open_table(table_name)\n",
- "audio = dataset[50][\"audio\"][\"array\"]\n",
- "category = dataset[50][\"category\"]\n",
- "display(Audio(audio, rate=dataset[50][\"audio\"][\"sampling_rate\"]))\n",
- "print(\"Category:\", category)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "Et2C9t87WRjx"
- },
- "source": [
- "Next, we call the embedding function again to create those embeddings, which would allow us to search our table."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 10,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "ZmXOqB2FWRjx",
- "outputId": "05659b4c-acb6-4514-e3c9-d96ecdf84f1a"
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- " audio \\\n",
- "0 [0.00506591796875, 0.00653076171875, 0.0051574... \n",
- "1 [-0.157318115234375, -0.122344970703125, -0.17... \n",
- "2 [-0.0162353515625, -0.015716552734375, -0.0150... \n",
- "3 [-0.0008544921875, -0.000762939453125, -0.0005... \n",
- "4 [-0.003753662109375, -0.004119873046875, -0.00... \n",
- "\n",
- " vector sampling_rate \\\n",
- "0 [0.0, 0.70255554, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0... 44100 \n",
- "1 [0.0, 0.68818694, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0... 44100 \n",
- "2 [0.0, 0.58163136, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0... 44100 \n",
- "3 [0.0, 1.0475253, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,... 44100 \n",
- "4 [0.0, 0.45124823, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0... 44100 \n",
- "\n",
- " category _distance \n",
- "0 water_drops 52.260368 \n",
- "1 water_drops 57.536537 \n",
- "2 water_drops 75.637558 \n",
- "3 drinking_sipping 76.979111 \n",
- "4 water_drops 77.981865 \n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- ":2: DeprecatedWarning: to_df is deprecated as of 0.3.1 and will be removed in 0.4.0. Use the bar function instead\n",
- " result = tbl.search(embedding[0]).limit(5).to_df()\n"
- ]
- }
- ],
- "source": [
- "(_, embedding) = at.inference(audio[None, :])\n",
- "result = tbl.search(embedding[0]).limit(5).to_df()\n",
- "print(result)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 11,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 396
- },
- "id": "enl39Zp8WRjx",
- "outputId": "305805b6-1540-4708-8345-071083221c80"
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "0. Category: water_drops\n"
- ]
- },
- {
- "data": {
- "text/html": [
- "\n",
- " \n",
- " "
- ],
- "text/plain": [
- ""
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "is73uCkAZLBj"
+ },
+ "source": [
+ "### Installing dependencies"
]
- },
- "metadata": {},
- "output_type": "display_data"
},
{
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "1. Category: water_drops\n"
- ]
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "Pks8RDrdWRjt",
+ "outputId": "c66c58b2-4f84-4b9c-e563-acda96e620cd"
+ },
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Requirement already satisfied: datasets in /usr/local/lib/python3.10/dist-packages (2.17.1)\n",
+ "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from datasets) (3.13.1)\n",
+ "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from datasets) (1.25.2)\n",
+ "Requirement already satisfied: pyarrow>=12.0.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (14.0.2)\n",
+ "Requirement already satisfied: pyarrow-hotfix in /usr/local/lib/python3.10/dist-packages (from datasets) (0.6)\n",
+ "Requirement already satisfied: dill<0.3.9,>=0.3.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (0.3.8)\n",
+ "Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from datasets) (1.5.3)\n",
+ "Requirement already satisfied: requests>=2.19.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (2.31.0)\n",
+ "Requirement already satisfied: tqdm>=4.62.1 in /usr/local/lib/python3.10/dist-packages (from datasets) (4.66.2)\n",
+ "Requirement already satisfied: xxhash in /usr/local/lib/python3.10/dist-packages (from datasets) (3.4.1)\n",
+ "Requirement already satisfied: multiprocess in /usr/local/lib/python3.10/dist-packages (from datasets) (0.70.16)\n",
+ "Requirement already satisfied: fsspec[http]<=2023.10.0,>=2023.1.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (2023.6.0)\n",
+ "Requirement already satisfied: aiohttp in /usr/local/lib/python3.10/dist-packages (from datasets) (3.9.3)\n",
+ "Requirement already satisfied: huggingface-hub>=0.19.4 in /usr/local/lib/python3.10/dist-packages (from datasets) (0.20.3)\n",
+ "Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from datasets) (23.2)\n",
+ "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from datasets) (6.0.1)\n",
+ "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.3.1)\n",
+ "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (23.2.0)\n",
+ "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.4.1)\n",
+ "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (6.0.5)\n",
+ "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.9.4)\n",
+ "Requirement already satisfied: async-timeout<5.0,>=4.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (4.0.3)\n",
+ "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.19.4->datasets) (4.10.0)\n",
+ "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->datasets) (3.3.2)\n",
+ "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->datasets) (3.6)\n",
+ "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->datasets) (2.0.7)\n",
+ "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->datasets) (2024.2.2)\n",
+ "Requirement already satisfied: python-dateutil>=2.8.1 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets) (2.8.2)\n",
+ "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets) (2023.4)\n",
+ "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.1->pandas->datasets) (1.16.0)\n",
+ "Requirement already satisfied: lancedb in /usr/local/lib/python3.10/dist-packages (0.6.1)\n",
+ "Requirement already satisfied: deprecation in /usr/local/lib/python3.10/dist-packages (from lancedb) (2.1.0)\n",
+ "Requirement already satisfied: pylance==0.10.1 in /usr/local/lib/python3.10/dist-packages (from lancedb) (0.10.1)\n",
+ "Requirement already satisfied: ratelimiter~=1.0 in /usr/local/lib/python3.10/dist-packages (from lancedb) (1.2.0.post0)\n",
+ "Requirement already satisfied: retry>=0.9.2 in /usr/local/lib/python3.10/dist-packages (from lancedb) (0.9.2)\n",
+ "Requirement already satisfied: tqdm>=4.27.0 in /usr/local/lib/python3.10/dist-packages (from lancedb) (4.66.2)\n",
+ "Requirement already satisfied: pydantic>=1.10 in /usr/local/lib/python3.10/dist-packages (from lancedb) (2.6.3)\n",
+ "Requirement already satisfied: attrs>=21.3.0 in /usr/local/lib/python3.10/dist-packages (from lancedb) (23.2.0)\n",
+ "Requirement already satisfied: semver>=3.0 in /usr/local/lib/python3.10/dist-packages (from lancedb) (3.0.2)\n",
+ "Requirement already satisfied: cachetools in /usr/local/lib/python3.10/dist-packages (from lancedb) (5.3.3)\n",
+ "Requirement already satisfied: pyyaml>=6.0 in /usr/local/lib/python3.10/dist-packages (from lancedb) (6.0.1)\n",
+ "Requirement already satisfied: click>=8.1.7 in /usr/local/lib/python3.10/dist-packages (from lancedb) (8.1.7)\n",
+ "Requirement already satisfied: requests>=2.31.0 in /usr/local/lib/python3.10/dist-packages (from lancedb) (2.31.0)\n",
+ "Requirement already satisfied: overrides>=0.7 in /usr/local/lib/python3.10/dist-packages (from lancedb) (7.7.0)\n",
+ "Requirement already satisfied: pyarrow>=12 in /usr/local/lib/python3.10/dist-packages (from pylance==0.10.1->lancedb) (14.0.2)\n",
+ "Requirement already satisfied: numpy>=1.22 in /usr/local/lib/python3.10/dist-packages (from pylance==0.10.1->lancedb) (1.25.2)\n",
+ "Requirement already satisfied: annotated-types>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from pydantic>=1.10->lancedb) (0.6.0)\n",
+ "Requirement already satisfied: pydantic-core==2.16.3 in /usr/local/lib/python3.10/dist-packages (from pydantic>=1.10->lancedb) (2.16.3)\n",
+ "Requirement already satisfied: typing-extensions>=4.6.1 in /usr/local/lib/python3.10/dist-packages (from pydantic>=1.10->lancedb) (4.10.0)\n",
+ "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests>=2.31.0->lancedb) (3.3.2)\n",
+ "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests>=2.31.0->lancedb) (3.6)\n",
+ "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests>=2.31.0->lancedb) (2.0.7)\n",
+ "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests>=2.31.0->lancedb) (2024.2.2)\n",
+ "Requirement already satisfied: decorator>=3.4.2 in /usr/local/lib/python3.10/dist-packages (from retry>=0.9.2->lancedb) (4.4.2)\n",
+ "Requirement already satisfied: py<2.0.0,>=1.4.26 in /usr/local/lib/python3.10/dist-packages (from retry>=0.9.2->lancedb) (1.11.0)\n",
+ "Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from deprecation->lancedb) (23.2)\n"
+ ]
+ }
+ ],
+ "source": [
+ "!pip install panns-inference tqdm --q\n",
+ "!pip3 install datasets\n",
+ "!pip install lancedb"
+ ]
},
{
- "data": {
- "text/html": [
- "\n",
- " \n",
- " "
- ],
- "text/plain": [
- ""
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "ZJsz8MnDZLBn"
+ },
+ "source": [
+ "### Importing all the libraries"
]
- },
- "metadata": {},
- "output_type": "display_data"
},
{
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "2. Category: water_drops\n"
- ]
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {
+ "id": "hToUqkBBWto1"
+ },
+ "outputs": [],
+ "source": [
+ "import lancedb"
+ ]
},
{
- "data": {
- "text/html": [
- "\n",
- " \n",
- " "
- ],
- "text/plain": [
- ""
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "fF08IHEDalKU"
+ },
+ "source": [
+ "**NOTE**: if you get an error while importing lancedb, restart the runtime and run the import again."
]
- },
- "metadata": {},
- "output_type": "display_data"
},
{
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "3. Category: drinking_sipping\n"
- ]
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {
+ "id": "0jIb2Gr8WRju"
+ },
+ "outputs": [],
+ "source": [
+ "from datasets import load_dataset\n",
+ "from panns_inference import AudioTagging\n",
+ "from tqdm import tqdm\n",
+ "from IPython.display import Audio, display\n",
+ "import numpy as np"
+ ]
},
{
- "data": {
- "text/html": [
- "\n",
- " \n",
- " "
- ],
- "text/plain": [
- ""
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "x6QfsfHlWRju"
+ },
+ "source": [
+ "On devices that have CUDA installed, you may be able to install torch's CUDA supported version.\n",
+ "```bash\n",
+ "pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118\n",
+ "```\n",
+ "If you don't have CUDA or a GPU (or you are on a different OS), you can find the right torch install command here: https://pytorch.org/get-started/locally/"
]
- },
- "metadata": {},
- "output_type": "display_data"
},
{
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "4. Category: water_drops\n"
- ]
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "mMy-7PPNZLBr"
+ },
+ "source": [
+ "### Load data"
+ ]
},
{
- "data": {
- "text/html": [
- "\n",
- " \n",
- " "
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "fyjp-ffQWRjv",
+ "outputId": "0eb8ecb4-aed5-453a-96e4-d956645e4555"
+ },
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_token.py:88: UserWarning: \n",
+ "The secret `HF_TOKEN` does not exist in your Colab secrets.\n",
+ "To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n",
+ "You will be able to reuse this secret in all of your notebooks.\n",
+ "Please note that authentication is recommended but still optional to access public models or datasets.\n",
+ " warnings.warn(\n",
+ "/usr/local/lib/python3.10/dist-packages/huggingface_hub/repocard.py:105: UserWarning: Repo card metadata block was not found. Setting CardData to empty.\n",
+ " warnings.warn(\"Repo card metadata block was not found. Setting CardData to empty.\")\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Checkpoint path: /root/panns_data/Cnn14_mAP=0.431.pth\n",
+ "GPU number: 1\n"
+ ]
+ }
],
- "text/plain": [
- ""
+ "source": [
+ "dataset = load_dataset(\"ashraq/esc50\", split=\"train\")\n",
+ "at = AudioTagging(checkpoint_path=None, device=\"cuda\") # device=\"cpu\" for CPU inference"
]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
- "source": [
- "for i in range(len(result)):\n",
- " print(str(i) + \". Category:\", result[\"category\"][i])\n",
- " display(Audio(result[\"audio\"][i], rate=result[\"sampling_rate\"][i]))"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "mZtR0bxXWRjx"
- },
- "source": [
- "Nice! It seems to be working! We can compile this into another function here, that takes an `id` of the audio from 0 to 1,999."
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Search Audio using IDs"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 12,
- "metadata": {
- "id": "wc1X8MuDWRjx"
- },
- "outputs": [],
- "source": [
- "def search_audio(id):\n",
- " tbl = db.open_table(table_name)\n",
- " audio = dataset[id][\"audio\"][\"array\"]\n",
- " category = dataset[id][\"category\"]\n",
- " display(Audio(audio, rate=dataset[id][\"audio\"][\"sampling_rate\"]))\n",
- " print(\"Category:\", category)\n",
- "\n",
- " (_, embedding) = at.inference(audio[None, :])\n",
- " result = tbl.search(embedding[0]).limit(5).to_df()\n",
- " print(result)\n",
- " for i in range(len(result)):\n",
- " print(str(i) + \". Category:\", result[\"category\"][i])\n",
- " display(Audio(result[\"audio\"][i], rate=result[\"sampling_rate\"][i]))"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 14,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 873
- },
- "id": "dQYVac1kWRjx",
- "outputId": "4dd2f8c9-dfb0-475d-97e3-3a82398ee0fd"
- },
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- " \n",
- " "
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "Zm9Qz9WVWRjv",
+ "outputId": "dcbdce06-309d-45cb-997c-37c89d9b6cc3"
+ },
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "Dataset({\n",
+ " features: ['filename', 'fold', 'target', 'category', 'esc10', 'src_file', 'take', 'audio'],\n",
+ " num_rows: 2000\n",
+ "})"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 5
+ }
],
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Category: car_horn\n",
- " audio \\\n",
- "0 [-0.022979736328125, -0.021820068359375, -0.02... \n",
- "1 [0.313934326171875, 0.312774658203125, 0.31698... \n",
- "2 [0.0655517578125, 0.011505126953125, -0.024536... \n",
- "3 [0.063690185546875, 0.065216064453125, 0.07296... \n",
- "4 [-0.006866455078125, -0.007476806640625, -0.00... \n",
- "\n",
- " vector sampling_rate \\\n",
- "0 [0.0, 0.12407931, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0... 44100 \n",
- "1 [0.0, 0.5878662, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,... 44100 \n",
- "2 [0.0, 0.7369921, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,... 44100 \n",
- "3 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... 44100 \n",
- "4 [0.0, 0.42053863, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0... 44100 \n",
- "\n",
- " category _distance \n",
- "0 airplane 85.660744 \n",
- "1 washing_machine 91.059021 \n",
- "2 vacuum_cleaner 110.453613 \n",
- "3 clapping 111.933456 \n",
- "4 footsteps 115.770416 \n",
- "0. Category: airplane\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- ":9: DeprecatedWarning: to_df is deprecated as of 0.3.1 and will be removed in 0.4.0. Use the bar function instead\n",
- " result = tbl.search(embedding[0]).limit(5).to_df()\n"
- ]
- },
- {
- "data": {
- "text/html": [
- "\n",
- " \n",
- " "
+ "source": [
+ "dataset"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "INND51clWRjv"
+ },
+ "source": [
+ "### Create Embeddings\n",
+ "Now, to create the data embeddings! We can start by creating batches of 50 for the data, keeping track of the most important columns: `category` and `audio`."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {
+ "id": "VKflK56YWRjv"
+ },
+ "outputs": [],
+ "source": [
+ "batches = [batch[\"audio\"] for batch in dataset.iter(50)]\n",
+ "meta_batches = [batch[\"category\"] for batch in dataset.iter(50)]\n",
+ "audio_data = [np.array([audio[\"array\"] for audio in batch]) for batch in batches]\n",
+ "meta_data = [np.array([meta for meta in batch]) for batch in meta_batches]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "B4mB3sa2WRjw"
+ },
+ "source": [
+ "We now want to iterate through these batches, and for each audio file, we want to use the AudioTagging embedder to extract the embedding. Then, we can store these embeddings, audio files, and category name into a list of dictionaries. Each dictionary has to contain a `vector` column in order to add to the LanceDB table, if no embedding function is provided."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "pdt1n8S7WRjw",
+ "outputId": "26abb853-33b2-4a86-a41a-6f188e7d4d46"
+ },
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "100%|██████████| 40/40 [00:13<00:00, 2.99it/s]\n"
+ ]
+ }
],
- "text/plain": [
- ""
+ "source": [
+ "for i in tqdm(range(len(audio_data))):\n",
+ " (_, embedding) = at.inference(audio_data[i])\n",
+ " data = [\n",
+ " {\n",
+ " \"audio\": x[0][\"array\"],\n",
+ " \"vector\": x[1],\n",
+ " \"sampling_rate\": x[0][\"sampling_rate\"],\n",
+ " \"category\": meta_data[i][j],\n",
+ " }\n",
+ " for j, x in enumerate(zip(batches[i], embedding))\n",
+ " ]"
]
- },
- "metadata": {},
- "output_type": "display_data"
},
{
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "1. Category: washing_machine\n"
- ]
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "CRpnHjJbWRjw"
+ },
+ "source": [
+ "Once we have this data list, we can create a LanceDB table by first connecting to a directory and then calling `db.create_table()`. If the table already exists, we open the table and add the data."
+ ]
},
{
- "data": {
- "text/html": [
- "\n",
- " \n",
- " "
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "PDGjLT4UZLBu"
+ },
+ "source": [
+ "### Add the VectorStore"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {
+ "id": "3lh_d6m3WRjw",
+ "outputId": "691acc64-5791-42b4-9f0f-3992f54b62da",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ }
+ },
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Created Table\n"
+ ]
+ }
],
- "text/plain": [
- ""
+ "source": [
+ "# Connect to directory at the top of the file\n",
+ "db = lancedb.connect(\"data/audio-lancedb\")\n",
+ "table_name = \"audio-search\"\n",
+ "\n",
+ "if table_name not in db.table_names():\n",
+ " print(\"Created Table\")\n",
+ " tbl = db.create_table(table_name, data)\n",
+ "else:\n",
+ " print(\"Inserting data\")\n",
+ " tbl = db.open_table(table_name)\n",
+ " tbl.add(data)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "m7WfeIv8WRjw"
+ },
+ "source": [
+ "We can now combine all of this into a single function:"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "ARFCQPPjZLBu"
+ },
+ "source": [
+ "### Composite function"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {
+ "id": "XnCHvlLsWRjw"
+ },
+ "outputs": [],
+ "source": [
+ "def insert_audio():\n",
+ " batches = [batch[\"audio\"] for batch in dataset.iter(20)]\n",
+ " meta_batches = [batch[\"category\"] for batch in dataset.iter(20)]\n",
+ " audio_data = [np.array([audio[\"array\"] for audio in batch]) for batch in batches]\n",
+ " meta_data = [np.array([meta for meta in batch]) for batch in meta_batches]\n",
+ " print(\"Start\")\n",
+ " for i in tqdm(range(len(audio_data))):\n",
+ " (_, embedding) = at.inference(audio_data[i])\n",
+ " data = [\n",
+ " {\n",
+ " \"audio\": x[0][\"array\"],\n",
+ " \"vector\": x[1],\n",
+ " \"sampling_rate\": x[0][\"sampling_rate\"],\n",
+ " \"category\": meta_data[i][j],\n",
+ " }\n",
+ " for j, x in enumerate(zip(batches[i], embedding))\n",
+ " ]\n",
+ " if table_name not in db.table_names():\n",
+ " tbl = db.create_table(table_name, data)\n",
+ " else:\n",
+ " tbl = db.open_table(table_name)\n",
+ " tbl.add(data)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {
+ "id": "UvEhnuLyWRjw"
+ },
+ "outputs": [],
+ "source": [
+ "import shutil\n",
+ "\n",
+ "shutil.rmtree(\"data/audio-lancedb/audio-search.lance\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "vr9LehNiiUNb"
+ },
+ "source": [
+ "NOTE: if you run out of memory, then next time run all cells and uncomment the `# insert_audio()` line in the cell below."
]
- },
- "metadata": {},
- "output_type": "display_data"
},
{
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "2. Category: vacuum_cleaner\n"
- ]
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "TXxGHwZdgZrG"
+ },
+ "outputs": [],
+ "source": [
+ "# insert_audio()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "mPBphF19WRjx"
+ },
+ "source": [
+ "Great! We now have a fully populated table with all the necessary information. The next step would be to query the table and find those similar audio files. We can do this by first opening the table, and then getting the specific audio file we want to search for."
+ ]
},
{
- "data": {
- "text/html": [
- "\n",
- " \n",
- " "
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "7B-mrGM6ZLBy"
+ },
+ "source": [
+ "### Query the database"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 93
+ },
+ "id": "ZsGYl6YSWRjx",
+ "outputId": "7a743814-a168-4fb7-84d4-4c303c55ccea"
+ },
+ "outputs": [
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ ""
+ ],
+ "text/html": [
+ "\n",
+ " \n",
+ " "
+ ]
+ },
+ "metadata": {}
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Category: water_drops\n"
+ ]
+ }
],
- "text/plain": [
- ""
+ "source": [
+ "tbl = db.open_table(table_name)\n",
+ "audio = dataset[50][\"audio\"][\"array\"]\n",
+ "category = dataset[50][\"category\"]\n",
+ "display(Audio(audio, rate=dataset[50][\"audio\"][\"sampling_rate\"]))\n",
+ "print(\"Category:\", category)"
]
- },
- "metadata": {},
- "output_type": "display_data"
},
{
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "3. Category: clapping\n"
- ]
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "Et2C9t87WRjx"
+ },
+ "source": [
+ "Next, we call the embedding function again to create those embeddings, which would allow us to search our table."
+ ]
},
{
- "data": {
- "text/html": [
- "\n",
- " \n",
- " "
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "ZmXOqB2FWRjx",
+ "outputId": "ed6c36a6-66a7-440d-f8fa-c693e61df0b2"
+ },
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ " audio \\\n",
+ "0 [0.00506591796875, 0.00653076171875, 0.0051574... \n",
+ "1 [-0.157318115234375, -0.122344970703125, -0.17... \n",
+ "2 [-0.0162353515625, -0.015716552734375, -0.0150... \n",
+ "3 [-0.0008544921875, -0.000762939453125, -0.0005... \n",
+ "4 [-0.003753662109375, -0.004119873046875, -0.00... \n",
+ "\n",
+ " vector sampling_rate \\\n",
+ "0 [0.0, 0.70255554, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0... 44100 \n",
+ "1 [0.0, 0.68818694, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0... 44100 \n",
+ "2 [0.0, 0.58163136, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0... 44100 \n",
+ "3 [0.0, 1.0475253, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,... 44100 \n",
+ "4 [0.0, 0.45124823, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0... 44100 \n",
+ "\n",
+ " category _distance \n",
+ "0 water_drops 52.260319 \n",
+ "1 water_drops 57.536579 \n",
+ "2 water_drops 75.637405 \n",
+ "3 drinking_sipping 76.979073 \n",
+ "4 water_drops 77.981728 \n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ ":2: UnsupportedWarning: to_df is unsupported as of 0.4.0. Use to_pandas() instead\n",
+ " result = tbl.search(embedding[0]).limit(5).to_df()\n"
+ ]
+ }
],
- "text/plain": [
- ""
+ "source": [
+ "(_, embedding) = at.inference(audio[None, :])\n",
+ "result = tbl.search(embedding[0]).limit(5).to_df()\n",
+ "print(result)"
]
- },
- "metadata": {},
- "output_type": "display_data"
},
{
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "4. Category: footsteps\n"
- ]
+ "cell_type": "code",
+ "execution_count": 17,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 396
+ },
+ "id": "enl39Zp8WRjx",
+ "outputId": "296de741-d483-4471-92f4-a263abf1d262"
+ },
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "0. Category: water_drops\n"
+ ]
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ ""
+ ],
+ "text/html": [
+ "\n",
+ " \n",
+ " "
+ ]
+ },
+ "metadata": {}
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "1. Category: water_drops\n"
+ ]
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ ""
+ ],
+ "text/html": [
+ "\n",
+ " \n",
+ " "
+ ]
+ },
+ "metadata": {}
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "2. Category: water_drops\n"
+ ]
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ ""
+ ],
+ "text/html": [
+ "\n",
+ " \n",
+ " "
+ ]
+ },
+ "metadata": {}
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "3. Category: drinking_sipping\n"
+ ]
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ ""
+ ],
+ "text/html": [
+ "\n",
+ " \n",
+ " "
+ ]
+ },
+ "metadata": {}
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "4. Category: water_drops\n"
+ ]
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ ""
+ ],
+ "text/html": [
+ "\n",
+ " \n",
+ " "
+ ]
+ },
+ "metadata": {}
+ }
+ ],
+ "source": [
+ "for i in range(len(result)):\n",
+ " print(str(i) + \". Category:\", result[\"category\"][i])\n",
+ " display(Audio(result[\"audio\"][i], rate=result[\"sampling_rate\"][i]))"
+ ]
},
{
- "data": {
- "text/html": [
- "\n",
- " \n",
- " "
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "mZtR0bxXWRjx"
+ },
+ "source": [
+ "Nice! It seems to be working! We can compile this into another function here, that takes an `id` of the audio from 0 to 1,999."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "OPrn-NAYZLB0"
+ },
+ "source": [
+ "### Search Audio using IDs"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {
+ "id": "wc1X8MuDWRjx"
+ },
+ "outputs": [],
+ "source": [
+ "def search_audio(id):\n",
+ " tbl = db.open_table(table_name)\n",
+ " audio = dataset[id][\"audio\"][\"array\"]\n",
+ " category = dataset[id][\"category\"]\n",
+ " display(Audio(audio, rate=dataset[id][\"audio\"][\"sampling_rate\"]))\n",
+ " print(\"Category:\", category)\n",
+ "\n",
+ " (_, embedding) = at.inference(audio[None, :])\n",
+ " result = tbl.search(embedding[0]).limit(5).to_df()\n",
+ " print(result)\n",
+ " for i in range(len(result)):\n",
+ " print(str(i) + \". Category:\", result[\"category\"][i])\n",
+ " display(Audio(result[\"audio\"][i], rate=result[\"sampling_rate\"][i]))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 853
+ },
+ "id": "dQYVac1kWRjx",
+ "outputId": "a1ea8e7d-acee-4bb4-a008-6ee90d097cc8"
+ },
+ "outputs": [
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ ""
+ ],
+ "text/html": [
+ "\n",
+ " \n",
+ " "
+ ]
+ },
+ "metadata": {}
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Category: car_horn\n",
+ " audio \\\n",
+ "0 [-0.022979736328125, -0.021820068359375, -0.02... \n",
+ "1 [0.313934326171875, 0.312774658203125, 0.31698... \n",
+ "2 [0.0655517578125, 0.011505126953125, -0.024536... \n",
+ "3 [0.063690185546875, 0.065216064453125, 0.07296... \n",
+ "4 [-0.006866455078125, -0.007476806640625, -0.00... \n",
+ "\n",
+ " vector sampling_rate \\\n",
+ "0 [0.0, 0.12407931, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0... 44100 \n",
+ "1 [0.0, 0.5878662, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,... 44100 \n",
+ "2 [0.0, 0.7369921, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,... 44100 \n",
+ "3 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... 44100 \n",
+ "4 [0.0, 0.42053863, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0... 44100 \n",
+ "\n",
+ " category _distance \n",
+ "0 airplane 85.660736 \n",
+ "1 washing_machine 91.059029 \n",
+ "2 vacuum_cleaner 110.453621 \n",
+ "3 clapping 111.933441 \n",
+ "4 footsteps 115.770401 \n",
+ "0. Category: airplane\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ ":9: UnsupportedWarning: to_df is unsupported as of 0.4.0. Use to_pandas() instead\n",
+ " result = tbl.search(embedding[0]).limit(5).to_df()\n"
+ ]
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ ""
+ ],
+ "text/html": [
+ "\n",
+ " \n",
+ " "
+ ]
+ },
+ "metadata": {}
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "1. Category: washing_machine\n"
+ ]
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ ""
+ ],
+ "text/html": [
+ "\n",
+ " \n",
+ " "
+ ]
+ },
+ "metadata": {}
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "2. Category: vacuum_cleaner\n"
+ ]
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ ""
+ ],
+ "text/html": [
+ "\n",
+ " \n",
+ " "
+ ]
+ },
+ "metadata": {}
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "3. Category: clapping\n"
+ ]
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ ""
+ ],
+ "text/html": [
+ "\n",
+ " \n",
+ " "
+ ]
+ },
+ "metadata": {}
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "4. Category: footsteps\n"
+ ]
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ ""
+ ],
+ "text/html": [
+ "\n",
+ " \n",
+ " "
+ ]
+ },
+ "metadata": {}
+ }
],
- "text/plain": [
- ""
+ "source": [
+ "search_audio(125)"
]
- },
- "metadata": {},
- "output_type": "display_data"
}
- ],
- "source": [
- "search_audio(125)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "3X3pePawWRjx"
- },
- "outputs": [],
- "source": []
- }
- ],
- "metadata": {
- "accelerator": "GPU",
- "colab": {
- "gpuType": "T4",
- "provenance": []
- },
- "kernelspec": {
- "display_name": "Python 3 (ipykernel)",
- "language": "python",
- "name": "python3"
+ ],
+ "metadata": {
+ "accelerator": "GPU",
+ "colab": {
+ "gpuType": "T4",
+ "provenance": []
+ },
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.12.1"
+ }
},
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.12.1"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 4
-}
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
\ No newline at end of file
diff --git a/examples/product-recommender/lancedb_cloud/README.md b/examples/product-recommender/lancedb_cloud/README.md
index 368725cc..309697f6 100644
--- a/examples/product-recommender/lancedb_cloud/README.md
+++ b/examples/product-recommender/lancedb_cloud/README.md
@@ -20,11 +20,18 @@ if you would like to set api key through an environment variable:
```
export LANCEDB_API_KEY="sk_..."
```
+or
+```
+import os
+import getpass
+
+os.environ["LANCEDB_API_KEY"] = getpass.getpass("Enter Your LANCEDB API Key:")
+```
replace the following lines in main.py with your project slug and api key"
```
db_url = "db://your-project-name"
- api_key="sk_..."
+api_key="sk_..."
```
Run the script
diff --git a/examples/product-recommender/lancedb_cloud/main.ipynb b/examples/product-recommender/lancedb_cloud/main.ipynb
index f3c6f44c..c32d1ff2 100644
--- a/examples/product-recommender/lancedb_cloud/main.ipynb
+++ b/examples/product-recommender/lancedb_cloud/main.ipynb
@@ -14,71 +14,46 @@
},
{
"cell_type": "markdown",
+ "metadata": {
+ "id": "sCtHNvkbzSot"
+ },
"source": [
"## Credentials\n",
"\n",
"Copy and paste the project name and the api key from your project page.\n",
"These will be used later to [connect to LanceDB Cloud](#scroll-to=5q8m6GMD7sGu)"
- ],
- "metadata": {
- "id": "sCtHNvkbzSot"
- }
+ ]
},
{
"cell_type": "code",
- "source": [
- "project_slug = \"your-project-slug\" # @param {type:\"string\"}"
- ],
+ "execution_count": 2,
"metadata": {
"id": "zpPM2T8zzZkw"
},
- "execution_count": 2,
- "outputs": []
+ "outputs": [],
+ "source": [
+ "project_slug = \"your-project-slug\" # @param {type:\"string\"}"
+ ]
},
{
"cell_type": "code",
- "source": [
- "api_key = \"sk_...\" # @param {type:\"string\"}"
- ],
+ "execution_count": 3,
"metadata": {
"id": "xgCqtc99zwUQ"
},
- "execution_count": 3,
- "outputs": []
+ "outputs": [],
+ "source": [
+ "api_key = \"sk_...\" # @param {type:\"string\"}"
+ ]
},
{
"cell_type": "markdown",
- "source": [
- "You can also set the LANCEDB_API_KEY as an environment variable with one of the options below"
- ],
"metadata": {
"id": "eEITDnEczz7G"
- }
- },
- {
- "cell_type": "code",
- "source": [
- "!export LANCEDB_API_KEY=\"sk_...\""
- ],
- "metadata": {
- "id": "Md5kS8s7z0-j"
},
- "execution_count": 3,
- "outputs": []
- },
- {
- "cell_type": "code",
"source": [
- "import os\n",
- "import getpass\n",
- "\n",
- "os.environ[\"LANCEDB_API_KEY\"] = getpass.getpass(\"Enter Your LANCEDB API Key:\")"
- ],
- "metadata": {
- "id": "d7gq19Wez3JZ"
- },
- "execution_count": null,
- "outputs": []
+ "You can also set the LANCEDB_API_KEY as an environment variable. More details can be found **here**."
+ ]
},
{
"cell_type": "markdown",
@@ -94,19 +69,19 @@
"cell_type": "code",
"execution_count": 4,
"metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
"id": "3jXSVspr7sGe",
+ "outputId": "4c09916d-85de-46d6-9c16-ed6746ac4e19",
"vscode": {
"languageId": "shellscript"
- },
- "outputId": "4c09916d-85de-46d6-9c16-ed6746ac4e19",
- "colab": {
- "base_uri": "https://localhost:8080/"
}
},
"outputs": [
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"--2024-01-23 03:30:37-- http://vectordb-recipes.s3.us-west-2.amazonaws.com/product-recommender.zip\n",
"Resolving vectordb-recipes.s3.us-west-2.amazonaws.com (vectordb-recipes.s3.us-west-2.amazonaws.com)... 3.5.84.12, 3.5.84.155, 3.5.84.131, ...\n",
@@ -163,8 +138,8 @@
},
"outputs": [
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (1.23.5)\n",
"Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (1.5.3)\n",
@@ -377,23 +352,14 @@
},
"outputs": [
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"13863749\n"
]
},
{
- "output_type": "execute_result",
"data": {
- "text/plain": [
- " user_id product_id total_orders\n",
- "13863744 206209 48697 1\n",
- "13863745 206209 48742 2\n",
- "13863746 206210 46149 50\n",
- "13863747 206211 27845 49\n",
- "13863748 206211 26604 32"
- ],
"text/html": [
"\n",
" \n",
@@ -662,10 +628,19 @@
"
\n",
" \n",
" \n"
+ ],
+ "text/plain": [
+ " user_id product_id total_orders\n",
+ "13863744 206209 48697 1\n",
+ "13863745 206209 48742 2\n",
+ "13863746 206210 46149 50\n",
+ "13863747 206211 27845 49\n",
+ "13863748 206211 26604 32"
]
},
+ "execution_count": 11,
"metadata": {},
- "execution_count": 11
+ "output_type": "execute_result"
}
],
"source": [
@@ -758,26 +733,26 @@
},
"outputs": [
{
- "output_type": "stream",
"name": "stderr",
+ "output_type": "stream",
"text": [
"/usr/local/lib/python3.10/dist-packages/implicit/cpu/als.py:95: RuntimeWarning: OpenBLAS is configured to use 2 threads. It is highly recommended to disable its internal threadpool by setting the environment variable 'OPENBLAS_NUM_THREADS=1' or by calling 'threadpoolctl.threadpool_limits(1, \"blas\")'. Having OpenBLAS use a threadpool can lead to severe performance issues here.\n",
" check_blas_config()\n"
]
},
{
- "output_type": "display_data",
"data": {
- "text/plain": [
- " 0%| | 0/50 [00:00, ?it/s]"
- ],
"application/vnd.jupyter.widget-view+json": {
+ "model_id": "2c0101b0a3574a14b2a37fc431eb2908",
"version_major": 2,
- "version_minor": 0,
- "model_id": "2c0101b0a3574a14b2a37fc431eb2908"
- }
+ "version_minor": 0
+ },
+ "text/plain": [
+ " 0%| | 0/50 [00:00, ?it/s]"
+ ]
},
- "metadata": {}
+ "metadata": {},
+ "output_type": "display_data"
}
],
"source": [
@@ -831,21 +806,20 @@
},
"outputs": [
{
- "output_type": "display_data",
"data": {
- "text/plain": [
- " 0%| | 0/192802 [00:00, ?it/s]"
- ],
"application/vnd.jupyter.widget-view+json": {
+ "model_id": "5b98b7b242994c999064688c9210c61b",
"version_major": 2,
- "version_minor": 0,
- "model_id": "5b98b7b242994c999064688c9210c61b"
- }
+ "version_minor": 0
+ },
+ "text/plain": [
+ " 0%| | 0/192802 [00:00, ?it/s]"
+ ]
},
- "metadata": {}
+ "metadata": {},
+ "output_type": "display_data"
},
{
- "output_type": "execute_result",
"data": {
"text/plain": [
"{'precision': 0.2742377453615933,\n",
@@ -854,8 +828,9 @@
" 'auc': 0.6549935260418878}"
]
},
+ "execution_count": 15,
"metadata": {},
- "execution_count": 15
+ "output_type": "execute_result"
}
],
"source": [
@@ -886,7 +861,6 @@
},
"outputs": [
{
- "output_type": "execute_result",
"data": {
"text/plain": [
"array([[ 4.18832153e-03, 3.25558195e-03, -1.20758591e-02,\n",
@@ -977,8 +951,9 @@
" 3.88169941e-03, 4.75861132e-03]], dtype=float32)"
]
},
+ "execution_count": 17,
"metadata": {},
- "execution_count": 17
+ "output_type": "execute_result"
}
],
"source": [
@@ -997,7 +972,6 @@
},
"outputs": [
{
- "output_type": "execute_result",
"data": {
"text/plain": [
"array([[-0.48312342, -0.16332878, -0.27058715, -0.68734646, 0.55745304,\n",
@@ -1054,8 +1028,9 @@
" 0.736535 , -1.2620703 , -0.16571261]], dtype=float32)"
]
},
+ "execution_count": 18,
"metadata": {},
- "execution_count": 18
+ "output_type": "execute_result"
}
],
"source": [
@@ -1087,30 +1062,18 @@
},
{
"cell_type": "code",
- "source": [
- "data.head()"
- ],
+ "execution_count": 21,
"metadata": {
- "id": "9YiqyzadgiQl",
- "outputId": "df0e60c3-eef5-4a1f-efe5-2f0d927a38d4",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 206
- }
+ },
+ "id": "9YiqyzadgiQl",
+ "outputId": "df0e60c3-eef5-4a1f-efe5-2f0d927a38d4"
},
- "execution_count": 21,
"outputs": [
{
- "output_type": "execute_result",
"data": {
- "text/plain": [
- " user_id product_id total_orders\n",
- "0 1 196 11\n",
- "1 1 10258 10\n",
- "2 1 10326 1\n",
- "3 1 12427 10\n",
- "4 1 13032 4"
- ],
"text/html": [
"\n",
" \n",
@@ -1379,11 +1342,23 @@
"
\n",
" \n",
" \n"
+ ],
+ "text/plain": [
+ " user_id product_id total_orders\n",
+ "0 1 196 11\n",
+ "1 1 10258 10\n",
+ "2 1 10326 1\n",
+ "3 1 12427 10\n",
+ "4 1 13032 4"
]
},
+ "execution_count": 21,
"metadata": {},
- "execution_count": 21
+ "output_type": "execute_result"
}
+ ],
+ "source": [
+ "data.head()"
]
},
{
@@ -1450,22 +1425,22 @@
"cell_type": "code",
"execution_count": 24,
"metadata": {
- "id": "H8HyvjCFSeaz",
- "outputId": "27519f2a-e95a-4442-97b1-291931180ca8",
"colab": {
"base_uri": "https://localhost:8080/"
- }
+ },
+ "id": "H8HyvjCFSeaz",
+ "outputId": "27519f2a-e95a-4442-97b1-291931180ca8"
},
"outputs": [
{
- "output_type": "execute_result",
"data": {
"text/plain": [
"{}"
]
},
+ "execution_count": 24,
"metadata": {},
- "execution_count": 24
+ "output_type": "execute_result"
}
],
"source": [
@@ -1550,33 +1525,7 @@
},
"outputs": [
{
- "output_type": "display_data",
"data": {
- "text/plain": [
- " product_id product_name \\\n",
- "0 196 Soda \n",
- "1 46149 Zero Calorie Cola \n",
- "2 40939 Drinking Water \n",
- "3 37710 Trail Mix \n",
- "4 22802 Mineral Water \n",
- "5 41400 Crunchy Oats 'n Honey Granola Bars \n",
- "6 46061 Popcorn \n",
- "7 31651 Extra Fancy Unsalted Mixed Nuts \n",
- "8 5258 Sparkling Water \n",
- "9 38928 0% Greek Strained Yogurt \n",
- "\n",
- " vector _distance \n",
- "0 [-0.0030924827, -0.0042996905, -0.01350651, -0... 35.096085 \n",
- "1 [0.0015008126, -0.014029495, -0.015295635, 0.0... 35.392975 \n",
- "2 [0.0018837166, -0.018152414, -0.015649604, 0.0... 35.864483 \n",
- "3 [-0.0011668581, -0.0025222106, -0.016717039, -... 35.896873 \n",
- "4 [-0.010115783, -0.017115017, -0.011403508, 0.0... 36.035912 \n",
- "5 [0.0040870784, -0.0009994006, -0.018302424, -0... 36.042686 \n",
- "6 [0.0036969625, -0.013887798, -0.002804261, -0.... 36.043732 \n",
- "7 [0.014438897, -0.005578243, -0.0055169673, -0.... 36.117802 \n",
- "8 [-0.022658644, -0.026015628, -0.0083606485, -0... 36.131721 \n",
- "9 [0.0018425643, -0.011489441, -0.0052835834, 0.... 36.139870 "
- ],
"text/html": [
"\n",
" \n",
@@ -1886,17 +1835,38 @@
"
\n",
" \n",
" \n"
+ ],
+ "text/plain": [
+ " product_id product_name \\\n",
+ "0 196 Soda \n",
+ "1 46149 Zero Calorie Cola \n",
+ "2 40939 Drinking Water \n",
+ "3 37710 Trail Mix \n",
+ "4 22802 Mineral Water \n",
+ "5 41400 Crunchy Oats 'n Honey Granola Bars \n",
+ "6 46061 Popcorn \n",
+ "7 31651 Extra Fancy Unsalted Mixed Nuts \n",
+ "8 5258 Sparkling Water \n",
+ "9 38928 0% Greek Strained Yogurt \n",
+ "\n",
+ " vector _distance \n",
+ "0 [-0.0030924827, -0.0042996905, -0.01350651, -0... 35.096085 \n",
+ "1 [0.0015008126, -0.014029495, -0.015295635, 0.0... 35.392975 \n",
+ "2 [0.0018837166, -0.018152414, -0.015649604, 0.0... 35.864483 \n",
+ "3 [-0.0011668581, -0.0025222106, -0.016717039, -... 35.896873 \n",
+ "4 [-0.010115783, -0.017115017, -0.011403508, 0.0... 36.035912 \n",
+ "5 [0.0040870784, -0.0009994006, -0.018302424, -0... 36.042686 \n",
+ "6 [0.0036969625, -0.013887798, -0.002804261, -0.... 36.043732 \n",
+ "7 [0.014438897, -0.005578243, -0.0055169673, -0.... 36.117802 \n",
+ "8 [-0.022658644, -0.026015628, -0.0083606485, -0... 36.131721 \n",
+ "9 [0.0018425643, -0.011489441, -0.0052835834, 0.... 36.139870 "
]
},
- "metadata": {}
+ "metadata": {},
+ "output_type": "display_data"
},
{
- "output_type": "display_data",
"data": {
- "text/plain": [
- " product_id product_name total_orders\n",
- "0 46149 Zero Calorie Cola 50"
- ],
"text/html": [
"\n",
" \n",
@@ -2014,38 +1984,17 @@
"\n",
"
\n",
" \n"
+ ],
+ "text/plain": [
+ " product_id product_name total_orders\n",
+ "0 46149 Zero Calorie Cola 50"
]
},
- "metadata": {}
+ "metadata": {},
+ "output_type": "display_data"
},
{
- "output_type": "display_data",
"data": {
- "text/plain": [
- " product_id product_name \\\n",
- "0 26604 Organic Blackberries \n",
- "1 27845 Organic Whole Milk \n",
- "2 27966 Organic Raspberries \n",
- "3 43352 Raspberries \n",
- "4 9076 Blueberries \n",
- "5 21288 Blackberries \n",
- "6 39275 Organic Blueberries \n",
- "7 39928 Organic Kiwi \n",
- "8 11777 Red Raspberries \n",
- "9 21137 Organic Strawberries \n",
- "\n",
- " vector _distance \n",
- "0 [-0.017585486, 0.019628799, 0.0399348, 0.01422... 17.404045 \n",
- "1 [-0.050286394, 0.026924692, 0.030701049, -0.02... 17.404305 \n",
- "2 [-0.006732653, 0.015266006, 0.018316658, -0.00... 17.867121 \n",
- "3 [0.0037516877, 0.013682851, 0.057814274, 0.031... 18.030893 \n",
- "4 [0.0029817792, 0.030459687, 0.04528497, 0.0113... 18.135754 \n",
- "5 [-0.011553102, -0.010046569, 0.037375, 0.02368... 18.141661 \n",
- "6 [0.010543987, 0.006028164, 0.011502461, 0.0004... 18.241520 \n",
- "7 [-0.044292357, -0.031322725, -0.00174381, -0.0... 18.414057 \n",
- "8 [-0.0067819585, -0.023531102, 0.010277328, -0.... 18.468819 \n",
- "9 [0.007023127, 0.0037457773, -0.0061378656, -0.... 18.476973 "
- ],
"text/html": [
"\n",
" \n",
@@ -2355,18 +2304,38 @@
"
\n",
" \n",
" \n"
+ ],
+ "text/plain": [
+ " product_id product_name \\\n",
+ "0 26604 Organic Blackberries \n",
+ "1 27845 Organic Whole Milk \n",
+ "2 27966 Organic Raspberries \n",
+ "3 43352 Raspberries \n",
+ "4 9076 Blueberries \n",
+ "5 21288 Blackberries \n",
+ "6 39275 Organic Blueberries \n",
+ "7 39928 Organic Kiwi \n",
+ "8 11777 Red Raspberries \n",
+ "9 21137 Organic Strawberries \n",
+ "\n",
+ " vector _distance \n",
+ "0 [-0.017585486, 0.019628799, 0.0399348, 0.01422... 17.404045 \n",
+ "1 [-0.050286394, 0.026924692, 0.030701049, -0.02... 17.404305 \n",
+ "2 [-0.006732653, 0.015266006, 0.018316658, -0.00... 17.867121 \n",
+ "3 [0.0037516877, 0.013682851, 0.057814274, 0.031... 18.030893 \n",
+ "4 [0.0029817792, 0.030459687, 0.04528497, 0.0113... 18.135754 \n",
+ "5 [-0.011553102, -0.010046569, 0.037375, 0.02368... 18.141661 \n",
+ "6 [0.010543987, 0.006028164, 0.011502461, 0.0004... 18.241520 \n",
+ "7 [-0.044292357, -0.031322725, -0.00174381, -0.0... 18.414057 \n",
+ "8 [-0.0067819585, -0.023531102, 0.010277328, -0.... 18.468819 \n",
+ "9 [0.007023127, 0.0037457773, -0.0061378656, -0.... 18.476973 "
]
},
- "metadata": {}
+ "metadata": {},
+ "output_type": "display_data"
},
{
- "output_type": "display_data",
"data": {
- "text/plain": [
- " product_id product_name total_orders\n",
- "0 27845 Organic Whole Milk 49\n",
- "1 26604 Organic Blackberries 32"
- ],
"text/html": [
"\n",
" \n",
@@ -2617,9 +2586,15 @@
"
\n",
" \n",
" \n"
+ ],
+ "text/plain": [
+ " product_id product_name total_orders\n",
+ "0 27845 Organic Whole Milk 49\n",
+ "1 26604 Organic Blackberries 32"
]
},
- "metadata": {}
+ "metadata": {},
+ "output_type": "display_data"
}
],
"source": [
@@ -2662,32 +2637,25 @@
},
"widgets": {
"application/vnd.jupyter.widget-state+json": {
- "2c0101b0a3574a14b2a37fc431eb2908": {
+ "000f9e8fd1db4bc0a7aceeb822ca2b2e": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "HBoxModel",
"model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
"state": {
- "_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
+ "_model_name": "DescriptionStyleModel",
"_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_31c3c90fa42f489796fba11d57799089",
- "IPY_MODEL_e13993dda2da40ff806d6e31a6e987d3",
- "IPY_MODEL_0bff70b647f3404fa15690ec9f3d0c78"
- ],
- "layout": "IPY_MODEL_674cf2d29d044cada59480813e0e8e58"
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
}
},
- "31c3c90fa42f489796fba11d57799089": {
+ "0bff70b647f3404fa15690ec9f3d0c78": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
"model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
@@ -2699,40 +2667,38 @@
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
- "layout": "IPY_MODEL_bfd4ff099ed14ab1bd79233beea7f402",
+ "layout": "IPY_MODEL_bf95fd811f79425bb2248525aeab7da0",
"placeholder": "",
- "style": "IPY_MODEL_000f9e8fd1db4bc0a7aceeb822ca2b2e",
- "value": "100%"
+ "style": "IPY_MODEL_46fb5083adf24ce4ae3fd4ea9aa4772e",
+ "value": " 50/50 [17:28<00:00, 20.73s/it]"
}
},
- "e13993dda2da40ff806d6e31a6e987d3": {
+ "2c0101b0a3574a14b2a37fc431eb2908": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
+ "_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_75b270d981de425ba1fd9a790b2a68ff",
- "max": 50,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_baafe1d810594384af1a5ffa4f2f5cb4",
- "value": 50
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_31c3c90fa42f489796fba11d57799089",
+ "IPY_MODEL_e13993dda2da40ff806d6e31a6e987d3",
+ "IPY_MODEL_0bff70b647f3404fa15690ec9f3d0c78"
+ ],
+ "layout": "IPY_MODEL_674cf2d29d044cada59480813e0e8e58"
}
},
- "0bff70b647f3404fa15690ec9f3d0c78": {
+ "31c3c90fa42f489796fba11d57799089": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
"model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
@@ -2744,16 +2710,31 @@
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
- "layout": "IPY_MODEL_bf95fd811f79425bb2248525aeab7da0",
+ "layout": "IPY_MODEL_bfd4ff099ed14ab1bd79233beea7f402",
"placeholder": "",
- "style": "IPY_MODEL_46fb5083adf24ce4ae3fd4ea9aa4772e",
- "value": " 50/50 [17:28<00:00, 20.73s/it]"
+ "style": "IPY_MODEL_000f9e8fd1db4bc0a7aceeb822ca2b2e",
+ "value": "100%"
}
},
- "674cf2d29d044cada59480813e0e8e58": {
+ "46fb5083adf24ce4ae3fd4ea9aa4772e": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "4b0298a9ecf84b509fbf379d43339b9c": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -2802,10 +2783,10 @@
"width": null
}
},
- "bfd4ff099ed14ab1bd79233beea7f402": {
+ "4b20ad4b356645bbbfb94929160943f2": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -2854,25 +2835,48 @@
"width": null
}
},
- "000f9e8fd1db4bc0a7aceeb822ca2b2e": {
+ "5b98b7b242994c999064688c9210c61b": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
"state": {
+ "_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_d5b1eb34ddc949aebd25b3744b93b726",
+ "IPY_MODEL_752d37b9a68b42d284493645962f3782",
+ "IPY_MODEL_f0def002c7ca41f6a70e9dba1bc605c7"
+ ],
+ "layout": "IPY_MODEL_4b0298a9ecf84b509fbf379d43339b9c"
+ }
+ },
+ "63b8646c732246988f566d0442a070e8": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
+ "bar_color": null,
"description_width": ""
}
},
- "75b270d981de425ba1fd9a790b2a68ff": {
+ "674cf2d29d044cada59480813e0e8e58": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -2921,26 +2925,34 @@
"width": null
}
},
- "baafe1d810594384af1a5ffa4f2f5cb4": {
+ "752d37b9a68b42d284493645962f3782": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
"state": {
+ "_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
+ "_model_name": "FloatProgressModel",
"_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "bar_color": null,
- "description_width": ""
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_4b20ad4b356645bbbfb94929160943f2",
+ "max": 192802,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_63b8646c732246988f566d0442a070e8",
+ "value": 192802
}
},
- "bf95fd811f79425bb2248525aeab7da0": {
+ "75b270d981de425ba1fd9a790b2a68ff": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -2989,113 +3001,10 @@
"width": null
}
},
- "46fb5083adf24ce4ae3fd4ea9aa4772e": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "5b98b7b242994c999064688c9210c61b": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HBoxModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_d5b1eb34ddc949aebd25b3744b93b726",
- "IPY_MODEL_752d37b9a68b42d284493645962f3782",
- "IPY_MODEL_f0def002c7ca41f6a70e9dba1bc605c7"
- ],
- "layout": "IPY_MODEL_4b0298a9ecf84b509fbf379d43339b9c"
- }
- },
- "d5b1eb34ddc949aebd25b3744b93b726": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_a37be209d5bb44e18f32c0259073d2c8",
- "placeholder": "",
- "style": "IPY_MODEL_b35984b48d8847eea119ee5eda049b9d",
- "value": "100%"
- }
- },
- "752d37b9a68b42d284493645962f3782": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "FloatProgressModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_4b20ad4b356645bbbfb94929160943f2",
- "max": 192802,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_63b8646c732246988f566d0442a070e8",
- "value": 192802
- }
- },
- "f0def002c7ca41f6a70e9dba1bc605c7": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_ae8581ec76314304b2078759e1dbdd7e",
- "placeholder": "",
- "style": "IPY_MODEL_d0e90066f1ec42afa5f1c02551d3889e",
- "value": " 192802/192802 [02:11<00:00, 1657.77it/s]"
- }
- },
- "4b0298a9ecf84b509fbf379d43339b9c": {
+ "a37be209d5bb44e18f32c0259073d2c8": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -3144,10 +3053,10 @@
"width": null
}
},
- "a37be209d5bb44e18f32c0259073d2c8": {
+ "ae8581ec76314304b2078759e1dbdd7e": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -3198,8 +3107,8 @@
},
"b35984b48d8847eea119ee5eda049b9d": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
@@ -3211,10 +3120,26 @@
"description_width": ""
}
},
- "4b20ad4b356645bbbfb94929160943f2": {
+ "baafe1d810594384af1a5ffa4f2f5cb4": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "bf95fd811f79425bb2248525aeab7da0": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -3263,26 +3188,10 @@
"width": null
}
},
- "63b8646c732246988f566d0442a070e8": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "ProgressStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "bar_color": null,
- "description_width": ""
- }
- },
- "ae8581ec76314304b2078759e1dbdd7e": {
+ "bfd4ff099ed14ab1bd79233beea7f402": {
"model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
"model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
@@ -3333,8 +3242,8 @@
},
"d0e90066f1ec42afa5f1c02551d3889e": {
"model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
@@ -3345,6 +3254,72 @@
"_view_name": "StyleView",
"description_width": ""
}
+ },
+ "d5b1eb34ddc949aebd25b3744b93b726": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_a37be209d5bb44e18f32c0259073d2c8",
+ "placeholder": "",
+ "style": "IPY_MODEL_b35984b48d8847eea119ee5eda049b9d",
+ "value": "100%"
+ }
+ },
+ "e13993dda2da40ff806d6e31a6e987d3": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_75b270d981de425ba1fd9a790b2a68ff",
+ "max": 50,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_baafe1d810594384af1a5ffa4f2f5cb4",
+ "value": 50
+ }
+ },
+ "f0def002c7ca41f6a70e9dba1bc605c7": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_ae8581ec76314304b2078759e1dbdd7e",
+ "placeholder": "",
+ "style": "IPY_MODEL_d0e90066f1ec42afa5f1c02551d3889e",
+ "value": " 192802/192802 [02:11<00:00, 1657.77it/s]"
+ }
}
}
}
diff --git a/examples/product-recommender/main.ipynb b/examples/product-recommender/main.ipynb
index 66c5c688..48ee5b92 100644
--- a/examples/product-recommender/main.ipynb
+++ b/examples/product-recommender/main.ipynb
@@ -1,2979 +1,3132 @@
{
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "YmdWGrw4t5G2"
- },
- "source": [
- "# Product Recommender using Collaborative Filtering and LanceDB\n",
- "\n",
- "We are going to use **LanceDB** and **Collaborative Filtering** to recommend products based on a user's past buying history. We used the **Instacart dataset** as our data for this example.\n",
- "\n",
- "![picture](https://daxg39y63pxwu.cloudfront.net/images/blog/product-recommendation-system-projects/Product_Recommendation_System_Project_Ideas_and_Examples.png)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "lXd46ecEt5G7"
- },
- "source": [
- "To run this example, you must first create a Kaggle account. Then, go to the 'Account' tab of your user profile and select 'Create New Token'. This will trigger the download of kaggle.json, a file containing your API credentials.\n",
- "\n",
- "Add Kaggle credentials to `~/.kaggle/kaggle.json` on Linux, OSX, and other UNIX-based operating systems or `C:\\Users\\\\.kaggle\\kaggle.json` for Window's users.\n",
- "\n",
- "In Google Colab, run the snippet below."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 10,
- "metadata": {
- "id": "l6TTPIF_omEy",
- "outputId": "d2cf1685-103e-4b62-bae3-a16d171a928f",
- "colab": {
- "base_uri": "https://localhost:8080/"
- }
- },
- "outputs": [
+ "cells": [
{
- "output_type": "stream",
- "name": "stdout",
- "text": [
- "Kaggle API key file created and moved successfully.\n"
- ]
- }
- ],
- "source": [
- "import json\n",
- "import os\n",
- "\n",
- "# Set the file path\n",
- "kaggle_json_path = \"/content/kaggle.json\"\n",
- "\n",
- "# Write Kaggle API key to the file\n",
- "with open(kaggle_json_path, \"w\") as fp:\n",
- " json.dump({\"username\": \"\", \"key\": \"\"}, fp)\n",
- "\n",
- "# Move the file to the correct location\n",
- "os.system(\"mkdir -p ~/.kaggle\")\n",
- "os.system(f\"mv {kaggle_json_path} ~/.kaggle/kaggle.json\")\n",
- "\n",
- "# Set permissions\n",
- "os.system(\"chmod 600 ~/.kaggle/kaggle.json\")\n",
- "\n",
- "print(\"Kaggle API key file created and moved successfully.\")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "c6G45HrUqNx5"
- },
- "source": [
- "### Install dependencies"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 5,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "YmdWGrw4t5G2"
+ },
+ "source": [
+ "# Product Recommender using Collaborative Filtering and LanceDB\n",
+ "\n",
+ "We are going to use **LanceDB** and **Collaborative Filtering** to recommend products based on a user's past buying history. We used the **Instacart dataset** as our data for this example.\n",
+ "\n",
+ "![picture](https://daxg39y63pxwu.cloudfront.net/images/blog/product-recommendation-system-projects/Product_Recommendation_System_Project_Ideas_and_Examples.png)"
+ ]
},
- "id": "R3_Hq2VC4_zT",
- "outputId": "ee47bbd5-d1c3-4900-894e-2530190e17e7"
- },
- "outputs": [
{
- "output_type": "stream",
- "name": "stdout",
- "text": [
- "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (1.23.5)\n",
- "Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (1.5.3)\n",
- "Requirement already satisfied: scipy in /usr/local/lib/python3.10/dist-packages (1.11.4)\n",
- "Requirement already satisfied: kaggle in /usr/local/lib/python3.10/dist-packages (1.5.16)\n",
- "Collecting implicit\n",
- " Downloading implicit-0.7.2-cp310-cp310-manylinux2014_x86_64.whl (8.9 MB)\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m8.9/8.9 MB\u001b[0m \u001b[31m18.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[?25hRequirement already satisfied: torch in /usr/local/lib/python3.10/dist-packages (2.1.0+cu121)\n",
- "Collecting lancedb\n",
- " Downloading lancedb-0.5.0-py3-none-any.whl (87 kB)\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m87.4/87.4 kB\u001b[0m \u001b[31m13.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[?25hRequirement already satisfied: python-dateutil>=2.8.1 in /usr/local/lib/python3.10/dist-packages (from pandas) (2.8.2)\n",
- "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas) (2023.3.post1)\n",
- "Requirement already satisfied: six>=1.10 in /usr/local/lib/python3.10/dist-packages (from kaggle) (1.16.0)\n",
- "Requirement already satisfied: certifi in /usr/local/lib/python3.10/dist-packages (from kaggle) (2023.11.17)\n",
- "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from kaggle) (2.31.0)\n",
- "Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from kaggle) (4.66.1)\n",
- "Requirement already satisfied: python-slugify in /usr/local/lib/python3.10/dist-packages (from kaggle) (8.0.1)\n",
- "Requirement already satisfied: urllib3 in /usr/local/lib/python3.10/dist-packages (from kaggle) (2.0.7)\n",
- "Requirement already satisfied: bleach in /usr/local/lib/python3.10/dist-packages (from kaggle) (6.1.0)\n",
- "Requirement already satisfied: threadpoolctl in /usr/local/lib/python3.10/dist-packages (from implicit) (3.2.0)\n",
- "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch) (3.13.1)\n",
- "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.10/dist-packages (from torch) (4.5.0)\n",
- "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch) (1.12)\n",
- "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch) (3.2.1)\n",
- "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch) (3.1.3)\n",
- "Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from torch) (2023.6.0)\n",
- "Requirement already satisfied: triton==2.1.0 in /usr/local/lib/python3.10/dist-packages (from torch) (2.1.0)\n",
- "Collecting deprecation (from lancedb)\n",
- " Downloading deprecation-2.1.0-py2.py3-none-any.whl (11 kB)\n",
- "Collecting pylance==0.9.6 (from lancedb)\n",
- " Downloading pylance-0.9.6-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (18.6 MB)\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m18.6/18.6 MB\u001b[0m \u001b[31m58.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[?25hCollecting ratelimiter~=1.0 (from lancedb)\n",
- " Downloading ratelimiter-1.2.0.post0-py3-none-any.whl (6.6 kB)\n",
- "Collecting retry>=0.9.2 (from lancedb)\n",
- " Downloading retry-0.9.2-py2.py3-none-any.whl (8.0 kB)\n",
- "Requirement already satisfied: pydantic>=1.10 in /usr/local/lib/python3.10/dist-packages (from lancedb) (1.10.13)\n",
- "Requirement already satisfied: attrs>=21.3.0 in /usr/local/lib/python3.10/dist-packages (from lancedb) (23.2.0)\n",
- "Collecting semver>=3.0 (from lancedb)\n",
- " Downloading semver-3.0.2-py3-none-any.whl (17 kB)\n",
- "Requirement already satisfied: cachetools in /usr/local/lib/python3.10/dist-packages (from lancedb) (5.3.2)\n",
- "Requirement already satisfied: pyyaml>=6.0 in /usr/local/lib/python3.10/dist-packages (from lancedb) (6.0.1)\n",
- "Requirement already satisfied: click>=8.1.7 in /usr/local/lib/python3.10/dist-packages (from lancedb) (8.1.7)\n",
- "Collecting overrides>=0.7 (from lancedb)\n",
- " Downloading overrides-7.6.0-py3-none-any.whl (17 kB)\n",
- "Collecting pyarrow>=12 (from pylance==0.9.6->lancedb)\n",
- " Downloading pyarrow-15.0.0-cp310-cp310-manylinux_2_28_x86_64.whl (38.3 MB)\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m38.3/38.3 MB\u001b[0m \u001b[31m13.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[?25hRequirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->kaggle) (3.3.2)\n",
- "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->kaggle) (3.6)\n",
- "Requirement already satisfied: decorator>=3.4.2 in /usr/local/lib/python3.10/dist-packages (from retry>=0.9.2->lancedb) (4.4.2)\n",
- "Collecting py<2.0.0,>=1.4.26 (from retry>=0.9.2->lancedb)\n",
- " Downloading py-1.11.0-py2.py3-none-any.whl (98 kB)\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m98.7/98.7 kB\u001b[0m \u001b[31m14.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[?25hRequirement already satisfied: webencodings in /usr/local/lib/python3.10/dist-packages (from bleach->kaggle) (0.5.1)\n",
- "Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from deprecation->lancedb) (23.2)\n",
- "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch) (2.1.3)\n",
- "Requirement already satisfied: text-unidecode>=1.3 in /usr/local/lib/python3.10/dist-packages (from python-slugify->kaggle) (1.3)\n",
- "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch) (1.3.0)\n",
- "Installing collected packages: ratelimiter, semver, pyarrow, py, overrides, deprecation, retry, pylance, implicit, lancedb\n",
- " Attempting uninstall: pyarrow\n",
- " Found existing installation: pyarrow 10.0.1\n",
- " Uninstalling pyarrow-10.0.1:\n",
- " Successfully uninstalled pyarrow-10.0.1\n",
- "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
- "ibis-framework 7.1.0 requires pyarrow<15,>=2, but you have pyarrow 15.0.0 which is incompatible.\u001b[0m\u001b[31m\n",
- "\u001b[0mSuccessfully installed deprecation-2.1.0 implicit-0.7.2 lancedb-0.5.0 overrides-7.6.0 py-1.11.0 pyarrow-15.0.0 pylance-0.9.6 ratelimiter-1.2.0.post0 retry-0.9.2 semver-3.0.2\n"
- ]
- }
- ],
- "source": [
- "!pip install numpy pandas scipy kaggle implicit torch lancedb"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "i_eatRhaIGIz"
- },
- "source": [
- "### Importing libraries"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 6,
- "metadata": {
- "id": "emp_MSXZt5G8"
- },
- "outputs": [],
- "source": [
- "import zipfile\n",
- "import numpy as np\n",
- "import pandas as pd\n",
- "import scipy.sparse\n",
- "import torch\n",
- "import implicit\n",
- "from implicit import evaluation\n",
- "import pydantic\n",
- "import lancedb\n",
- "from lancedb.pydantic import pydantic_to_schema, vector"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "bUGkN85V4_zY"
- },
- "source": [
- "### Load the dataset\n",
- "Now we can download the dataset. You will need to accept the rules of the `instacart-market-basket-analysis` competition, which you can do so [here](https://www.kaggle.com/competitions/instacart-market-basket-analysis/rules)."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 11,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "lXd46ecEt5G7"
+ },
+ "source": [
+ "To download the dataset used in this example, you must have a Kaggle account.\n",
+ "\n",
+ "To get your Kaggle API credentials:\n",
+ "\n",
+ "Go to the Your Profile -> Settings -> Create Token\n",
+ "\n",
+ "This will download `kaggle.json`, a file containing your API credentials.\n",
+ "\n",
+ "Upload the `kaggle.json` credentials file to Google Colab, then run the snippet below."
+ ]
},
- "id": "09gdQyBu4_zY",
- "outputId": "bb92fb9e-df75-47a5-b50d-290ed0555ef4"
- },
- "outputs": [
{
- "output_type": "stream",
- "name": "stdout",
- "text": [
- "Downloading instacart-market-basket-analysis.zip to /content\n",
- " 92% 181M/196M [00:01<00:00, 81.3MB/s]\n",
- "100% 196M/196M [00:01<00:00, 105MB/s] \n"
- ]
- }
- ],
- "source": [
- "!kaggle competitions download -c instacart-market-basket-analysis"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "K4Q4cOX-4_zY"
- },
- "source": [
- "We must now extract the zip files."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 12,
- "metadata": {
- "id": "f3g296nL4_zZ"
- },
- "outputs": [],
- "source": [
- "files = [\n",
- " \"instacart-market-basket-analysis.zip\",\n",
- " \"order_products__train.csv.zip\",\n",
- " \"order_products__prior.csv.zip\",\n",
- " \"products.csv.zip\",\n",
- " \"orders.csv.zip\",\n",
- "]\n",
- "\n",
- "for filename in files:\n",
- " with zipfile.ZipFile(filename, \"r\") as zip_ref:\n",
- " zip_ref.extractall(\"./\")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "oLgkRIfq4_zZ"
- },
- "source": [
- "Now we can move on to loading the dataset. We'll first read the csv files and create dataframes."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 13,
- "metadata": {
- "id": "cBbbR7Rut5G_"
- },
- "outputs": [],
- "source": [
- "products = pd.read_csv(\"products.csv\")\n",
- "orders = pd.read_csv(\"orders.csv\")\n",
- "order_products = pd.concat(\n",
- " [pd.read_csv(\"order_products__train.csv\"), pd.read_csv(\"order_products__prior.csv\")]\n",
- ")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "5FV_GGjst5HA"
- },
- "source": [
- "Since there isn't a user rating attribute, we'll gather \"confidence\" data by looking at the frequency of each item purchased by a user, and store this in the `data` dataframe."
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "YNgjd2nnqNx7"
- },
- "source": [
- "### Data Manipulation"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 14,
- "metadata": {
- "id": "ZjRh7RYpt5HB"
- },
- "outputs": [],
- "source": [
- "customer_order_products = pd.merge(orders, order_products, how=\"inner\", on=\"order_id\")\n",
- "\n",
- "# create confidence table\n",
- "data = (\n",
- " customer_order_products.groupby([\"user_id\", \"product_id\"])[[\"order_id\"]]\n",
- " .count()\n",
- " .reset_index()\n",
- ")\n",
- "data.columns = [\"user_id\", \"product_id\", \"total_orders\"]\n",
- "data.product_id = data.product_id.astype(\"int64\")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "77lvwm0St5HC"
- },
- "source": [
- "Let's create a couple of test users to examine the recommendations later:\n",
- "- 1st test user: buys 50 sodas: **Zero Calorie Cola**\n",
- "- 2nd test user: buys organic produce: **Organic Whole Milk** and **Organic Blackberries**"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 15,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 206
+ "cell_type": "code",
+ "source": [
+ "! pip install kaggle\n",
+ "! mkdir ~/.kaggle\n",
+ "! cp kaggle.json ~/.kaggle/\n",
+ "! chmod 600 ~/.kaggle/kaggle.json"
+ ],
+ "metadata": {
+ "id": "N3WSkW3kmjyF",
+ "outputId": "26294f7b-350e-41f9-afe0-e34c9dac3b9e",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ }
+ },
+ "execution_count": 1,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Requirement already satisfied: kaggle in /usr/local/lib/python3.10/dist-packages (1.5.16)\n",
+ "Requirement already satisfied: six>=1.10 in /usr/local/lib/python3.10/dist-packages (from kaggle) (1.16.0)\n",
+ "Requirement already satisfied: certifi in /usr/local/lib/python3.10/dist-packages (from kaggle) (2024.2.2)\n",
+ "Requirement already satisfied: python-dateutil in /usr/local/lib/python3.10/dist-packages (from kaggle) (2.8.2)\n",
+ "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from kaggle) (2.31.0)\n",
+ "Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from kaggle) (4.66.2)\n",
+ "Requirement already satisfied: python-slugify in /usr/local/lib/python3.10/dist-packages (from kaggle) (8.0.4)\n",
+ "Requirement already satisfied: urllib3 in /usr/local/lib/python3.10/dist-packages (from kaggle) (2.0.7)\n",
+ "Requirement already satisfied: bleach in /usr/local/lib/python3.10/dist-packages (from kaggle) (6.1.0)\n",
+ "Requirement already satisfied: webencodings in /usr/local/lib/python3.10/dist-packages (from bleach->kaggle) (0.5.1)\n",
+ "Requirement already satisfied: text-unidecode>=1.3 in /usr/local/lib/python3.10/dist-packages (from python-slugify->kaggle) (1.3)\n",
+ "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->kaggle) (3.3.2)\n",
+ "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->kaggle) (3.6)\n"
+ ]
+ }
+ ]
},
- "id": "A06EfAf-t5HC",
- "outputId": "95a1f51f-ced1-437a-8b62-569bb915262c"
- },
- "outputs": [
{
- "output_type": "execute_result",
- "data": {
- "text/plain": [
- " user_id product_id total_orders\n",
- "13863744 206209 48697 1\n",
- "13863745 206209 48742 2\n",
- "13863746 206210 46149 50\n",
- "13863747 206211 27845 49\n",
- "13863748 206211 26604 32"
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "c6G45HrUqNx5"
+ },
+ "source": [
+ "### Install dependencies"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "R3_Hq2VC4_zT",
+ "outputId": "752f8e45-ea8b-4b57-8a2b-0c7cb77f5f6c"
+ },
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (1.25.2)\n",
+ "Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (1.5.3)\n",
+ "Requirement already satisfied: scipy in /usr/local/lib/python3.10/dist-packages (1.11.4)\n",
+ "Collecting implicit\n",
+ " Downloading implicit-0.7.2-cp310-cp310-manylinux2014_x86_64.whl (8.9 MB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m8.9/8.9 MB\u001b[0m \u001b[31m15.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hRequirement already satisfied: torch in /usr/local/lib/python3.10/dist-packages (2.1.0+cu121)\n",
+ "Collecting lancedb\n",
+ " Downloading lancedb-0.6.1-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (21.3 MB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m21.3/21.3 MB\u001b[0m \u001b[31m20.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hRequirement already satisfied: python-dateutil>=2.8.1 in /usr/local/lib/python3.10/dist-packages (from pandas) (2.8.2)\n",
+ "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas) (2023.4)\n",
+ "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.10/dist-packages (from implicit) (4.66.2)\n",
+ "Requirement already satisfied: threadpoolctl in /usr/local/lib/python3.10/dist-packages (from implicit) (3.3.0)\n",
+ "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch) (3.13.1)\n",
+ "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.10/dist-packages (from torch) (4.10.0)\n",
+ "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch) (1.12)\n",
+ "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch) (3.2.1)\n",
+ "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch) (3.1.3)\n",
+ "Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from torch) (2023.6.0)\n",
+ "Requirement already satisfied: triton==2.1.0 in /usr/local/lib/python3.10/dist-packages (from torch) (2.1.0)\n",
+ "Collecting deprecation (from lancedb)\n",
+ " Downloading deprecation-2.1.0-py2.py3-none-any.whl (11 kB)\n",
+ "Collecting pylance==0.10.1 (from lancedb)\n",
+ " Downloading pylance-0.10.1-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (21.5 MB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m21.5/21.5 MB\u001b[0m \u001b[31m28.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hCollecting ratelimiter~=1.0 (from lancedb)\n",
+ " Downloading ratelimiter-1.2.0.post0-py3-none-any.whl (6.6 kB)\n",
+ "Collecting retry>=0.9.2 (from lancedb)\n",
+ " Downloading retry-0.9.2-py2.py3-none-any.whl (8.0 kB)\n",
+ "Requirement already satisfied: pydantic>=1.10 in /usr/local/lib/python3.10/dist-packages (from lancedb) (2.6.3)\n",
+ "Requirement already satisfied: attrs>=21.3.0 in /usr/local/lib/python3.10/dist-packages (from lancedb) (23.2.0)\n",
+ "Collecting semver>=3.0 (from lancedb)\n",
+ " Downloading semver-3.0.2-py3-none-any.whl (17 kB)\n",
+ "Requirement already satisfied: cachetools in /usr/local/lib/python3.10/dist-packages (from lancedb) (5.3.3)\n",
+ "Requirement already satisfied: pyyaml>=6.0 in /usr/local/lib/python3.10/dist-packages (from lancedb) (6.0.1)\n",
+ "Requirement already satisfied: click>=8.1.7 in /usr/local/lib/python3.10/dist-packages (from lancedb) (8.1.7)\n",
+ "Requirement already satisfied: requests>=2.31.0 in /usr/local/lib/python3.10/dist-packages (from lancedb) (2.31.0)\n",
+ "Collecting overrides>=0.7 (from lancedb)\n",
+ " Downloading overrides-7.7.0-py3-none-any.whl (17 kB)\n",
+ "Requirement already satisfied: pyarrow>=12 in /usr/local/lib/python3.10/dist-packages (from pylance==0.10.1->lancedb) (14.0.2)\n",
+ "Requirement already satisfied: annotated-types>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from pydantic>=1.10->lancedb) (0.6.0)\n",
+ "Requirement already satisfied: pydantic-core==2.16.3 in /usr/local/lib/python3.10/dist-packages (from pydantic>=1.10->lancedb) (2.16.3)\n",
+ "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.1->pandas) (1.16.0)\n",
+ "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests>=2.31.0->lancedb) (3.3.2)\n",
+ "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests>=2.31.0->lancedb) (3.6)\n",
+ "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests>=2.31.0->lancedb) (2.0.7)\n",
+ "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests>=2.31.0->lancedb) (2024.2.2)\n",
+ "Requirement already satisfied: decorator>=3.4.2 in /usr/local/lib/python3.10/dist-packages (from retry>=0.9.2->lancedb) (4.4.2)\n",
+ "Collecting py<2.0.0,>=1.4.26 (from retry>=0.9.2->lancedb)\n",
+ " Downloading py-1.11.0-py2.py3-none-any.whl (98 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m98.7/98.7 kB\u001b[0m \u001b[31m13.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hRequirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from deprecation->lancedb) (23.2)\n",
+ "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch) (2.1.5)\n",
+ "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch) (1.3.0)\n",
+ "Installing collected packages: ratelimiter, semver, py, overrides, deprecation, retry, pylance, implicit, lancedb\n",
+ "Successfully installed deprecation-2.1.0 implicit-0.7.2 lancedb-0.6.1 overrides-7.7.0 py-1.11.0 pylance-0.10.1 ratelimiter-1.2.0.post0 retry-0.9.2 semver-3.0.2\n"
+ ]
+ }
],
- "text/html": [
- "\n",
- " \n",
- "
\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " user_id | \n",
- " product_id | \n",
- " total_orders | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 13863744 | \n",
- " 206209 | \n",
- " 48697 | \n",
- " 1 | \n",
- "
\n",
- " \n",
- " 13863745 | \n",
- " 206209 | \n",
- " 48742 | \n",
- " 2 | \n",
- "
\n",
- " \n",
- " 13863746 | \n",
- " 206210 | \n",
- " 46149 | \n",
- " 50 | \n",
- "
\n",
- " \n",
- " 13863747 | \n",
- " 206211 | \n",
- " 27845 | \n",
- " 49 | \n",
- "
\n",
- " \n",
- " 13863748 | \n",
- " 206211 | \n",
- " 26604 | \n",
- " 32 | \n",
- "
\n",
- " \n",
- "
\n",
- "
\n",
- "
\n",
- "
\n"
+ "source": [
+ "!pip install numpy pandas scipy implicit torch lancedb"
]
- },
- "metadata": {},
- "execution_count": 15
- }
- ],
- "source": [
- "data_new = pd.DataFrame(\n",
- " [\n",
- " [data.user_id.max() + 1, 46149, 50],\n",
- " [data.user_id.max() + 2, 27845, 49],\n",
- " [data.user_id.max() + 2, 26604, 32],\n",
- " ],\n",
- " columns=[\"user_id\", \"product_id\", \"total_orders\"],\n",
- ")\n",
- "data = pd.concat([data, data_new]).reset_index(drop=True)\n",
- "data.tail()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "xBC-8PFTt5HD"
- },
- "source": [
- "In the next step, we will extract user and product unique ids, in order to create a `CSR (Compressed Sparse Row)` matrix. This will allow us to perform collaborative filtering.\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 16,
- "metadata": {
- "id": "v2_2R7zmt5HE"
- },
- "outputs": [],
- "source": [
- "# extract unique user and product ids\n",
- "unique_users = list(np.sort(data.user_id.unique()))\n",
- "unique_products = list(np.sort(products.product_id.unique()))\n",
- "purchases = list(data.total_orders)\n",
- "\n",
- "# create zero-based index position <-> user/item ID mappings\n",
- "index_to_user = pd.Series(unique_users)\n",
- "\n",
- "# create reverse mappings from user/item ID to index positions\n",
- "user_to_index = pd.Series(data=index_to_user.index + 1, index=index_to_user.values)\n",
- "\n",
- "# create row and column for user and product ids\n",
- "users_rows = data.user_id.astype(int)\n",
- "products_cols = data.product_id.astype(int)\n",
- "\n",
- "# create CSR matrix\n",
- "matrix = scipy.sparse.csr_matrix(\n",
- " (purchases, (users_rows, products_cols)),\n",
- " shape=(len(unique_users) + 1, len(unique_products) + 1),\n",
- ")\n",
- "matrix.data = np.nan_to_num(matrix.data, copy=False)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "II6wOH96t5HF"
- },
- "source": [
- "Let's now create a recommender model using the **implicit** library. The recommendation model is based off the algorithms described in the paper [Collaborative Filtering for Implicit Feedback Datasets](https://www.researchgate.net/publication/220765111_Collaborative_Filtering_for_Implicit_Feedback_Datasets) with performance optimizations described in [Applications of the Conjugate Gradient Method for Implicit Feedback Collaborative Filtering](https://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.379.6473&rep=rep1&type=pdf).\n"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "JDwIxGMnqNx8"
- },
- "source": [
- "# Difference between colloborative and content filtering\n",
- "\n",
- "![picture](https://miro.medium.com/v2/resize:fit:1400/0*R8qw_CXxCc4600bQ.png)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 17,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 104,
- "referenced_widgets": [
- "51febb09c3d54a1a9cf5dd896f3a24f6",
- "91b083fde4f14c39bbafb6fd099d44bd",
- "84fca55b676b4ef2add284492c8f4c3c",
- "bb2c985a09564562b6f040e31d817f07",
- "cc06b425a9364b6eb07ef77c4ff6fc48",
- "e2e92925bbb442f8a77e2d55886bfbfa",
- "bc7f6859319f455da1f552b66a6cf026",
- "66396eb857864cc8af94d7e2ced3102c",
- "38ddb81c475a472d8439dcf72261b727",
- "c095ad1b03a34c4e8b2077e373c82a5b",
- "692c702c31904e058c809ae772f1579a"
- ]
},
- "id": "k0GW99kxt5HF",
- "outputId": "548c2514-6194-43e4-dd24-6861f1808f5b"
- },
- "outputs": [
{
- "output_type": "stream",
- "name": "stderr",
- "text": [
- "/usr/local/lib/python3.10/dist-packages/implicit/cpu/als.py:95: RuntimeWarning: OpenBLAS is configured to use 2 threads. It is highly recommended to disable its internal threadpool by setting the environment variable 'OPENBLAS_NUM_THREADS=1' or by calling 'threadpoolctl.threadpool_limits(1, \"blas\")'. Having OpenBLAS use a threadpool can lead to severe performance issues here.\n",
- " check_blas_config()\n"
- ]
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "i_eatRhaIGIz"
+ },
+ "source": [
+ "### Importing libraries"
+ ]
},
{
- "output_type": "display_data",
- "data": {
- "text/plain": [
- " 0%| | 0/50 [00:00, ?it/s]"
- ],
- "application/vnd.jupyter.widget-view+json": {
- "version_major": 2,
- "version_minor": 0,
- "model_id": "51febb09c3d54a1a9cf5dd896f3a24f6"
- }
- },
- "metadata": {}
- }
- ],
- "source": [
- "import os\n",
- "\n",
- "os.environ[\"OPENBLAS_NUM_THREADS\"] = \"1\"\n",
- "# split data into train and test splits\n",
- "train, test = evaluation.train_test_split(matrix, train_percentage=0.9)\n",
- "\n",
- "# initialize the recommender model\n",
- "model = implicit.als.AlternatingLeastSquares(\n",
- " factors=128, regularization=0.05, iterations=50, num_threads=1\n",
- ")\n",
- "\n",
- "alpha = 15\n",
- "train = (train * alpha).astype(\"double\")\n",
- "\n",
- "# train the model on CSR matrix\n",
- "model.fit(train, show_progress=True)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "yN80hSojt5HF"
- },
- "source": [
- "## Let's now evaluate the model."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 18,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 118,
- "referenced_widgets": [
- "ae94cc355e0c4f8b8b73824ae2ef5632",
- "b07491e5db2d42b499fce4d7caddfe6f",
- "f79ee40b5f854a8b99c57b7c5156d3cd",
- "3c2bc0b631644bb992905d55dfe0a7a8",
- "41ebafe8393c451f83bfd2132a677a67",
- "9b1e85ca94ef442fbd546647f72e6905",
- "ae1b3cb276f44f5ebab3eaf8f7b85e67",
- "2782769e3daa491385bcc8ae34f24f3b",
- "5d41569b941445bea2497c89d3c8e6cb",
- "5e7dd2740d174064ac2d1cbc75cb5909",
- "a67972dc3f264b3699816257f1ad9ed7"
- ]
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {
+ "id": "emp_MSXZt5G8"
+ },
+ "outputs": [],
+ "source": [
+ "import zipfile\n",
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "import scipy.sparse\n",
+ "import torch\n",
+ "import implicit\n",
+ "from implicit import evaluation\n",
+ "import pydantic\n",
+ "import lancedb\n",
+ "from lancedb.pydantic import pydantic_to_schema, vector"
+ ]
},
- "id": "BbD8of_nt5HG",
- "outputId": "0fd51c13-6aad-408c-8732-f634b900d88e"
- },
- "outputs": [
{
- "output_type": "display_data",
- "data": {
- "text/plain": [
- " 0%| | 0/192941 [00:00, ?it/s]"
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "bUGkN85V4_zY"
+ },
+ "source": [
+ "### Load the dataset\n",
+ "To download the dataset, you first need to join the `instacart-market-basket-analysis` competition, which you can do [here](https://www.kaggle.com/competitions/instacart-market-basket-analysis/data)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "09gdQyBu4_zY",
+ "outputId": "b4851f6f-4b03-4f9e-ef24-e0fc3cca87e9"
+ },
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Downloading instacart-market-basket-analysis.zip to /content\n",
+ " 93% 183M/196M [00:01<00:00, 115MB/s]\n",
+ "100% 196M/196M [00:01<00:00, 118MB/s]\n"
+ ]
+ }
],
- "application/vnd.jupyter.widget-view+json": {
- "version_major": 2,
- "version_minor": 0,
- "model_id": "ae94cc355e0c4f8b8b73824ae2ef5632"
- }
- },
- "metadata": {}
+ "source": [
+ "!kaggle competitions download -c instacart-market-basket-analysis"
+ ]
},
{
- "output_type": "execute_result",
- "data": {
- "text/plain": [
- "{'precision': 0.27412284342591836,\n",
- " 'map': 0.04493413696144052,\n",
- " 'ndcg': 0.14451615505158932,\n",
- " 'auc': 0.6545342486842805}"
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "K4Q4cOX-4_zY"
+ },
+ "source": [
+ "We must now extract the zip files."
]
- },
- "metadata": {},
- "execution_count": 18
- }
- ],
- "source": [
- "test = (test * alpha).astype(\"double\")\n",
- "evaluation.ranking_metrics_at_k(\n",
- " model, train, test, K=100, show_progress=True, num_threads=1\n",
- ")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "LNmva3Dlt5HG"
- },
- "source": [
- "From the model, we'll be able to retrieve item and user factors, which we can use later on to store in LanceDB as vector embeddings."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 19,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
},
- "id": "JUtCROQKt5HG",
- "outputId": "b193a8a5-ad19-4103-d2ba-da795c2083ce"
- },
- "outputs": [
{
- "output_type": "execute_result",
- "data": {
- "text/plain": [
- "array([[-0.00393582, -0.01685037, -0.02514135, -0.00218876, -0.0010562 ,\n",
- " -0.00079798, 0.01819069, -0.00519188, -0.01228626, 0.00401638,\n",
- " -0.00781125, 0.00959024, 0.01423726, 0.00442174, 0.01037473,\n",
- " 0.02808587, 0.00244657, 0.0018454 , 0.02538132, 0.01683291,\n",
- " 0.01188253, 0.00087587, 0.0025703 , -0.00047981, 0.01450326,\n",
- " 0.01200323, 0.00787515, -0.00017644, -0.00753563, 0.01381539,\n",
- " 0.00135173, 0.01005786, 0.01090438, 0.00116869, 0.00096769,\n",
- " 0.00977502, -0.0167746 , 0.00648016, -0.00428325, 0.00548768,\n",
- " 0.00768948, -0.0004173 , -0.00244178, 0.01658725, 0.01461017,\n",
- " 0.00099183, 0.00801511, -0.00094962, -0.00111636, 0.01834919,\n",
- " 0.01020439, 0.01684855, 0.00937538, 0.00352314, 0.00628611,\n",
- " 0.01727425, -0.00045354, -0.0043545 , 0.00622296, 0.02763929,\n",
- " 0.0175414 , 0.0025494 , 0.00278871, 0.00882237, 0.01894817,\n",
- " 0.004546 , 0.00443751, 0.00223829, 0.00111541, -0.01148911,\n",
- " 0.0071995 , 0.0022778 , 0.0030574 , 0.01706898, 0.00943401,\n",
- " 0.01573355, 0.00645585, 0.00238012, 0.01112602, 0.00480064,\n",
- " -0.00925425, -0.00823171, 0.01947242, 0.01799377, 0.00935643,\n",
- " 0.02302277, 0.01127501, 0.00169189, 0.00442428, 0.0003463 ,\n",
- " -0.00588222, -0.00471497, 0.00715665, 0.01413584, 0.01044003,\n",
- " 0.00814752, 0.00225889, 0.00227687, 0.02267602, -0.01708956,\n",
- " 0.00555093, 0.00299377, 0.02626718, -0.00434333, 0.01339086,\n",
- " 0.00484108, 0.00288822, 0.00182178, 0.00865983, 0.01724493,\n",
- " 0.00959506, 0.00682333, 0.00606016, 0.01441876, -0.00705769,\n",
- " -0.00799371, -0.01118193, -0.01978158, 0.006458 , 0.01502528,\n",
- " 0.00674475, 0.00178147, 0.01719983, 0.02293287, 0.01255139,\n",
- " 0.01491496, 0.01808888, 0.00345866],\n",
- " [ 0.00323603, 0.00497766, 0.00453755, 0.00417772, 0.00438953,\n",
- " 0.00421532, 0.00495497, 0.0046146 , 0.00492151, 0.00497078,\n",
- " 0.0052129 , 0.00480909, 0.00148542, 0.00264552, 0.00288833,\n",
- " 0.00301518, 0.00390128, 0.00788652, 0.0076819 , 0.00252663,\n",
- " 0.00674695, 0.00518822, 0.00711363, 0.00226193, 0.0040475 ,\n",
- " 0.0043934 , 0.00432414, 0.00226067, 0.00580428, 0.00448218,\n",
- " 0.00547386, 0.00410828, 0.0054925 , 0.00461327, 0.00285034,\n",
- " 0.00054668, 0.00830063, 0.0044634 , 0.00304354, 0.00241769,\n",
- " 0.00575837, 0.00565463, 0.00181925, 0.00114148, 0.00551062,\n",
- " 0.00367526, 0.00311928, 0.00442262, 0.00386756, 0.00409707,\n",
- " 0.00323715, 0.00659947, 0.00576848, 0.00470546, 0.0029407 ,\n",
- " 0.00257845, 0.00305666, 0.00462309, 0.00227487, 0.00431858,\n",
- " 0.00174775, 0.00574619, 0.00167515, 0.00359569, 0.00230684,\n",
- " 0.00546688, 0.00494641, 0.001215 , 0.00104987, 0.00456155,\n",
- " 0.00275193, 0.00530424, 0.00941532, 0.00314335, 0.0058135 ,\n",
- " 0.00401683, 0.00584406, 0.00163502, 0.00739096, 0.00298902,\n",
- " 0.0027324 , 0.00274413, 0.00297064, 0.00482807, 0.00219656,\n",
- " 0.00475874, 0.00722822, 0.00152401, 0.00452986, 0.00267858,\n",
- " 0.00316171, 0.00149406, 0.00108999, 0.00543363, 0.00540665,\n",
- " 0.00531604, 0.00797952, 0.00464693, 0.0044198 , 0.00180882,\n",
- " 0.0080781 , 0.00504695, -0.00079688, 0.00432082, 0.00152117,\n",
- " 0.00216882, 0.00371563, 0.00295481, 0.00458806, 0.0035182 ,\n",
- " 0.00392778, 0.00552432, 0.0034868 , 0.00683633, 0.00367211,\n",
- " 0.0065281 , 0.00289658, 0.00729314, 0.0062108 , 0.00291619,\n",
- " 0.00393072, 0.00451638, 0.00429286, 0.00351242, 0.00382327,\n",
- " 0.00239837, 0.00322422, 0.00091544]], dtype=float32)"
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {
+ "id": "f3g296nL4_zZ"
+ },
+ "outputs": [],
+ "source": [
+ "files = [\n",
+ " \"instacart-market-basket-analysis.zip\",\n",
+ " \"order_products__train.csv.zip\",\n",
+ " \"order_products__prior.csv.zip\",\n",
+ " \"products.csv.zip\",\n",
+ " \"orders.csv.zip\",\n",
+ "]\n",
+ "\n",
+ "for filename in files:\n",
+ " with zipfile.ZipFile(filename, \"r\") as zip_ref:\n",
+ " zip_ref.extractall(\"./\")"
]
- },
- "metadata": {},
- "execution_count": 19
- }
- ],
- "source": [
- "model.item_factors[1:3]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 20,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
},
- "id": "O3onbJmnt5HG",
- "outputId": "c08302cb-ca84-43fa-9dc1-8ae7cfb9a6dc"
- },
- "outputs": [
{
- "output_type": "execute_result",
- "data": {
- "text/plain": [
- "array([[-1.156621 , -0.56613535, -2.132921 , 0.21048984, -2.4275026 ,\n",
- " 0.65278965, 0.29068047, -0.86535686, -1.1061512 , 0.56259805,\n",
- " 0.19742274, -1.2165526 , -0.62973964, -0.01139626, 0.89300275,\n",
- " 2.2871504 , 1.4771796 , -1.4062662 , 1.0189441 , 0.5945485 ,\n",
- " -0.18952619, 0.70189404, -1.3442475 , -0.02677805, 0.84128475,\n",
- " 2.0733142 , -1.7199677 , 0.5854054 , -0.4431385 , -0.42398763,\n",
- " 0.02329228, -0.21817428, 0.11456848, -0.60438013, 1.8845385 ,\n",
- " 0.48805752, 0.4914834 , 0.7036006 , -0.20515339, 0.26406226,\n",
- " -1.0394758 , 0.10504863, 0.15933166, 0.8230506 , -1.4198968 ,\n",
- " 1.5953054 , -0.17673688, -0.8304307 , -0.6108456 , 0.9837131 ,\n",
- " -0.7765777 , -0.17818405, -0.5966103 , 0.04043822, -0.5247469 ,\n",
- " 0.82219905, -1.2847204 , -0.15080781, 0.39815912, 0.38488662,\n",
- " 0.64036644, -0.41876483, -0.82841444, 0.14284681, 1.6959293 ,\n",
- " 0.32721832, 0.37919757, -0.12497136, -0.86765254, -1.2561089 ,\n",
- " 1.7805327 , -0.0725541 , 0.2972879 , 1.0693338 , 0.07123612,\n",
- " 1.1487474 , 0.03298713, -0.68131614, 0.54169786, -0.01313662,\n",
- " -0.45546463, -1.4612606 , 1.6720243 , -0.08753251, -0.48343715,\n",
- " 0.8998774 , 0.20779014, 0.5308798 , -1.4428797 , 0.6831403 ,\n",
- " -0.70852935, 0.64480275, 0.0724675 , 1.7675639 , -0.8195846 ,\n",
- " -1.1516594 , -0.12766095, 1.2877378 , 1.4163711 , -0.6665976 ,\n",
- " 0.7632217 , 0.0219117 , 1.1328388 , -0.6923931 , -0.07623906,\n",
- " -1.1971719 , -0.24483162, 1.1084096 , -1.389178 , -0.4599395 ,\n",
- " -0.53673255, -0.42448393, 0.3144093 , -0.51755965, 0.07051428,\n",
- " -0.8088044 , 0.17532958, 0.02752439, -0.50841916, 1.59531 ,\n",
- " 0.05944044, 0.68029535, 0.90922666, -0.06542441, 1.444127 ,\n",
- " 0.328831 , 1.3449801 , 0.3335227 ],\n",
- " [ 0.46968362, 0.5778823 , -0.25195542, -1.1158922 , 1.0398294 ,\n",
- " 0.49206567, -1.2944113 , -0.82420427, -1.0843799 , 0.94688237,\n",
- " 0.34702307, -2.128418 , 0.5895207 , 1.7771691 , 2.3814392 ,\n",
- " 3.4515288 , -1.1243666 , 0.40636566, -3.6058009 , 2.0127013 ,\n",
- " 2.630422 , 1.226146 , 1.0606844 , -1.3711332 , 0.4556484 ,\n",
- " -3.0049403 , 2.2014928 , 1.3779501 , 0.38636258, 0.1419977 ,\n",
- " 0.47818702, 1.5802301 , -1.4625446 , 0.14480898, 0.2072567 ,\n",
- " 0.69864166, -0.25921065, -0.69944 , -0.96478283, -1.1301236 ,\n",
- " 0.37057078, -0.4948973 , 2.461495 , 1.2349164 , -2.3684993 ,\n",
- " 1.3402634 , 0.06238265, -0.63602126, 0.12089054, -1.419062 ,\n",
- " 0.6596198 , 1.1927714 , 0.4584676 , -2.0229638 , 0.09702511,\n",
- " 1.0462239 , 0.11938996, 1.9599499 , 2.2067647 , 0.05910189,\n",
- " -0.45334807, -0.63888884, 1.8966726 , 0.29533964, 0.04228586,\n",
- " -0.6714943 , 1.2537432 , -1.1870966 , 1.0774132 , -0.1246227 ,\n",
- " 1.1661847 , 1.6405165 , 1.8046525 , -0.04009497, 0.63998705,\n",
- " 0.29115498, 0.11799651, -1.5368266 , -3.2563107 , 0.4452863 ,\n",
- " 1.9632971 , -0.77559155, -0.4075138 , -0.73139226, 1.9212514 ,\n",
- " -0.5371468 , -1.5187699 , 1.6507294 , 0.14245582, -0.5599329 ,\n",
- " 1.8405148 , 1.547712 , -0.93085283, 1.0936368 , 1.1122336 ,\n",
- " -0.9177783 , 0.44686067, -0.59135765, 1.5240254 , 0.9592764 ,\n",
- " 0.43910939, -0.09816932, 0.5969851 , 0.12710276, 0.15113388,\n",
- " 1.2623665 , -1.3553156 , 2.4717677 , -0.77363324, -0.70619744,\n",
- " -0.3764869 , -0.539016 , -0.12908189, -2.4355314 , 0.8730686 ,\n",
- " -1.0825372 , -1.5002979 , -2.2159626 , -0.7984329 , 2.1869748 ,\n",
- " -1.8168899 , 0.2638936 , -2.190666 , 0.754562 , -1.7908349 ,\n",
- " 0.87591416, 0.93109685, -0.33466753]], dtype=float32)"
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "oLgkRIfq4_zZ"
+ },
+ "source": [
+ "Now we can move on to loading the dataset. We'll first read the csv files and create dataframes."
]
- },
- "metadata": {},
- "execution_count": 20
- }
- ],
- "source": [
- "model.user_factors[1:3]"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "38rssdYCBR4E"
- },
- "source": [
- "## Let's save the data and create a empty LanceDB Table using a Pydantic model.\n",
- "A Table is designed to store large numbers of columns and huge quantities of data! For those interested, a LanceDB is columnar-based, and uses Lance, an open data format to store data."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 21,
- "metadata": {
- "id": "3_ykVLT6t5HH"
- },
- "outputs": [],
- "source": [
- "db = lancedb.connect(\"data/lancedb\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 22,
- "metadata": {
- "id": "ufHsF0o4t5HI"
- },
- "outputs": [],
- "source": [
- "class ProductModel(pydantic.BaseModel):\n",
- " product_id: int\n",
- " product_name: str\n",
- " vector: vector(128)\n",
- "\n",
- "\n",
- "schema = pydantic_to_schema(ProductModel)\n",
- "table_name = \"product_recommender\"\n",
- "tbl = db.create_table(table_name, schema=schema, mode=\"overwrite\")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "0-2K-g4-t5HJ"
- },
- "source": [
- "Let's now store our item factors into the table via the vector column of `product_entries`."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 25,
- "metadata": {
- "id": "NOOPF9zOt5HJ"
- },
- "outputs": [],
- "source": [
- "# Transform items into factors\n",
- "items_factors = model.item_factors\n",
- "product_entries = products[[\"product_id\", \"product_name\"]].drop_duplicates()\n",
- "product_entries[\"product_id\"] = product_entries.product_id.astype(\"int64\")\n",
- "item_embeddings = items_factors[1:].tolist()\n",
- "product_entries[\"vector\"] = item_embeddings\n",
- "\n",
- "tbl.add(product_entries)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "j3aU4z-tSbWE"
- },
- "source": [
- "## Let's create an ANN index in order to speed up retrieval. This might take a while."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 26,
- "metadata": {
- "id": "H8HyvjCFSeaz"
- },
- "outputs": [],
- "source": [
- "tbl.create_index(num_partitions=256, num_sub_vectors=16)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "ibNMrxyRt5HK"
- },
- "source": [
- "This is a helper method for analysing recommendations later.\n",
- "This method returns top N products that someone bought in the past (based on product quantity)."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 27,
- "metadata": {
- "id": "Uzgk5Od0t5HK"
- },
- "outputs": [],
- "source": [
- "def products_bought_by_user_in_the_past(user_id: int, top: int = 10):\n",
- " selected = data[data.user_id == user_id].sort_values(\n",
- " by=[\"total_orders\"], ascending=False\n",
- " )\n",
- "\n",
- " selected[\"product_name\"] = selected[\"product_id\"].map(\n",
- " product_entries.set_index(\"product_id\")[\"product_name\"]\n",
- " )\n",
- " selected = selected[[\"product_id\", \"product_name\", \"total_orders\"]].reset_index(\n",
- " drop=True\n",
- " )\n",
- " if selected.shape[0] < top:\n",
- " return selected\n",
- "\n",
- " return selected[:top]"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "ULyVnHEXt5HK"
- },
- "source": [
- "Let's retrieve our test users so we can query for recommendations."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 28,
- "metadata": {
- "id": "Wwl7yFKTt5HK"
- },
- "outputs": [],
- "source": [
- "test_user_ids = [206210, 206211]\n",
- "test_user_factors = model.user_factors[user_to_index[test_user_ids]]"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "wTh61ou3t5HL"
- },
- "source": [
- "## Let's now query LanceDB to retrieve recommendations."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 31,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 866
},
- "id": "UiZg4Iset5HL",
- "outputId": "3f647136-a3ae-4a11-af57-f5c0ec37dd9e"
- },
- "outputs": [
{
- "output_type": "display_data",
- "data": {
- "text/plain": [
- " product_id product_name \\\n",
- "0 46149 Zero Calorie Cola \n",
- "1 196 Soda \n",
- "2 40939 Drinking Water \n",
- "3 22802 Mineral Water \n",
- "4 37710 Trail Mix \n",
- "5 42500 Orange & Lemon Flavor Variety Pack Sparkling F... \n",
- "6 11759 Organic Simply Naked Pita Chips \n",
- "7 41400 Crunchy Oats 'n Honey Granola Bars \n",
- "8 46061 Popcorn \n",
- "9 26348 Mixed Fruit Fruit Snacks \n",
- "\n",
- " vector _distance \n",
- "0 [-0.014371638, -0.016776536, -0.026950998, -0.... 36.209068 \n",
- "1 [-0.031917833, -0.050772455, 0.013827451, -0.0... 36.464764 \n",
- "2 [-0.013426425, 0.0053616967, -0.01992105, -0.0... 36.504112 \n",
- "3 [-0.0062663523, -0.00076926383, -0.013624842, ... 36.615498 \n",
- "4 [-0.01988333, -0.014069387, -0.021995109, -0.0... 36.650448 \n",
- "5 [-0.009584657, -0.023491196, -0.033104196, -0.... 36.696648 \n",
- "6 [-0.009341286, -0.014609524, -0.0064758006, -0... 36.705814 \n",
- "7 [-0.013461881, -0.021371827, -0.02064814, -0.0... 36.709579 \n",
- "8 [0.0019679032, 0.00719048, -0.01262015, -0.005... 36.714954 \n",
- "9 [-0.0017672281, 0.0020188452, 0.012172974, -0.... 36.716858 "
- ],
- "text/html": [
- "\n",
- " \n",
- "
\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " product_id | \n",
- " product_name | \n",
- " vector | \n",
- " _distance | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " 46149 | \n",
- " Zero Calorie Cola | \n",
- " [-0.014371638, -0.016776536, -0.026950998, -0.... | \n",
- " 36.209068 | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " 196 | \n",
- " Soda | \n",
- " [-0.031917833, -0.050772455, 0.013827451, -0.0... | \n",
- " 36.464764 | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " 40939 | \n",
- " Drinking Water | \n",
- " [-0.013426425, 0.0053616967, -0.01992105, -0.0... | \n",
- " 36.504112 | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " 22802 | \n",
- " Mineral Water | \n",
- " [-0.0062663523, -0.00076926383, -0.013624842, ... | \n",
- " 36.615498 | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " 37710 | \n",
- " Trail Mix | \n",
- " [-0.01988333, -0.014069387, -0.021995109, -0.0... | \n",
- " 36.650448 | \n",
- "
\n",
- " \n",
- " 5 | \n",
- " 42500 | \n",
- " Orange & Lemon Flavor Variety Pack Sparkling F... | \n",
- " [-0.009584657, -0.023491196, -0.033104196, -0.... | \n",
- " 36.696648 | \n",
- "
\n",
- " \n",
- " 6 | \n",
- " 11759 | \n",
- " Organic Simply Naked Pita Chips | \n",
- " [-0.009341286, -0.014609524, -0.0064758006, -0... | \n",
- " 36.705814 | \n",
- "
\n",
- " \n",
- " 7 | \n",
- " 41400 | \n",
- " Crunchy Oats 'n Honey Granola Bars | \n",
- " [-0.013461881, -0.021371827, -0.02064814, -0.0... | \n",
- " 36.709579 | \n",
- "
\n",
- " \n",
- " 8 | \n",
- " 46061 | \n",
- " Popcorn | \n",
- " [0.0019679032, 0.00719048, -0.01262015, -0.005... | \n",
- " 36.714954 | \n",
- "
\n",
- " \n",
- " 9 | \n",
- " 26348 | \n",
- " Mixed Fruit Fruit Snacks | \n",
- " [-0.0017672281, 0.0020188452, 0.012172974, -0.... | \n",
- " 36.716858 | \n",
- "
\n",
- " \n",
- "
\n",
- "
\n",
- "
\n",
- "
\n"
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {
+ "id": "cBbbR7Rut5G_"
+ },
+ "outputs": [],
+ "source": [
+ "products = pd.read_csv(\"products.csv\")\n",
+ "orders = pd.read_csv(\"orders.csv\")\n",
+ "order_products = pd.concat(\n",
+ " [pd.read_csv(\"order_products__train.csv\"), pd.read_csv(\"order_products__prior.csv\")]\n",
+ ")"
]
- },
- "metadata": {}
},
{
- "output_type": "display_data",
- "data": {
- "text/plain": [
- " product_id product_name total_orders\n",
- "0 46149 Zero Calorie Cola 50"
- ],
- "text/html": [
- "\n",
- " \n",
- "
\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " product_id | \n",
- " product_name | \n",
- " total_orders | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " 46149 | \n",
- " Zero Calorie Cola | \n",
- " 50 | \n",
- "
\n",
- " \n",
- "
\n",
- "
\n",
- "
\n",
- "
\n"
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "5FV_GGjst5HA"
+ },
+ "source": [
+ "Since there isn't a user rating attribute, we'll gather \"confidence\" data by looking at the frequency of each item purchased by a user, and store this in the `data` dataframe."
]
- },
- "metadata": {}
},
{
- "output_type": "display_data",
- "data": {
- "text/plain": [
- " product_id product_name \\\n",
- "0 26604 Organic Blackberries \n",
- "1 43352 Raspberries \n",
- "2 27845 Organic Whole Milk \n",
- "3 21288 Blackberries \n",
- "4 27966 Organic Raspberries \n",
- "5 9076 Blueberries \n",
- "6 11777 Red Raspberries \n",
- "7 39275 Organic Blueberries \n",
- "8 21137 Organic Strawberries \n",
- "9 13176 Bag of Organic Bananas \n",
- "\n",
- " vector _distance \n",
- "0 [0.045252558, 0.04258531, 0.011869884, -0.0111... 17.445852 \n",
- "1 [0.059606433, 0.014409931, 0.008712215, -0.007... 17.617174 \n",
- "2 [-0.03977351, 0.012210161, 0.024828656, 0.0155... 17.692816 \n",
- "3 [0.030181486, 0.049021076, 0.003293778, -0.038... 17.696075 \n",
- "4 [0.020116415, 0.045062356, 0.00675044, 0.01640... 17.872534 \n",
- "5 [0.0482006, 0.06329333, -0.015093377, 0.000180... 17.879623 \n",
- "6 [0.05492493, 0.008120705, 0.020613482, 0.00779... 17.931437 \n",
- "7 [0.005109854, 0.032895964, -0.013481544, 0.010... 17.970798 \n",
- "8 [0.0017651353, 0.033547334, -0.005775958, 0.02... 17.986570 \n",
- "9 [0.004607136, 0.02749164, -0.006206838, 0.0187... 18.092993 "
- ],
- "text/html": [
- "\n",
- " \n",
- "
\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " product_id | \n",
- " product_name | \n",
- " vector | \n",
- " _distance | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " 26604 | \n",
- " Organic Blackberries | \n",
- " [0.045252558, 0.04258531, 0.011869884, -0.0111... | \n",
- " 17.445852 | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " 43352 | \n",
- " Raspberries | \n",
- " [0.059606433, 0.014409931, 0.008712215, -0.007... | \n",
- " 17.617174 | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " 27845 | \n",
- " Organic Whole Milk | \n",
- " [-0.03977351, 0.012210161, 0.024828656, 0.0155... | \n",
- " 17.692816 | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " 21288 | \n",
- " Blackberries | \n",
- " [0.030181486, 0.049021076, 0.003293778, -0.038... | \n",
- " 17.696075 | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " 27966 | \n",
- " Organic Raspberries | \n",
- " [0.020116415, 0.045062356, 0.00675044, 0.01640... | \n",
- " 17.872534 | \n",
- "
\n",
- " \n",
- " 5 | \n",
- " 9076 | \n",
- " Blueberries | \n",
- " [0.0482006, 0.06329333, -0.015093377, 0.000180... | \n",
- " 17.879623 | \n",
- "
\n",
- " \n",
- " 6 | \n",
- " 11777 | \n",
- " Red Raspberries | \n",
- " [0.05492493, 0.008120705, 0.020613482, 0.00779... | \n",
- " 17.931437 | \n",
- "
\n",
- " \n",
- " 7 | \n",
- " 39275 | \n",
- " Organic Blueberries | \n",
- " [0.005109854, 0.032895964, -0.013481544, 0.010... | \n",
- " 17.970798 | \n",
- "
\n",
- " \n",
- " 8 | \n",
- " 21137 | \n",
- " Organic Strawberries | \n",
- " [0.0017651353, 0.033547334, -0.005775958, 0.02... | \n",
- " 17.986570 | \n",
- "
\n",
- " \n",
- " 9 | \n",
- " 13176 | \n",
- " Bag of Organic Bananas | \n",
- " [0.004607136, 0.02749164, -0.006206838, 0.0187... | \n",
- " 18.092993 | \n",
- "
\n",
- " \n",
- "
\n",
- "
\n",
- "
\n",
- "
\n"
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "YNgjd2nnqNx7"
+ },
+ "source": [
+ "### Data Manipulation"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {
+ "id": "ZjRh7RYpt5HB"
+ },
+ "outputs": [],
+ "source": [
+ "customer_order_products = pd.merge(orders, order_products, how=\"inner\", on=\"order_id\")\n",
+ "\n",
+ "# create confidence table\n",
+ "data = (\n",
+ " customer_order_products.groupby([\"user_id\", \"product_id\"])[[\"order_id\"]]\n",
+ " .count()\n",
+ " .reset_index()\n",
+ ")\n",
+ "data.columns = [\"user_id\", \"product_id\", \"total_orders\"]\n",
+ "data.product_id = data.product_id.astype(\"int64\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "77lvwm0St5HC"
+ },
+ "source": [
+ "Let's create a couple of test users to examine the recommendations later:\n",
+ "- 1st test user: buys 50 sodas: **Zero Calorie Cola**\n",
+ "- 2nd test user: buys organic produce: **Organic Whole Milk** and **Organic Blackberries**"
]
- },
- "metadata": {}
},
{
- "output_type": "display_data",
- "data": {
- "text/plain": [
- " product_id product_name total_orders\n",
- "0 27845 Organic Whole Milk 49\n",
- "1 26604 Organic Blackberries 32"
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 206
+ },
+ "id": "A06EfAf-t5HC",
+ "outputId": "48ef0f5d-7c7a-4087-fd4b-8d3fa5ebaca1"
+ },
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ " user_id product_id total_orders\n",
+ "13863744 206209 48697 1\n",
+ "13863745 206209 48742 2\n",
+ "13863746 206210 46149 50\n",
+ "13863747 206211 27845 49\n",
+ "13863748 206211 26604 32"
+ ],
+ "text/html": [
+ "\n",
+ " \n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " user_id | \n",
+ " product_id | \n",
+ " total_orders | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 13863744 | \n",
+ " 206209 | \n",
+ " 48697 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 13863745 | \n",
+ " 206209 | \n",
+ " 48742 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 13863746 | \n",
+ " 206210 | \n",
+ " 46149 | \n",
+ " 50 | \n",
+ "
\n",
+ " \n",
+ " 13863747 | \n",
+ " 206211 | \n",
+ " 27845 | \n",
+ " 49 | \n",
+ "
\n",
+ " \n",
+ " 13863748 | \n",
+ " 206211 | \n",
+ " 26604 | \n",
+ " 32 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ],
+ "application/vnd.google.colaboratory.intrinsic+json": {
+ "type": "dataframe",
+ "summary": "{\n \"name\": \"data\",\n \"rows\": 5,\n \"fields\": [\n {\n \"column\": \"user_id\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1,\n \"min\": 206209,\n \"max\": 206211,\n \"num_unique_values\": 3,\n \"samples\": [\n 206209,\n 206210,\n 206211\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"product_id\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 11361,\n \"min\": 26604,\n \"max\": 48742,\n \"num_unique_values\": 5,\n \"samples\": [\n 48742,\n 26604,\n 46149\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"total_orders\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 24,\n \"min\": 1,\n \"max\": 50,\n \"num_unique_values\": 5,\n \"samples\": [\n 2,\n 32,\n 50\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
+ }
+ },
+ "metadata": {},
+ "execution_count": 8
+ }
],
- "text/html": [
- "\n",
- " \n",
- "
\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " product_id | \n",
- " product_name | \n",
- " total_orders | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " 27845 | \n",
- " Organic Whole Milk | \n",
- " 49 | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " 26604 | \n",
- " Organic Blackberries | \n",
- " 32 | \n",
- "
\n",
- " \n",
- "
\n",
- "
\n",
- "
\n",
- "
\n"
+ "source": [
+ "data_new = pd.DataFrame(\n",
+ " [\n",
+ " [data.user_id.max() + 1, 46149, 50],\n",
+ " [data.user_id.max() + 2, 27845, 49],\n",
+ " [data.user_id.max() + 2, 26604, 32],\n",
+ " ],\n",
+ " columns=[\"user_id\", \"product_id\", \"total_orders\"],\n",
+ ")\n",
+ "data = pd.concat([data, data_new]).reset_index(drop=True)\n",
+ "data.tail()"
]
- },
- "metadata": {}
- }
- ],
- "source": [
- "# Query by user factors\n",
- "test_user_embeddings = test_user_factors.tolist()\n",
- "for embedding, id in zip(test_user_embeddings, test_user_ids):\n",
- " results = tbl.search(embedding).limit(10).to_pandas()\n",
- " display(results)\n",
- " display(products_bought_by_user_in_the_past(id, top=15))"
- ]
- },
- {
- "cell_type": "code",
- "source": [],
- "metadata": {
- "id": "-kWR644v1ZJp"
- },
- "execution_count": null,
- "outputs": []
- }
- ],
- "metadata": {
- "accelerator": "GPU",
- "colab": {
- "gpuType": "T4",
- "provenance": []
- },
- "kernelspec": {
- "display_name": "Python 3 (ipykernel)",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.12.1"
- },
- "vscode": {
- "interpreter": {
- "hash": "5fe10bf018ef3e697f9035d60bf60847932a12bface18908407fd371fe880db9"
- }
- },
- "widgets": {
- "application/vnd.jupyter.widget-state+json": {
- "51febb09c3d54a1a9cf5dd896f3a24f6": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HBoxModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_91b083fde4f14c39bbafb6fd099d44bd",
- "IPY_MODEL_84fca55b676b4ef2add284492c8f4c3c",
- "IPY_MODEL_bb2c985a09564562b6f040e31d817f07"
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "xBC-8PFTt5HD"
+ },
+ "source": [
+ "In the next step, we will extract user and product unique ids, in order to create a `CSR (Compressed Sparse Row)` matrix. This will allow us to perform collaborative filtering.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {
+ "id": "v2_2R7zmt5HE"
+ },
+ "outputs": [],
+ "source": [
+ "# extract unique user and product ids\n",
+ "unique_users = list(np.sort(data.user_id.unique()))\n",
+ "unique_products = list(np.sort(products.product_id.unique()))\n",
+ "purchases = list(data.total_orders)\n",
+ "\n",
+ "# create zero-based index position <-> user/item ID mappings\n",
+ "index_to_user = pd.Series(unique_users)\n",
+ "\n",
+ "# create reverse mappings from user/item ID to index positions\n",
+ "user_to_index = pd.Series(data=index_to_user.index + 1, index=index_to_user.values)\n",
+ "\n",
+ "# create row and column for user and product ids\n",
+ "users_rows = data.user_id.astype(int)\n",
+ "products_cols = data.product_id.astype(int)\n",
+ "\n",
+ "# create CSR matrix\n",
+ "matrix = scipy.sparse.csr_matrix(\n",
+ " (purchases, (users_rows, products_cols)),\n",
+ " shape=(len(unique_users) + 1, len(unique_products) + 1),\n",
+ ")\n",
+ "matrix.data = np.nan_to_num(matrix.data, copy=False)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "II6wOH96t5HF"
+ },
+ "source": [
+ "Let's now create a recommender model using the **implicit** library. The recommendation model is based on the algorithms described in the paper [Collaborative Filtering for Implicit Feedback Datasets](https://www.researchgate.net/publication/220765111_Collaborative_Filtering_for_Implicit_Feedback_Datasets) with performance optimizations described in [Applications of the Conjugate Gradient Method for Implicit Feedback Collaborative Filtering](https://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.379.6473&rep=rep1&type=pdf).\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "JDwIxGMnqNx8"
+ },
+ "source": [
+ "# Difference between collaborative and content filtering\n",
+ "\n",
+ "![picture](https://miro.medium.com/v2/resize:fit:1400/0*R8qw_CXxCc4600bQ.png)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 104,
+ "referenced_widgets": [
+ "c159a1c6fc204d239b5ff7713d3c68fe",
+ "6e3b621f67554d6cbcaa50717008821f",
+ "1e5f629b939247c088b275a72310cfe0",
+ "cfde2bc68d9c448b823c690e15c4a169",
+ "8668f98cebeb4b548e87f2c4e68c9cbf",
+ "7ebca3dced8e4c029398db02169b868e",
+ "28400c62e971452b865e70af4e410afc",
+ "c45f8ded7dc84c18b479c3c427c29463",
+ "301f4f324d594ff2a63dc2f43ba4391f",
+ "0e3594636fbf4263b32d195f31fd29c0",
+ "adf0848d8d8440f18dbd001572772fce"
+ ]
+ },
+ "id": "k0GW99kxt5HF",
+ "outputId": "fd9c03c5-c668-4ddd-8fea-1b3e737b8ad6"
+ },
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "/usr/local/lib/python3.10/dist-packages/implicit/cpu/als.py:95: RuntimeWarning: OpenBLAS is configured to use 2 threads. It is highly recommended to disable its internal threadpool by setting the environment variable 'OPENBLAS_NUM_THREADS=1' or by calling 'threadpoolctl.threadpool_limits(1, \"blas\")'. Having OpenBLAS use a threadpool can lead to severe performance issues here.\n",
+ " check_blas_config()\n"
+ ]
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ " 0%| | 0/50 [00:00, ?it/s]"
+ ],
+ "application/vnd.jupyter.widget-view+json": {
+ "version_major": 2,
+ "version_minor": 0,
+ "model_id": "c159a1c6fc204d239b5ff7713d3c68fe"
+ }
+ },
+ "metadata": {}
+ }
],
- "layout": "IPY_MODEL_cc06b425a9364b6eb07ef77c4ff6fc48"
- }
+ "source": [
+ "import os\n",
+ "\n",
+ "os.environ[\"OPENBLAS_NUM_THREADS\"] = \"1\"\n",
+ "# split data into train and test splits\n",
+ "train, test = evaluation.train_test_split(matrix, train_percentage=0.9)\n",
+ "\n",
+ "# initialize the recommender model\n",
+ "model = implicit.als.AlternatingLeastSquares(\n",
+ " factors=128, regularization=0.05, iterations=50, num_threads=1\n",
+ ")\n",
+ "\n",
+ "alpha = 15\n",
+ "train = (train * alpha).astype(\"double\")\n",
+ "\n",
+ "# train the model on CSR matrix\n",
+ "model.fit(train, show_progress=True)"
+ ]
},
- "91b083fde4f14c39bbafb6fd099d44bd": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_e2e92925bbb442f8a77e2d55886bfbfa",
- "placeholder": "",
- "style": "IPY_MODEL_bc7f6859319f455da1f552b66a6cf026",
- "value": "100%"
- }
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "yN80hSojt5HF"
+ },
+ "source": [
+ "## Let's now evaluate the model."
+ ]
},
- "84fca55b676b4ef2add284492c8f4c3c": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "FloatProgressModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_66396eb857864cc8af94d7e2ced3102c",
- "max": 50,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_38ddb81c475a472d8439dcf72261b727",
- "value": 50
- }
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 118,
+ "referenced_widgets": [
+ "142121b5c098477985d3bf5eb9560ad4",
+ "49f9dab3bf2748a2a0811a7057c32ff7",
+ "3ea9a47313cd496694180de85b51decf",
+ "1cd7d3c410ed449eb88cc8d78e49e10d",
+ "e66f741c3e794c69a328c715cc9b56a2",
+ "4a785b8e4b0d43eca0cf41c2b1cb2f35",
+ "05369b050a61407f8cd0c657afb9a6bd",
+ "9ffbed3caaf84e1db7bde609b6cc06a7",
+ "6142e63dd35c46839b9b8cd520750844",
+ "cb770a9f4224470bba0a7488b76a24c0",
+ "2dea74cc01b04e548bb7a77bd31a2fd2"
+ ]
+ },
+ "id": "BbD8of_nt5HG",
+ "outputId": "ccb5aee8-9bc9-4e9e-b85e-459568897587"
+ },
+ "outputs": [
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ " 0%| | 0/192999 [00:00, ?it/s]"
+ ],
+ "application/vnd.jupyter.widget-view+json": {
+ "version_major": 2,
+ "version_minor": 0,
+ "model_id": "142121b5c098477985d3bf5eb9560ad4"
+ }
+ },
+ "metadata": {}
+ },
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "{'precision': 0.27477883977578244,\n",
+ " 'map': 0.04505803167409894,\n",
+ " 'ndcg': 0.14491547666623716,\n",
+ " 'auc': 0.6550619166364096}"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 11
+ }
+ ],
+ "source": [
+ "test = (test * alpha).astype(\"double\")\n",
+ "evaluation.ranking_metrics_at_k(\n",
+ " model, train, test, K=100, show_progress=True, num_threads=1\n",
+ ")"
+ ]
},
- "bb2c985a09564562b6f040e31d817f07": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_c095ad1b03a34c4e8b2077e373c82a5b",
- "placeholder": "",
- "style": "IPY_MODEL_692c702c31904e058c809ae772f1579a",
- "value": " 50/50 [15:32<00:00, 18.28s/it]"
- }
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "LNmva3Dlt5HG"
+ },
+ "source": [
+ "From the model, we'll be able to retrieve item and user factors, which we will later store in LanceDB as vector embeddings."
+ ]
},
- "cc06b425a9364b6eb07ef77c4ff6fc48": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "JUtCROQKt5HG",
+ "outputId": "948e1487-f0d0-4f54-958e-5328ee8ce5dc"
+ },
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "array([[-0.01073535, 0.01225309, 0.00282226, -0.00914562, 0.01481111,\n",
+ " 0.00767373, -0.00427731, 0.0056481 , 0.00795351, 0.00424179,\n",
+ " -0.00455681, -0.00175643, -0.00220297, -0.0138361 , -0.00829704,\n",
+ " -0.00559029, -0.01200527, 0.00596893, 0.00808288, -0.01018421,\n",
+ " 0.01595827, 0.00867552, 0.02999683, 0.00679287, 0.00992141,\n",
+ " 0.01169722, 0.00303244, 0.00791476, 0.01493086, -0.00200432,\n",
+ " 0.00475327, 0.01365075, -0.00702923, 0.00941817, 0.00221444,\n",
+ " 0.00278489, 0.01576312, 0.00883053, 0.00070464, 0.00061513,\n",
+ " -0.00012623, 0.00052815, 0.01637699, 0.00285431, 0.01877954,\n",
+ " 0.01524585, -0.00794455, 0.01723802, 0.00804117, 0.00352978,\n",
+ " 0.01410676, -0.00625158, -0.00453345, 0.02724608, 0.01960974,\n",
+ " -0.01250265, 0.01295316, -0.00220814, 0.01525659, 0.02175995,\n",
+ " -0.00712163, 0.02181616, 0.00632107, 0.01416669, 0.00973109,\n",
+ " 0.00702811, -0.00343407, -0.01017761, 0.00894559, -0.01581176,\n",
+ " 0.00393035, 0.01568489, -0.00015587, 0.0087583 , 0.00432176,\n",
+ " 0.01403052, -0.01219444, 0.00682962, 0.03258877, 0.00117012,\n",
+ " 0.01065344, 0.01794718, -0.01094627, -0.00213753, -0.01522113,\n",
+ " 0.01338973, 0.01311625, -0.0051905 , -0.00040473, 0.0117563 ,\n",
+ " 0.00617041, -0.00183781, 0.01292013, 0.01622365, 0.01600826,\n",
+ " 0.01047292, 0.00679411, 0.02034847, 0.00313357, 0.00643453,\n",
+ " 0.00643994, 0.0294148 , 0.00119474, 0.00918875, 0.00874455,\n",
+ " -0.00066996, -0.0076339 , 0.00600638, 0.02156091, 0.00289343,\n",
+ " 0.01004079, -0.00886633, 0.00642741, 0.01046264, 0.00393741,\n",
+ " 0.00406919, 0.01451393, -0.00050027, 0.01081037, -0.00308605,\n",
+ " 0.0040453 , 0.00611117, 0.01038004, -0.00762702, 0.00672026,\n",
+ " 0.00491092, 0.00185958, -0.00262321],\n",
+ " [ 0.00545662, 0.007053 , 0.00540562, 0.00373609, 0.00635536,\n",
+ " 0.00629239, 0.00513481, 0.00277516, 0.007132 , 0.00724808,\n",
+ " 0.00476382, 0.0071835 , 0.00610066, 0.00605023, 0.00480638,\n",
+ " 0.00770767, 0.00343371, 0.00492609, 0.00286885, 0.00230649,\n",
+ " 0.00343586, 0.00512864, 0.00704206, 0.00227453, 0.00775074,\n",
+ " 0.00259635, 0.00464828, 0.00654242, 0.00264723, 0.00269244,\n",
+ " 0.00486744, 0.00405304, 0.0053956 , 0.00702862, 0.00516442,\n",
+ " 0.00619653, 0.00276694, 0.00035582, 0.00418825, 0.00154351,\n",
+ " 0.00676016, 0.00293786, 0.00331635, 0.00074961, 0.00679756,\n",
+ " 0.00322292, 0.00703768, 0.0019903 , 0.00339576, 0.00558988,\n",
+ " 0.00578342, 0.00551919, 0.00649765, 0.00622123, 0.00219081,\n",
+ " 0.00116638, 0.00816363, 0.0051754 , 0.00474575, 0.00373885,\n",
+ " 0.00484177, 0.00307221, 0.00550832, 0.00405297, 0.00600216,\n",
+ " 0.00068458, 0.00389447, 0.00340401, 0.00041786, 0.00438944,\n",
+ " 0.00359013, 0.00517367, 0.00413423, 0.0033591 , 0.00573929,\n",
+ " 0.00269938, 0.00455329, 0.00603866, 0.00790164, 0.00580972,\n",
+ " 0.00060218, 0.00565166, 0.00748183, 0.00426076, 0.00486007,\n",
+ " 0.00501308, 0.00768831, 0.00909834, 0.00239457, 0.00698307,\n",
+ " 0.00221974, 0.00474268, 0.00050845, 0.00146767, 0.00812766,\n",
+ " -0.00106332, 0.00576758, 0.00434267, 0.00688091, 0.00063075,\n",
+ " 0.00535236, 0.00246389, 0.00355543, 0.00545268, 0.00545283,\n",
+ " 0.00351201, 0.00507428, 0.00600283, 0.0009795 , 0.00358418,\n",
+ " 0.00566337, 0.00459488, 0.00394963, 0.00848473, 0.00374577,\n",
+ " -0.00012899, 0.00295235, 0.00417557, 0.00134743, 0.00116836,\n",
+ " 0.00667214, 0.00117854, 0.0023317 , 0.00432837, 0.00205162,\n",
+ " 0.00543584, 0.00155425, 0.00754672]], dtype=float32)"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 12
+ }
+ ],
+ "source": [
+ "model.item_factors[1:3]"
+ ]
},
- "e2e92925bbb442f8a77e2d55886bfbfa": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "O3onbJmnt5HG",
+ "outputId": "6e83ca8b-8d2d-4bea-8d01-5750e7b58647"
+ },
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "array([[ 2.35114765e+00, -9.82077837e-01, 9.20681953e-02,\n",
+ " -1.55748022e+00, 2.61008650e-01, 1.38084328e+00,\n",
+ " -1.04197145e+00, 2.08925948e-01, 1.45271456e+00,\n",
+ " -4.09525931e-01, -2.79641271e-01, -1.06512582e+00,\n",
+ " -2.45185947e+00, -8.88424039e-01, -9.62235093e-01,\n",
+ " -3.62847820e-02, -9.97323275e-01, 3.57037872e-01,\n",
+ " 1.39508307e-01, -7.77906895e-01, -3.02864462e-01,\n",
+ " -2.49430239e-01, 2.07240963e+00, -1.16224551e+00,\n",
+ " 7.26323247e-01, 1.34066701e-01, -1.00640464e+00,\n",
+ " 6.03325069e-02, 1.24448466e+00, 3.97046000e-01,\n",
+ " -1.01987794e-01, -2.13813528e-01, -5.79491258e-02,\n",
+ " -3.17022443e-01, 7.47085869e-01, 1.62657106e+00,\n",
+ " 9.75901306e-01, 1.17893267e+00, -6.45162404e-01,\n",
+ " -1.40145004e+00, -6.50845766e-01, 4.65424120e-01,\n",
+ " 1.01861715e+00, 1.16076279e+00, 7.42953658e-01,\n",
+ " -5.01821935e-01, 4.48503673e-01, 3.03975850e-01,\n",
+ " -8.14426184e-01, -5.65647744e-02, 5.86561143e-01,\n",
+ " -3.05516303e-01, -1.21209860e+00, -4.88223583e-01,\n",
+ " 5.93207955e-01, -7.97120512e-01, 3.37936103e-01,\n",
+ " -1.40010929e+00, -5.07596850e-01, 1.20076036e+00,\n",
+ " 9.60147753e-02, -7.36100137e-01, 7.32163787e-01,\n",
+ " -6.26076534e-02, -9.86503780e-01, 1.08208275e+00,\n",
+ " 2.48168632e-01, -1.40475631e+00, -1.70012355e+00,\n",
+ " -8.03964674e-01, -4.82192487e-02, 2.58276653e+00,\n",
+ " -6.63681030e-01, 6.28947258e-01, -1.30332559e-01,\n",
+ " 6.26426578e-01, -7.09159493e-01, -2.51678526e-01,\n",
+ " 3.70608002e-01, 6.90244198e-01, 1.52901638e+00,\n",
+ " -9.07164812e-01, -3.33825918e-03, 2.82642663e-01,\n",
+ " -1.56681025e+00, -7.89902925e-01, -1.48571885e+00,\n",
+ " 4.32960272e-01, -3.47612590e-01, 2.16205135e-01,\n",
+ " 1.89403951e+00, -7.34427869e-01, 1.24272621e+00,\n",
+ " 8.83789957e-01, -8.86934042e-01, 2.14222240e+00,\n",
+ " 1.24191558e+00, 2.07501030e+00, -1.30105615e+00,\n",
+ " 1.14052501e-02, 1.34931052e+00, 1.88309300e+00,\n",
+ " -1.72559297e+00, -3.85144413e-01, 2.95971125e-01,\n",
+ " -8.28353167e-01, -6.39615953e-02, 1.42373240e+00,\n",
+ " 2.33709216e+00, 4.29843925e-02, 1.47847342e+00,\n",
+ " -2.92032450e-01, 6.43620074e-01, 8.92000616e-01,\n",
+ " -3.62094373e-01, 1.07280612e+00, -2.14163110e-01,\n",
+ " -1.21664122e-01, 8.64231884e-01, -1.27431108e-02,\n",
+ " -2.09421575e-01, 6.34409264e-02, -7.02818632e-01,\n",
+ " -4.97576185e-02, -1.50734171e-01, 2.71373838e-01,\n",
+ " -7.60752439e-01, -2.56484568e-01],\n",
+ " [-2.70342708e-01, 8.88925731e-01, 7.41030201e-02,\n",
+ " 2.24988461e+00, -4.16443706e-01, -6.09414756e-01,\n",
+ " -6.63036764e-01, -1.03103137e+00, -1.12276042e+00,\n",
+ " -1.73997521e+00, -1.05744338e+00, -3.40162873e-01,\n",
+ " -4.80260178e-02, -1.28994131e+00, -9.22097385e-01,\n",
+ " 2.52364874e-01, 3.80463481e-01, -2.41020039e-01,\n",
+ " -1.05217624e+00, 4.85703856e-01, -2.21715212e-01,\n",
+ " -5.14087617e-01, -9.42420840e-01, 7.15354204e-01,\n",
+ " -6.49898648e-01, 2.98441458e+00, 5.90562761e-01,\n",
+ " 1.27846611e+00, 7.21186638e-01, 4.63127196e-01,\n",
+ " -2.18219861e-01, -1.13364458e+00, 8.96203935e-01,\n",
+ " 3.13969404e-01, -1.23078191e+00, 1.81982982e+00,\n",
+ " 1.67659032e+00, 9.17877018e-01, -8.09818059e-02,\n",
+ " -8.91748905e-01, -3.56716752e-01, -5.39918005e-01,\n",
+ " 1.46798015e+00, -7.61051416e-01, -1.02508759e+00,\n",
+ " -6.00555420e-01, -5.49519420e-01, -4.13337052e-01,\n",
+ " -2.15971828e+00, -7.64563233e-02, -1.52905011e+00,\n",
+ " -7.08452106e-01, -2.03598022e+00, -9.20440614e-01,\n",
+ " 1.53826848e-01, 1.56537902e+00, -1.45322108e+00,\n",
+ " 2.59730071e-01, 2.66617507e-01, -3.77679914e-01,\n",
+ " 3.37540567e-01, -4.00173254e-02, 8.33883584e-01,\n",
+ " 8.45754921e-01, 1.39245242e-01, 9.91499722e-01,\n",
+ " 4.64247793e-01, -3.97137880e-01, -1.03083467e+00,\n",
+ " -1.72587514e+00, -4.60681379e-01, 1.62118340e+00,\n",
+ " 3.89182389e-01, -9.17263985e-01, -1.27384162e+00,\n",
+ " 1.91881967e+00, 1.76994383e+00, 7.85243988e-01,\n",
+ " -1.10948071e-01, 1.41002858e+00, 2.25326085e+00,\n",
+ " -6.71080649e-01, 6.25545800e-01, -6.13183640e-02,\n",
+ " 5.39246261e-01, 8.63722503e-01, 1.46043479e-01,\n",
+ " -7.11409628e-01, 3.97266221e+00, 2.32369137e+00,\n",
+ " 2.12601995e+00, -1.27442431e+00, 5.20430267e-01,\n",
+ " -2.87687361e-01, -2.77719474e+00, 4.49669933e+00,\n",
+ " -7.76941776e-02, -1.42210579e+00, 1.07571304e+00,\n",
+ " 2.24175000e+00, 1.94092798e+00, -4.92816478e-01,\n",
+ " 1.43253422e+00, -2.90138405e-02, 1.13699532e+00,\n",
+ " 1.20133042e+00, -3.55294824e-01, 2.76309562e+00,\n",
+ " 2.45419478e+00, -6.42450869e-01, -2.90709686e+00,\n",
+ " 1.28045070e+00, -3.66204560e-01, -4.94375974e-01,\n",
+ " -2.83194995e+00, -8.55712235e-01, 3.17946784e-02,\n",
+ " -6.97229877e-02, 1.12658954e+00, 1.04045498e+00,\n",
+ " -7.16470957e-01, -5.65994203e-01, -1.13983297e+00,\n",
+ " 2.50437784e+00, 3.62668425e-01, 1.46130455e+00,\n",
+ " -9.03123736e-01, -3.25637698e-01]], dtype=float32)"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 13
+ }
+ ],
+ "source": [
+ "model.user_factors[1:3]"
+ ]
},
- "bc7f6859319f455da1f552b66a6cf026": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "38rssdYCBR4E"
+ },
+ "source": [
+ "## Let's save the data and create an empty LanceDB Table using a Pydantic model.\n",
+ "A Table is designed to store large numbers of columns and huge quantities of data! For those interested, LanceDB is columnar and uses Lance, an open data format, to store data."
+ ]
},
- "66396eb857864cc8af94d7e2ced3102c": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {
+ "id": "3_ykVLT6t5HH"
+ },
+ "outputs": [],
+ "source": [
+ "db = lancedb.connect(\"data/lancedb\")"
+ ]
},
- "38ddb81c475a472d8439dcf72261b727": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "ProgressStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "bar_color": null,
- "description_width": ""
- }
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {
+ "id": "ufHsF0o4t5HI"
+ },
+ "outputs": [],
+ "source": [
+ "class ProductModel(pydantic.BaseModel):\n",
+ " product_id: int\n",
+ " product_name: str\n",
+ " vector: vector(128)\n",
+ "\n",
+ "\n",
+ "schema = pydantic_to_schema(ProductModel)\n",
+ "table_name = \"product_recommender\"\n",
+ "tbl = db.create_table(table_name, schema=schema, mode=\"overwrite\")"
+ ]
},
- "c095ad1b03a34c4e8b2077e373c82a5b": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "0-2K-g4-t5HJ"
+ },
+ "source": [
+ "Let's now store our item factors into the table via the vector column of `product_entries`."
+ ]
},
- "692c702c31904e058c809ae772f1579a": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {
+ "id": "NOOPF9zOt5HJ"
+ },
+ "outputs": [],
+ "source": [
+ "# Transform items into factors\n",
+ "items_factors = model.item_factors\n",
+ "product_entries = products[[\"product_id\", \"product_name\"]].drop_duplicates()\n",
+ "product_entries[\"product_id\"] = product_entries.product_id.astype(\"int64\")\n",
+ "item_embeddings = items_factors[1:].tolist()\n",
+ "product_entries[\"vector\"] = item_embeddings\n",
+ "\n",
+ "tbl.add(product_entries)"
+ ]
},
- "ae94cc355e0c4f8b8b73824ae2ef5632": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HBoxModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_b07491e5db2d42b499fce4d7caddfe6f",
- "IPY_MODEL_f79ee40b5f854a8b99c57b7c5156d3cd",
- "IPY_MODEL_3c2bc0b631644bb992905d55dfe0a7a8"
- ],
- "layout": "IPY_MODEL_41ebafe8393c451f83bfd2132a677a67"
- }
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "j3aU4z-tSbWE"
+ },
+ "source": [
+ "## Let's create an ANN index in order to speed up retrieval. This might take a while."
+ ]
},
- "b07491e5db2d42b499fce4d7caddfe6f": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_9b1e85ca94ef442fbd546647f72e6905",
- "placeholder": "",
- "style": "IPY_MODEL_ae1b3cb276f44f5ebab3eaf8f7b85e67",
- "value": "100%"
- }
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "metadata": {
+ "id": "H8HyvjCFSeaz"
+ },
+ "outputs": [],
+ "source": [
+ "tbl.create_index(num_partitions=256, num_sub_vectors=16)"
+ ]
},
- "f79ee40b5f854a8b99c57b7c5156d3cd": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "FloatProgressModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_2782769e3daa491385bcc8ae34f24f3b",
- "max": 192941,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_5d41569b941445bea2497c89d3c8e6cb",
- "value": 192941
- }
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "ibNMrxyRt5HK"
+ },
+ "source": [
+ "This is a helper function for analysing recommendations later.\n",
+ "It returns the top N products that a user bought in the past (ranked by `total_orders`, highest first)."
+ ]
},
- "3c2bc0b631644bb992905d55dfe0a7a8": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_5e7dd2740d174064ac2d1cbc75cb5909",
- "placeholder": "",
- "style": "IPY_MODEL_a67972dc3f264b3699816257f1ad9ed7",
- "value": " 192941/192941 [02:05<00:00, 1812.48it/s]"
- }
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {
+ "id": "Uzgk5Od0t5HK"
+ },
+ "outputs": [],
+ "source": [
+ "def products_bought_by_user_in_the_past(user_id: int, top: int = 10):\n",
+ " selected = data[data.user_id == user_id].sort_values(\n",
+ " by=[\"total_orders\"], ascending=False\n",
+ " )\n",
+ "\n",
+ " selected[\"product_name\"] = selected[\"product_id\"].map(\n",
+ " product_entries.set_index(\"product_id\")[\"product_name\"]\n",
+ " )\n",
+ " selected = selected[[\"product_id\", \"product_name\", \"total_orders\"]].reset_index(\n",
+ " drop=True\n",
+ " )\n",
+ " if selected.shape[0] < top:\n",
+ " return selected\n",
+ "\n",
+ " return selected[:top]"
+ ]
},
- "41ebafe8393c451f83bfd2132a677a67": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "ULyVnHEXt5HK"
+ },
+ "source": [
+ "Let's retrieve our test users so we can query for recommendations."
+ ]
},
- "9b1e85ca94ef442fbd546647f72e6905": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "metadata": {
+ "id": "Wwl7yFKTt5HK"
+ },
+ "outputs": [],
+ "source": [
+ "test_user_ids = [206210, 206211]\n",
+ "test_user_factors = model.user_factors[user_to_index[test_user_ids]]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "wTh61ou3t5HL"
+ },
+ "source": [
+ "## Let's now query LanceDB to retrieve recommendations."
+ ]
},
- "ae1b3cb276f44f5ebab3eaf8f7b85e67": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 868
+ },
+ "id": "UiZg4Iset5HL",
+ "outputId": "636ed6c9-c84c-4ad8-96a5-c6b1bf6e3cf1"
+ },
+ "outputs": [
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ " product_id product_name \\\n",
+ "0 46149 Zero Calorie Cola \n",
+ "1 196 Soda \n",
+ "2 22802 Mineral Water \n",
+ "3 40939 Drinking Water \n",
+ "4 31651 Extra Fancy Unsalted Mixed Nuts \n",
+ "5 37710 Trail Mix \n",
+ "6 41400 Crunchy Oats 'n Honey Granola Bars \n",
+ "7 26348 Mixed Fruit Fruit Snacks \n",
+ "8 46061 Popcorn \n",
+ "9 39657 Milk Chocolate Almonds \n",
+ "\n",
+ " vector _distance \n",
+ "0 [0.037515923, -0.030325921, 0.004221245, -0.00... 38.190578 \n",
+ "1 [0.04531822, -0.04450815, -0.0022076364, -0.02... 38.340080 \n",
+ "2 [0.030236538, -0.0041136313, 0.015683502, -0.0... 38.593525 \n",
+ "3 [0.03287196, -0.017454194, 0.009911481, -0.004... 38.606468 \n",
+ "4 [0.037796307, -0.009871203, -0.0020715303, -0.... 38.642967 \n",
+ "5 [0.05062829, -0.017916694, 0.0027849572, 0.001... 38.668938 \n",
+ "6 [0.028622035, -0.013106515, -0.0072577046, -0.... 38.703171 \n",
+ "7 [0.011525251, -0.032522, -0.021976499, 0.01198... 38.709934 \n",
+ "8 [0.039293304, -0.016017294, -0.0010792917, 0.0... 38.713402 \n",
+ "9 [0.030015469, -0.00927157, 0.0061932686, 0.000... 38.748997 "
+ ],
+ "text/html": [
+ "\n",
+ " \n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " product_id | \n",
+ " product_name | \n",
+ " vector | \n",
+ " _distance | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 46149 | \n",
+ " Zero Calorie Cola | \n",
+ " [0.037515923, -0.030325921, 0.004221245, -0.00... | \n",
+ " 38.190578 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 196 | \n",
+ " Soda | \n",
+ " [0.04531822, -0.04450815, -0.0022076364, -0.02... | \n",
+ " 38.340080 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 22802 | \n",
+ " Mineral Water | \n",
+ " [0.030236538, -0.0041136313, 0.015683502, -0.0... | \n",
+ " 38.593525 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 40939 | \n",
+ " Drinking Water | \n",
+ " [0.03287196, -0.017454194, 0.009911481, -0.004... | \n",
+ " 38.606468 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 31651 | \n",
+ " Extra Fancy Unsalted Mixed Nuts | \n",
+ " [0.037796307, -0.009871203, -0.0020715303, -0.... | \n",
+ " 38.642967 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " 37710 | \n",
+ " Trail Mix | \n",
+ " [0.05062829, -0.017916694, 0.0027849572, 0.001... | \n",
+ " 38.668938 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " 41400 | \n",
+ " Crunchy Oats 'n Honey Granola Bars | \n",
+ " [0.028622035, -0.013106515, -0.0072577046, -0.... | \n",
+ " 38.703171 | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " 26348 | \n",
+ " Mixed Fruit Fruit Snacks | \n",
+ " [0.011525251, -0.032522, -0.021976499, 0.01198... | \n",
+ " 38.709934 | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " 46061 | \n",
+ " Popcorn | \n",
+ " [0.039293304, -0.016017294, -0.0010792917, 0.0... | \n",
+ " 38.713402 | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " 39657 | \n",
+ " Milk Chocolate Almonds | \n",
+ " [0.030015469, -0.00927157, 0.0061932686, 0.000... | \n",
+ " 38.748997 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ],
+ "application/vnd.google.colaboratory.intrinsic+json": {
+ "type": "dataframe",
+ "variable_name": "results",
+ "summary": "{\n \"name\": \"results\",\n \"rows\": 10,\n \"fields\": [\n {\n \"column\": \"product_id\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 14019,\n \"min\": 196,\n \"max\": 46149,\n \"num_unique_values\": 10,\n \"samples\": [\n 46061,\n 196,\n 37710\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"product_name\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 10,\n \"samples\": [\n \"Popcorn\",\n \"Soda\",\n \"Trail Mix\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"vector\",\n \"properties\": {\n \"dtype\": \"object\",\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"_distance\",\n \"properties\": {\n \"dtype\": \"float32\",\n \"num_unique_values\": 10,\n \"samples\": [\n 38.713401794433594,\n 38.34008026123047,\n 38.66893768310547\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
+ }
+ },
+ "metadata": {}
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ " product_id product_name total_orders\n",
+ "0 46149 Zero Calorie Cola 50"
+ ],
+ "text/html": [
+ "\n",
+ " \n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " product_id | \n",
+ " product_name | \n",
+ " total_orders | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 46149 | \n",
+ " Zero Calorie Cola | \n",
+ " 50 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ],
+ "application/vnd.google.colaboratory.intrinsic+json": {
+ "type": "dataframe",
+ "summary": "{\n \"name\": \" display(products_bought_by_user_in_the_past(id, top=15))\",\n \"rows\": 1,\n \"fields\": [\n {\n \"column\": \"product_id\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": null,\n \"min\": 46149,\n \"max\": 46149,\n \"num_unique_values\": 1,\n \"samples\": [\n 46149\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"product_name\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 1,\n \"samples\": [\n \"Zero Calorie Cola\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"total_orders\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": null,\n \"min\": 50,\n \"max\": 50,\n \"num_unique_values\": 1,\n \"samples\": [\n 50\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
+ }
+ },
+ "metadata": {}
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ " product_id product_name \\\n",
+ "0 26604 Organic Blackberries \n",
+ "1 27845 Organic Whole Milk \n",
+ "2 27966 Organic Raspberries \n",
+ "3 43352 Raspberries \n",
+ "4 21288 Blackberries \n",
+ "5 39275 Organic Blueberries \n",
+ "6 11777 Red Raspberries \n",
+ "7 9076 Blueberries \n",
+ "8 21137 Organic Strawberries \n",
+ "9 11422 Plain Greek Yogurt \n",
+ "\n",
+ " vector _distance \n",
+ "0 [0.019478824, 0.007443799, 0.004226536, 0.0283... 16.314867 \n",
+ "1 [-0.03417227, -0.053161107, 0.03893201, 0.0150... 16.432335 \n",
+ "2 [0.024305355, -0.0063351737, 0.029324768, 0.02... 16.577738 \n",
+ "3 [0.020642506, 0.025494106, 0.0050161625, 0.003... 16.588812 \n",
+ "4 [-0.00844225, 0.01996236, -0.0148576135, 0.012... 16.672234 \n",
+ "5 [0.035410225, -0.0029810749, 0.014112177, 0.00... 16.684757 \n",
+ "6 [0.020807281, -0.015660688, 0.010914551, 0.028... 16.746056 \n",
+ "7 [0.033343736, 0.0068411743, 0.0028535812, 0.00... 16.765997 \n",
+ "8 [0.018478896, -0.0014569649, 0.01558258, 0.009... 16.883642 \n",
+ "9 [0.003926732, -0.02004065, 0.059874147, 0.0318... 17.008499 "
+ ],
+ "text/html": [
+ "\n",
+ " \n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " product_id | \n",
+ " product_name | \n",
+ " vector | \n",
+ " _distance | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 26604 | \n",
+ " Organic Blackberries | \n",
+ " [0.019478824, 0.007443799, 0.004226536, 0.0283... | \n",
+ " 16.314867 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 27845 | \n",
+ " Organic Whole Milk | \n",
+ " [-0.03417227, -0.053161107, 0.03893201, 0.0150... | \n",
+ " 16.432335 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 27966 | \n",
+ " Organic Raspberries | \n",
+ " [0.024305355, -0.0063351737, 0.029324768, 0.02... | \n",
+ " 16.577738 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 43352 | \n",
+ " Raspberries | \n",
+ " [0.020642506, 0.025494106, 0.0050161625, 0.003... | \n",
+ " 16.588812 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 21288 | \n",
+ " Blackberries | \n",
+ " [-0.00844225, 0.01996236, -0.0148576135, 0.012... | \n",
+ " 16.672234 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " 39275 | \n",
+ " Organic Blueberries | \n",
+ " [0.035410225, -0.0029810749, 0.014112177, 0.00... | \n",
+ " 16.684757 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " 11777 | \n",
+ " Red Raspberries | \n",
+ " [0.020807281, -0.015660688, 0.010914551, 0.028... | \n",
+ " 16.746056 | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " 9076 | \n",
+ " Blueberries | \n",
+ " [0.033343736, 0.0068411743, 0.0028535812, 0.00... | \n",
+ " 16.765997 | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " 21137 | \n",
+ " Organic Strawberries | \n",
+ " [0.018478896, -0.0014569649, 0.01558258, 0.009... | \n",
+ " 16.883642 | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " 11422 | \n",
+ " Plain Greek Yogurt | \n",
+ " [0.003926732, -0.02004065, 0.059874147, 0.0318... | \n",
+ " 17.008499 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ],
+ "application/vnd.google.colaboratory.intrinsic+json": {
+ "type": "dataframe",
+ "variable_name": "results",
+ "summary": "{\n \"name\": \"results\",\n \"rows\": 10,\n \"fields\": [\n {\n \"column\": \"product_id\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 11503,\n \"min\": 9076,\n \"max\": 43352,\n \"num_unique_values\": 10,\n \"samples\": [\n 21137,\n 27845,\n 39275\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"product_name\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 10,\n \"samples\": [\n \"Organic Strawberries\",\n \"Organic Whole Milk\",\n \"Organic Blueberries\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"vector\",\n \"properties\": {\n \"dtype\": \"object\",\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"_distance\",\n \"properties\": {\n \"dtype\": \"float32\",\n \"num_unique_values\": 10,\n \"samples\": [\n 16.883642196655273,\n 16.432334899902344,\n 16.684757232666016\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
+ }
+ },
+ "metadata": {}
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ " product_id product_name total_orders\n",
+ "0 27845 Organic Whole Milk 49\n",
+ "1 26604 Organic Blackberries 32"
+ ],
+ "text/html": [
+ "\n",
+ " \n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " product_id | \n",
+ " product_name | \n",
+ " total_orders | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 27845 | \n",
+ " Organic Whole Milk | \n",
+ " 49 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 26604 | \n",
+ " Organic Blackberries | \n",
+ " 32 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ],
+ "application/vnd.google.colaboratory.intrinsic+json": {
+ "type": "dataframe",
+ "summary": "{\n \"name\": \" display(products_bought_by_user_in_the_past(id, top=15))\",\n \"rows\": 2,\n \"fields\": [\n {\n \"column\": \"product_id\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 877,\n \"min\": 26604,\n \"max\": 27845,\n \"num_unique_values\": 2,\n \"samples\": [\n 26604,\n 27845\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"product_name\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"Organic Blackberries\",\n \"Organic Whole Milk\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"total_orders\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 12,\n \"min\": 32,\n \"max\": 49,\n \"num_unique_values\": 2,\n \"samples\": [\n 32,\n 49\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
+ }
+ },
+ "metadata": {}
+ }
+ ],
+ "source": [
+ "# Query by user factors\n",
+ "test_user_embeddings = test_user_factors.tolist()\n",
+ "for embedding, id in zip(test_user_embeddings, test_user_ids):\n",
+ " results = tbl.search(embedding).limit(10).to_pandas()\n",
+ " display(results)\n",
+ " display(products_bought_by_user_in_the_past(id, top=15))"
+ ]
+ }
+ ],
+ "metadata": {
+ "accelerator": "GPU",
+ "colab": {
+ "gpuType": "T4",
+ "provenance": []
},
- "2782769e3daa491385bcc8ae34f24f3b": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
},
- "5d41569b941445bea2497c89d3c8e6cb": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "ProgressStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "bar_color": null,
- "description_width": ""
- }
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.12.1"
},
- "5e7dd2740d174064ac2d1cbc75cb5909": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
+ "vscode": {
+ "interpreter": {
+ "hash": "5fe10bf018ef3e697f9035d60bf60847932a12bface18908407fd371fe880db9"
+ }
},
- "a67972dc3f264b3699816257f1ad9ed7": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
+ "widgets": {
+ "application/vnd.jupyter.widget-state+json": {
+ "c159a1c6fc204d239b5ff7713d3c68fe": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "HBoxModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_6e3b621f67554d6cbcaa50717008821f",
+ "IPY_MODEL_1e5f629b939247c088b275a72310cfe0",
+ "IPY_MODEL_cfde2bc68d9c448b823c690e15c4a169"
+ ],
+ "layout": "IPY_MODEL_8668f98cebeb4b548e87f2c4e68c9cbf"
+ }
+ },
+ "6e3b621f67554d6cbcaa50717008821f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "HTMLModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_7ebca3dced8e4c029398db02169b868e",
+ "placeholder": "",
+ "style": "IPY_MODEL_28400c62e971452b865e70af4e410afc",
+ "value": "100%"
+ }
+ },
+ "1e5f629b939247c088b275a72310cfe0": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "FloatProgressModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_c45f8ded7dc84c18b479c3c427c29463",
+ "max": 50,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_301f4f324d594ff2a63dc2f43ba4391f",
+ "value": 50
+ }
+ },
+ "cfde2bc68d9c448b823c690e15c4a169": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "HTMLModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_0e3594636fbf4263b32d195f31fd29c0",
+ "placeholder": "",
+ "style": "IPY_MODEL_adf0848d8d8440f18dbd001572772fce",
+ "value": " 50/50 [17:12<00:00, 20.75s/it]"
+ }
+ },
+ "8668f98cebeb4b548e87f2c4e68c9cbf": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "7ebca3dced8e4c029398db02169b868e": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "28400c62e971452b865e70af4e410afc": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "DescriptionStyleModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "c45f8ded7dc84c18b479c3c427c29463": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "301f4f324d594ff2a63dc2f43ba4391f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "ProgressStyleModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "0e3594636fbf4263b32d195f31fd29c0": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "adf0848d8d8440f18dbd001572772fce": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "DescriptionStyleModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "142121b5c098477985d3bf5eb9560ad4": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "HBoxModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_49f9dab3bf2748a2a0811a7057c32ff7",
+ "IPY_MODEL_3ea9a47313cd496694180de85b51decf",
+ "IPY_MODEL_1cd7d3c410ed449eb88cc8d78e49e10d"
+ ],
+ "layout": "IPY_MODEL_e66f741c3e794c69a328c715cc9b56a2"
+ }
+ },
+ "49f9dab3bf2748a2a0811a7057c32ff7": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "HTMLModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_4a785b8e4b0d43eca0cf41c2b1cb2f35",
+ "placeholder": "",
+ "style": "IPY_MODEL_05369b050a61407f8cd0c657afb9a6bd",
+ "value": "100%"
+ }
+ },
+ "3ea9a47313cd496694180de85b51decf": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "FloatProgressModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_9ffbed3caaf84e1db7bde609b6cc06a7",
+ "max": 192999,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_6142e63dd35c46839b9b8cd520750844",
+ "value": 192999
+ }
+ },
+ "1cd7d3c410ed449eb88cc8d78e49e10d": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "HTMLModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_cb770a9f4224470bba0a7488b76a24c0",
+ "placeholder": "",
+ "style": "IPY_MODEL_2dea74cc01b04e548bb7a77bd31a2fd2",
+ "value": " 192999/192999 [02:18<00:00, 1522.55it/s]"
+ }
+ },
+ "e66f741c3e794c69a328c715cc9b56a2": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "4a785b8e4b0d43eca0cf41c2b1cb2f35": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "05369b050a61407f8cd0c657afb9a6bd": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "DescriptionStyleModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "9ffbed3caaf84e1db7bde609b6cc06a7": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "6142e63dd35c46839b9b8cd520750844": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "ProgressStyleModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "cb770a9f4224470bba0a7488b76a24c0": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "2dea74cc01b04e548bb7a77bd31a2fd2": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "DescriptionStyleModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ }
+ }
}
- }
- }
- },
- "nbformat": 4,
- "nbformat_minor": 0
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
}
\ No newline at end of file