diff --git a/README.md b/README.md index aa8f485..9921fcf 100644 --- a/README.md +++ b/README.md @@ -1,29 +1,50 @@ # ChromaViz -A package for visualising vector embedding collections as part of the [Chroma](https://trychroma.com) vector database. +A package for visualising vector embedding collections as part of the [Chroma](https://trychroma.com) vector database. -Uses [Flask](https://flask.palletsprojects.com/en/2.3.x/), [Vite](https://vitejs.dev), and [react-three-fiber](https://github.com/pmndrs/react-three-fiber) to host a live 3D view of the data in a web browser, should perform well up to 10k+ documents. Dimensional reduction is performed using PCA for colors down to 50 dimensions, followed by tSNE down to 3. +Uses [Flask](https://flask.palletsprojects.com/en/2.3.x/), [Vite](https://vitejs.dev), +and [react-three-fiber](https://github.com/pmndrs/react-three-fiber) to host a live 3D view of the data in a web +browser, should perform well up to 10k+ documents. Dimensional reduction is performed using PCA for colors down to 50 +dimensions, followed by tSNE down to 3. ## How to Use + `pip install chromaviz` or `pip install git+https://github.com/mtybadger/chromaviz/`. -After installing from pip, simply call `visualize_collection` with a valid ChromaDB collection, and chromaviz will do the rest. -``` +After installing from pip, simply call `visualize_collection` with a valid ChromaDB collection, and chromaviz will do +the rest. 
+ +```python from chromaviz import visualize_collection visualize_collection(chromadb.Collection) ``` -It also works with Langchain+Chroma, as in: + +Visualization of query results: + +```python +from chromaviz import visualize_collection +import chromadb +client = chromadb.HttpClient() +collection = client.get_collection("my_collection") +visualize_collection(collection,query="My question goes here",n_results=100) ``` + +It also works with Langchain+Chroma, as in: + +```python from langchain.vectorstores import Chroma vectordb = Chroma.from_documents(data, embeddings, ids) from chromaviz import visualize_collection visualize_collection(vectordb._collection) ``` + ## Screenshots + ![Screenshot of ChromaViz on a biological dataset](/images/1.png) ![Screenshot of ChromaViz close up](/images/2.png) ## To-Do + - [ ] More dimensional reduction options and flexibility - [ ] Refactor extremely shoddy React code - [ ] Improve UX diff --git a/chromaviz/visualize.py b/chromaviz/visualize.py index b0608de..623bdd5 100644 --- a/chromaviz/visualize.py +++ b/chromaviz/visualize.py @@ -11,58 +11,61 @@ import webbrowser import importlib.resources - + app = Flask(__name__) CORS(app) from flask import cli from flask import Response from flask import request + cli.show_server_banner = lambda *_: None data = [[]] - @app.route("/") def hello_world(): with importlib.resources.open_text("chromaviz", "index.html") as file: - contents = file.read() - return contents + contents = file.read() + return contents + @app.route('/assets/') def serve_assets(filename): - mime = 'text/html' - if(".js" in filename): - mime = 'text/javascript' - if('.css' in filename): + if (".js" in filename): + mime = 'text/javascript' + if ('.css' in filename): mime = 'text/css' # Logic to serve the assets # Here, you can use the `filename` parameter to determine which asset to serve # You can use the `url_for` function to generate the URL for the asset dynamically with importlib.resources.open_text("chromaviz", 
filename) as file: - contents = file.read() - return Response(contents, mimetype=mime) + contents = file.read() + return Response(contents, mimetype=mime) + @app.route("/data") def data_api(): + global data df = pd.DataFrame.from_dict(data=data["embeddings"]) print(df) print('Size of the dataframe: {}'.format(df.shape)) - + pca_50 = PCA(n_components=50) pca_result_50 = pca_50.fit_transform(df) - print('Cumulative explained variation for 50 principal components: {}'.format(np.sum(pca_50.explained_variance_ratio_))) + print('Cumulative explained variation for 50 principal components: {}'.format( + np.sum(pca_50.explained_variance_ratio_))) time_start = time.time() tsne = TSNE(n_components=3, verbose=0, perplexity=40, n_iter=300) tsne_pca_results = tsne.fit_transform(pca_result_50) - print('t-SNE done! Time elapsed: {} seconds'.format(time.time()-time_start)) + print('t-SNE done! Time elapsed: {} seconds'.format(time.time() - time_start)) tsne_pca_results = tsne_pca_results / 3 pca_3 = PCA(n_components=3) @@ -72,22 +75,43 @@ def data_api(): groups = np.argmax(pca_result_50, axis=1) points = [] - for position, document, metadata, id, group in zip(tsne_pca_results.tolist(), data["documents"], data["metadatas"], data["ids"], groups.tolist()): + for position, document, metadata, id, group in zip(tsne_pca_results.tolist(), data["documents"], data["metadatas"], + data["ids"], groups.tolist()): point = { - 'position': position, - 'document': document, - 'metadata': metadata, - 'id': id, - 'group': group + 'position': position, + 'document': document, + 'metadata': metadata, + 'id': id, + 'group': group } points.append(point) return json.dumps({'points': points}) + client = chromadb.Client() -def visualize_collection(col: chromadb.api.models.Collection.Collection): + +def visualize_collection(col: chromadb.api.models.Collection.Collection, query: str = None, + n_results: int = 50) -> None: global data - data = col.get(include=["documents", "metadatas", "embeddings"]) - 
webbrowser.open('http://127.0.0.1:5000') + if query is not None: + if n_results is None: + n_results = 50 + if n_results < 50: + print("Warning: n_results is less than 50. This may lead to unexpected results.") + n_results = 50 + result = col.query(query_texts=[query], n_results=n_results, include=["documents", "metadatas", "embeddings"]) + if len(result["ids"]) < 50: + raise Exception("Query returned less than 50 results. This may lead to unexpected results.") + data = { + "ids": [id for id in result["ids"][0]] if result["ids"] else None, + "embeddings": [embed for embed in result["embeddings"][0]] if result["embeddings"] else None, + "documents": [doc for doc in result["documents"][0]] if result["documents"] else None, + "metadatas": [meta for meta in result["metadatas"][0]] if result["metadatas"] else None, + "distances": [dist for dist in result["distances"][0]] if result["distances"] else None + } + else: + data = col.get(include=["documents", "metadatas", "embeddings"]) + webbrowser.open('http://127.0.0.1:5000') app.run(port=5000, debug=False) - return \ No newline at end of file + return diff --git a/examples/query_viz.ipynb b/examples/query_viz.ipynb new file mode 100644 index 0000000..a6302cd --- /dev/null +++ b/examples/query_viz.ipynb @@ -0,0 +1,297 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "initial_id", + "metadata": { + "collapsed": true, + "ExecuteTime": { + "end_time": "2023-08-11T12:22:01.978273Z", + "start_time": "2023-08-11T12:21:58.029054Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: sentence_transformers in /Users/tazarov/PycharmProjects/chromaviz/.venv/lib/python3.9/site-packages (2.2.2)\r\n", + "Requirement already satisfied: essential-generators in /Users/tazarov/PycharmProjects/chromaviz/.venv/lib/python3.9/site-packages (1.0)\r\n", + "Requirement already satisfied: openai in 
/Users/tazarov/PycharmProjects/chromaviz/.venv/lib/python3.9/site-packages (0.27.8)\r\n", + "Requirement already satisfied: chromadb in /Users/tazarov/PycharmProjects/chromaviz/.venv/lib/python3.9/site-packages (0.4.5)\r\n", + "Requirement already satisfied: huggingface-hub>=0.4.0 in /Users/tazarov/PycharmProjects/chromaviz/.venv/lib/python3.9/site-packages (from sentence_transformers) (0.16.4)\r\n", + "Requirement already satisfied: scikit-learn in /Users/tazarov/PycharmProjects/chromaviz/.venv/lib/python3.9/site-packages (from sentence_transformers) (1.3.0)\r\n", + "Requirement already satisfied: scipy in /Users/tazarov/PycharmProjects/chromaviz/.venv/lib/python3.9/site-packages (from sentence_transformers) (1.11.1)\r\n", + "Requirement already satisfied: transformers<5.0.0,>=4.6.0 in /Users/tazarov/PycharmProjects/chromaviz/.venv/lib/python3.9/site-packages (from sentence_transformers) (4.31.0)\r\n", + "Requirement already satisfied: torchvision in /Users/tazarov/PycharmProjects/chromaviz/.venv/lib/python3.9/site-packages (from sentence_transformers) (0.15.2)\r\n", + "Requirement already satisfied: tqdm in /Users/tazarov/PycharmProjects/chromaviz/.venv/lib/python3.9/site-packages (from sentence_transformers) (4.66.1)\r\n", + "Requirement already satisfied: torch>=1.6.0 in /Users/tazarov/PycharmProjects/chromaviz/.venv/lib/python3.9/site-packages (from sentence_transformers) (2.0.1)\r\n", + "Requirement already satisfied: nltk in /Users/tazarov/PycharmProjects/chromaviz/.venv/lib/python3.9/site-packages (from sentence_transformers) (3.8.1)\r\n", + "Requirement already satisfied: numpy in /Users/tazarov/PycharmProjects/chromaviz/.venv/lib/python3.9/site-packages (from sentence_transformers) (1.25.2)\r\n", + "Requirement already satisfied: sentencepiece in /Users/tazarov/PycharmProjects/chromaviz/.venv/lib/python3.9/site-packages (from sentence_transformers) (0.1.99)\r\n", + "Requirement already satisfied: aiohttp in 
/Users/tazarov/PycharmProjects/chromaviz/.venv/lib/python3.9/site-packages (from openai) (3.8.5)\r\n", + "Requirement already satisfied: requests>=2.20 in /Users/tazarov/PycharmProjects/chromaviz/.venv/lib/python3.9/site-packages (from openai) (2.31.0)\r\n", + "Requirement already satisfied: onnxruntime>=1.14.1 in /Users/tazarov/PycharmProjects/chromaviz/.venv/lib/python3.9/site-packages (from chromadb) (1.15.1)\r\n", + "Requirement already satisfied: pydantic<2.0,>=1.9 in /Users/tazarov/PycharmProjects/chromaviz/.venv/lib/python3.9/site-packages (from chromadb) (1.10.12)\r\n", + "Requirement already satisfied: posthog>=2.4.0 in /Users/tazarov/PycharmProjects/chromaviz/.venv/lib/python3.9/site-packages (from chromadb) (3.0.1)\r\n", + "Requirement already satisfied: uvicorn[standard]>=0.18.3 in /Users/tazarov/PycharmProjects/chromaviz/.venv/lib/python3.9/site-packages (from chromadb) (0.23.2)\r\n", + "Requirement already satisfied: pulsar-client>=3.1.0 in /Users/tazarov/PycharmProjects/chromaviz/.venv/lib/python3.9/site-packages (from chromadb) (3.2.0)\r\n", + "Requirement already satisfied: chroma-hnswlib==0.7.2 in /Users/tazarov/PycharmProjects/chromaviz/.venv/lib/python3.9/site-packages (from chromadb) (0.7.2)\r\n", + "Requirement already satisfied: pypika>=0.48.9 in /Users/tazarov/PycharmProjects/chromaviz/.venv/lib/python3.9/site-packages (from chromadb) (0.48.9)\r\n", + "Requirement already satisfied: overrides>=7.3.1 in /Users/tazarov/PycharmProjects/chromaviz/.venv/lib/python3.9/site-packages (from chromadb) (7.4.0)\r\n", + "Requirement already satisfied: fastapi<0.100.0,>=0.95.2 in /Users/tazarov/PycharmProjects/chromaviz/.venv/lib/python3.9/site-packages (from chromadb) (0.99.1)\r\n", + "Requirement already satisfied: tokenizers>=0.13.2 in /Users/tazarov/PycharmProjects/chromaviz/.venv/lib/python3.9/site-packages (from chromadb) (0.13.3)\r\n", + "Requirement already satisfied: typing-extensions>=4.5.0 in 
/Users/tazarov/PycharmProjects/chromaviz/.venv/lib/python3.9/site-packages (from chromadb) (4.7.1)\r\n", + "Requirement already satisfied: importlib-resources in /Users/tazarov/PycharmProjects/chromaviz/.venv/lib/python3.9/site-packages (from chromadb) (6.0.1)\r\n", + "Requirement already satisfied: starlette<0.28.0,>=0.27.0 in /Users/tazarov/PycharmProjects/chromaviz/.venv/lib/python3.9/site-packages (from fastapi<0.100.0,>=0.95.2->chromadb) (0.27.0)\r\n", + "Requirement already satisfied: filelock in /Users/tazarov/PycharmProjects/chromaviz/.venv/lib/python3.9/site-packages (from huggingface-hub>=0.4.0->sentence_transformers) (3.12.2)\r\n", + "Requirement already satisfied: pyyaml>=5.1 in /Users/tazarov/PycharmProjects/chromaviz/.venv/lib/python3.9/site-packages (from huggingface-hub>=0.4.0->sentence_transformers) (6.0.1)\r\n", + "Requirement already satisfied: packaging>=20.9 in /Users/tazarov/PycharmProjects/chromaviz/.venv/lib/python3.9/site-packages (from huggingface-hub>=0.4.0->sentence_transformers) (23.1)\r\n", + "Requirement already satisfied: fsspec in /Users/tazarov/PycharmProjects/chromaviz/.venv/lib/python3.9/site-packages (from huggingface-hub>=0.4.0->sentence_transformers) (2023.6.0)\r\n", + "Requirement already satisfied: coloredlogs in /Users/tazarov/PycharmProjects/chromaviz/.venv/lib/python3.9/site-packages (from onnxruntime>=1.14.1->chromadb) (15.0.1)\r\n", + "Requirement already satisfied: protobuf in /Users/tazarov/PycharmProjects/chromaviz/.venv/lib/python3.9/site-packages (from onnxruntime>=1.14.1->chromadb) (4.24.0)\r\n", + "Requirement already satisfied: flatbuffers in /Users/tazarov/PycharmProjects/chromaviz/.venv/lib/python3.9/site-packages (from onnxruntime>=1.14.1->chromadb) (23.5.26)\r\n", + "Requirement already satisfied: sympy in /Users/tazarov/PycharmProjects/chromaviz/.venv/lib/python3.9/site-packages (from onnxruntime>=1.14.1->chromadb) (1.12)\r\n", + "Requirement already satisfied: backoff>=1.10.0 in 
/Users/tazarov/PycharmProjects/chromaviz/.venv/lib/python3.9/site-packages (from posthog>=2.4.0->chromadb) (2.2.1)\r\n", + "Requirement already satisfied: python-dateutil>2.1 in /Users/tazarov/PycharmProjects/chromaviz/.venv/lib/python3.9/site-packages (from posthog>=2.4.0->chromadb) (2.8.2)\r\n", + "Requirement already satisfied: monotonic>=1.5 in /Users/tazarov/PycharmProjects/chromaviz/.venv/lib/python3.9/site-packages (from posthog>=2.4.0->chromadb) (1.6)\r\n", + "Requirement already satisfied: six>=1.5 in /Users/tazarov/PycharmProjects/chromaviz/.venv/lib/python3.9/site-packages (from posthog>=2.4.0->chromadb) (1.16.0)\r\n", + "Requirement already satisfied: certifi in /Users/tazarov/PycharmProjects/chromaviz/.venv/lib/python3.9/site-packages (from pulsar-client>=3.1.0->chromadb) (2023.7.22)\r\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/tazarov/PycharmProjects/chromaviz/.venv/lib/python3.9/site-packages (from requests>=2.20->openai) (2.0.4)\r\n", + "Requirement already satisfied: idna<4,>=2.5 in /Users/tazarov/PycharmProjects/chromaviz/.venv/lib/python3.9/site-packages (from requests>=2.20->openai) (3.4)\r\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /Users/tazarov/PycharmProjects/chromaviz/.venv/lib/python3.9/site-packages (from requests>=2.20->openai) (3.2.0)\r\n", + "Requirement already satisfied: jinja2 in /Users/tazarov/PycharmProjects/chromaviz/.venv/lib/python3.9/site-packages (from torch>=1.6.0->sentence_transformers) (3.1.2)\r\n", + "Requirement already satisfied: networkx in /Users/tazarov/PycharmProjects/chromaviz/.venv/lib/python3.9/site-packages (from torch>=1.6.0->sentence_transformers) (3.1)\r\n", + "Requirement already satisfied: regex!=2019.12.17 in /Users/tazarov/PycharmProjects/chromaviz/.venv/lib/python3.9/site-packages (from transformers<5.0.0,>=4.6.0->sentence_transformers) (2023.8.8)\r\n", + "Requirement already satisfied: safetensors>=0.3.1 in 
/Users/tazarov/PycharmProjects/chromaviz/.venv/lib/python3.9/site-packages (from transformers<5.0.0,>=4.6.0->sentence_transformers) (0.3.2)\r\n", + "Requirement already satisfied: click>=7.0 in /Users/tazarov/PycharmProjects/chromaviz/.venv/lib/python3.9/site-packages (from uvicorn[standard]>=0.18.3->chromadb) (8.1.6)\r\n", + "Requirement already satisfied: h11>=0.8 in /Users/tazarov/PycharmProjects/chromaviz/.venv/lib/python3.9/site-packages (from uvicorn[standard]>=0.18.3->chromadb) (0.14.0)\r\n", + "Requirement already satisfied: websockets>=10.4 in /Users/tazarov/PycharmProjects/chromaviz/.venv/lib/python3.9/site-packages (from uvicorn[standard]>=0.18.3->chromadb) (11.0.3)\r\n", + "Requirement already satisfied: httptools>=0.5.0 in /Users/tazarov/PycharmProjects/chromaviz/.venv/lib/python3.9/site-packages (from uvicorn[standard]>=0.18.3->chromadb) (0.6.0)\r\n", + "Requirement already satisfied: uvloop!=0.15.0,!=0.15.1,>=0.14.0 in /Users/tazarov/PycharmProjects/chromaviz/.venv/lib/python3.9/site-packages (from uvicorn[standard]>=0.18.3->chromadb) (0.17.0)\r\n", + "Requirement already satisfied: watchfiles>=0.13 in /Users/tazarov/PycharmProjects/chromaviz/.venv/lib/python3.9/site-packages (from uvicorn[standard]>=0.18.3->chromadb) (0.19.0)\r\n", + "Requirement already satisfied: python-dotenv>=0.13 in /Users/tazarov/PycharmProjects/chromaviz/.venv/lib/python3.9/site-packages (from uvicorn[standard]>=0.18.3->chromadb) (1.0.0)\r\n", + "Requirement already satisfied: attrs>=17.3.0 in /Users/tazarov/PycharmProjects/chromaviz/.venv/lib/python3.9/site-packages (from aiohttp->openai) (23.1.0)\r\n", + "Requirement already satisfied: yarl<2.0,>=1.0 in /Users/tazarov/PycharmProjects/chromaviz/.venv/lib/python3.9/site-packages (from aiohttp->openai) (1.9.2)\r\n", + "Requirement already satisfied: aiosignal>=1.1.2 in /Users/tazarov/PycharmProjects/chromaviz/.venv/lib/python3.9/site-packages (from aiohttp->openai) (1.3.1)\r\n", + "Requirement already satisfied: 
async-timeout<5.0,>=4.0.0a3 in /Users/tazarov/PycharmProjects/chromaviz/.venv/lib/python3.9/site-packages (from aiohttp->openai) (4.0.3)\r\n", + "Requirement already satisfied: multidict<7.0,>=4.5 in /Users/tazarov/PycharmProjects/chromaviz/.venv/lib/python3.9/site-packages (from aiohttp->openai) (6.0.4)\r\n", + "Requirement already satisfied: frozenlist>=1.1.1 in /Users/tazarov/PycharmProjects/chromaviz/.venv/lib/python3.9/site-packages (from aiohttp->openai) (1.4.0)\r\n", + "Requirement already satisfied: zipp>=3.1.0 in /Users/tazarov/PycharmProjects/chromaviz/.venv/lib/python3.9/site-packages (from importlib-resources->chromadb) (3.16.2)\r\n", + "Requirement already satisfied: joblib in /Users/tazarov/PycharmProjects/chromaviz/.venv/lib/python3.9/site-packages (from nltk->sentence_transformers) (1.3.2)\r\n", + "Requirement already satisfied: threadpoolctl>=2.0.0 in /Users/tazarov/PycharmProjects/chromaviz/.venv/lib/python3.9/site-packages (from scikit-learn->sentence_transformers) (3.2.0)\r\n", + "Requirement already satisfied: pillow!=8.3.*,>=5.3.0 in /Users/tazarov/PycharmProjects/chromaviz/.venv/lib/python3.9/site-packages (from torchvision->sentence_transformers) (10.0.0)\r\n", + "Requirement already satisfied: anyio<5,>=3.4.0 in /Users/tazarov/PycharmProjects/chromaviz/.venv/lib/python3.9/site-packages (from starlette<0.28.0,>=0.27.0->fastapi<0.100.0,>=0.95.2->chromadb) (3.7.1)\r\n", + "Requirement already satisfied: humanfriendly>=9.1 in /Users/tazarov/PycharmProjects/chromaviz/.venv/lib/python3.9/site-packages (from coloredlogs->onnxruntime>=1.14.1->chromadb) (10.0)\r\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /Users/tazarov/PycharmProjects/chromaviz/.venv/lib/python3.9/site-packages (from jinja2->torch>=1.6.0->sentence_transformers) (2.1.3)\r\n", + "Requirement already satisfied: mpmath>=0.19 in /Users/tazarov/PycharmProjects/chromaviz/.venv/lib/python3.9/site-packages (from sympy->onnxruntime>=1.14.1->chromadb) (1.3.0)\r\n", + "Requirement 
already satisfied: sniffio>=1.1 in /Users/tazarov/PycharmProjects/chromaviz/.venv/lib/python3.9/site-packages (from anyio<5,>=3.4.0->starlette<0.28.0,>=0.27.0->fastapi<0.100.0,>=0.95.2->chromadb) (1.3.0)\r\n", + "Requirement already satisfied: exceptiongroup in /Users/tazarov/PycharmProjects/chromaviz/.venv/lib/python3.9/site-packages (from anyio<5,>=3.4.0->starlette<0.28.0,>=0.27.0->fastapi<0.100.0,>=0.95.2->chromadb) (1.1.2)\r\n", + "\r\n", + "\u001B[1m[\u001B[0m\u001B[34;49mnotice\u001B[0m\u001B[1;39;49m]\u001B[0m\u001B[39;49m A new release of pip available: \u001B[0m\u001B[31;49m22.3.1\u001B[0m\u001B[39;49m -> \u001B[0m\u001B[32;49m23.2.1\u001B[0m\r\n", + "\u001B[1m[\u001B[0m\u001B[34;49mnotice\u001B[0m\u001B[1;39;49m]\u001B[0m\u001B[39;49m To update, run: \u001B[0m\u001B[32;49mpip install --upgrade pip\u001B[0m\r\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/tazarov/PycharmProjects/chromaviz/.venv/lib/python3.9/site-packages/urllib3/__init__.py:34: NotOpenSSLWarning: urllib3 v2.0 only supports OpenSSL 1.1.1+, currently the 'ssl' module is compiled with 'LibreSSL 2.8.3'. 
See: https://github.com/urllib3/urllib3/issues/3020\n", + " warnings.warn(\n" + ] + } + ], + "source": [ + "!pip install sentence_transformers essential-generators openai chromadb\n", + "import chromadb\n", + "from chromadb.utils import embedding_functions\n", + "from essential_generators import DocumentGenerator" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "outputs": [], + "source": [ + "from chromaviz import visualize_collection" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-08-11T12:23:08.130088Z", + "start_time": "2023-08-11T12:23:07.006064Z" + } + }, + "id": "8a9325668b19773e" + }, + { + "cell_type": "code", + "execution_count": 3, + "outputs": [], + "source": [ + "sentence_transformer_ef = embedding_functions.SentenceTransformerEmbeddingFunction(model_name=\"all-MiniLM-L6-v2\")\n", + "client = chromadb.HttpClient()\n", + "client.heartbeat()\n", + "client.reset()\n", + "collection = client.get_or_create_collection(\"test-where-list\", embedding_function=sentence_transformer_ef)\n", + "gen = DocumentGenerator()\n", + "collection.add(documents=[gen.sentence() for _ in range(100)],\n", + " metadatas=[{\"id\":f\"{i}\"} for i in range(100)], ids=[f\"{i}\" for i in range(100)])" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-08-11T12:23:17.368057Z", + "start_time": "2023-08-11T12:23:13.013257Z" + } + }, + "id": "5d94f61c8bbdf03f" + }, + { + "cell_type": "code", + "execution_count": 4, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001B[31m\u001B[1mWARNING: This is a development server. Do not use it in a production deployment. Use a production WSGI server instead.\u001B[0m\n", + " * Running on http://127.0.0.1:5000\n", + "\u001B[33mPress CTRL+C to quit\u001B[0m\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. 
Disabling parallelism to avoid deadlocks...\n", + "To disable this warning, you can either:\n", + "\t- Avoid using `tokenizers` before the fork if possible\n", + "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "127.0.0.1 - - [11/Aug/2023 15:23:20] \"GET / HTTP/1.1\" 200 -\n", + "127.0.0.1 - - [11/Aug/2023 15:23:20] \"GET /assets/index-8146af32.js HTTP/1.1\" 200 -\n", + "127.0.0.1 - - [11/Aug/2023 15:23:20] \"GET /assets/index-26d05a53.css HTTP/1.1\" 200 -\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " 0 1 2 3 4 5 6 \\\n", + "0 0.047956 0.048246 -0.035767 -0.024743 -0.012206 0.033731 -0.062368 \n", + "1 0.008977 -0.051077 0.036565 0.060840 0.010508 -0.067851 0.005352 \n", + "2 -0.022052 0.083774 -0.027610 -0.026683 0.003508 0.004977 -0.038088 \n", + "3 0.063737 0.033926 0.035185 0.030046 0.054325 -0.090258 -0.102009 \n", + "4 0.041031 0.016509 -0.010663 0.060795 -0.035068 -0.054979 -0.007326 \n", + ".. ... ... ... ... ... ... ... \n", + "95 0.090940 -0.005342 0.068146 -0.022158 0.020105 -0.004669 0.025452 \n", + "96 -0.047173 0.005422 -0.041794 0.058701 0.055946 0.049700 0.070943 \n", + "97 -0.018983 0.033073 0.026989 -0.044341 -0.067015 0.025665 0.011343 \n", + "98 -0.050748 -0.018541 -0.080050 -0.050101 0.042949 0.004870 0.031745 \n", + "99 0.045831 0.013761 0.028145 0.005134 -0.008831 0.017450 -0.011697 \n", + "\n", + " 7 8 9 ... 374 375 376 377 \\\n", + "0 0.085702 -0.092749 0.050016 ... 0.036410 0.000398 0.027014 0.029483 \n", + "1 -0.094669 0.027327 0.003595 ... -0.012380 0.023044 -0.019520 0.034101 \n", + "2 -0.058914 0.037812 0.090190 ... 0.060514 -0.007686 0.059811 -0.034086 \n", + "3 -0.022523 0.004008 0.062823 ... -0.013843 -0.037828 -0.019402 -0.068327 \n", + "4 0.001684 -0.011120 -0.052800 ... 0.056987 0.024361 0.053875 -0.151887 \n", + ".. ... ... ... ... ... ... ... ... 
\n", + "95 0.006319 -0.088153 -0.060326 ... 0.018106 0.071526 -0.007053 -0.001869 \n", + "96 0.023004 -0.002010 -0.049902 ... 0.019225 0.035239 -0.011165 -0.037088 \n", + "97 0.007467 0.040676 -0.000230 ... 0.020716 0.030469 0.035451 -0.035325 \n", + "98 0.099603 0.067157 0.040993 ... -0.023769 0.009465 -0.038780 -0.032280 \n", + "99 -0.118562 0.070116 0.034440 ... 0.004740 0.080184 0.063883 0.030379 \n", + "\n", + " 378 379 380 381 382 383 \n", + "0 0.050659 0.009830 0.052305 -0.055695 -0.001139 -0.070266 \n", + "1 0.049218 -0.041152 0.088790 0.010561 0.002722 0.023056 \n", + "2 -0.023065 -0.035319 0.007935 0.053336 -0.001427 -0.039399 \n", + "3 0.040637 0.041705 -0.001027 -0.033376 -0.022719 0.083681 \n", + "4 -0.024111 0.022669 0.056104 -0.041444 -0.010172 0.001833 \n", + ".. ... ... ... ... ... ... \n", + "95 0.039771 0.021086 -0.055066 0.001575 -0.041391 -0.005716 \n", + "96 -0.064287 -0.047397 0.079567 -0.056427 0.090617 0.034375 \n", + "97 0.021464 0.063710 0.053537 -0.051077 -0.010436 0.025895 \n", + "98 -0.039581 0.005371 0.033909 0.022587 0.049444 0.023668 \n", + "99 -0.026245 -0.027462 -0.034823 -0.067455 0.051488 -0.052001 \n", + "\n", + "[100 rows x 384 columns]\n", + "Size of the dataframe: (100, 384)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "127.0.0.1 - - [11/Aug/2023 15:23:22] \"GET /assets/glasses-5d966a6f.svg HTTP/1.1\" 200 -\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Cumulative explained variation for 50 principal components: 0.815100891836756\n", + "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. 
Disabling parallelism to avoid deadlocks...\n", + "To disable this warning, you can either:\n", + "\t- Avoid using `tokenizers` before the fork if possible\n", + "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "127.0.0.1 - - [11/Aug/2023 15:23:22] \"GET /data HTTP/1.1\" 200 -\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "t-SNE done! Time elapsed: 0.5051560401916504 seconds\n" + ] + } + ], + "source": [ + "visualize_collection(collection)" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-08-11T12:23:43.585430Z", + "start_time": "2023-08-11T12:23:19.702982Z" + } + }, + "id": "5e221bf2af41bae2" + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/pyproject.toml b/pyproject.toml index 6b1fc93..76f838b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "chromaviz" -version = "0.0.4" +version = "0.0.5" authors = [ { name="Spruce Campbell", email="spruce@mit.edu" }, ] diff --git a/setup.cfg b/setup.cfg index 1748e79..dba61e6 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = chromaviz -version = 0.0.3 +version = 0.0.5 author = Spruce Campbell author_email = spruce@mit.edu url = https://github.com/mtybadger/chromaviz diff --git a/setup.py b/setup.py index b39e82b..93443d7 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ if __name__ == "__main__": setuptools.setup( name="chromaviz", - version="0.0.4", + version="0.0.5", 
url="https://github.com/mtybadger/chromaviz", author="Spruce Campbell", author_email="spruce@mit.edu",