diff --git a/LICENSE b/LICENSE
index be6b66989..af4dec1d2 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,5 +1,10 @@
 Copyright 2020-2022 Jina AI Limited. All rights reserved.
 
+The following three files are licensed under the MIT License via https://github.com/openai/CLIP, Copyright (c) 2021 OpenAI:
+  server/clip_server/model/clip.py
+  server/clip_server/model/model.py
+  server/clip_server/model/simple_tokenizer.py
+
 
 Apache License
 Version 2.0, January 2004
diff --git a/docs/conf.py b/docs/conf.py
index ba0e1e225..a7529e822 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -60,12 +60,9 @@
         "color-brand-primary": "#FBCB67",
         "color-brand-content": "#FBCB67",
     },
-    # PLEASE DO NOT DELETE the empty line between `start-announce` and `end-announce`
     # PLEASE DO NOT DELETE `start-announce`/ `end-announce` it is used for our dev bot to inject announcement from GH
-    # start-announce
-    # end-announce
 }
@@ -77,7 +74,7 @@
 ]
 html_js_files = [
     'https://cdn.jsdelivr.net/npm/vue@2/dist/vue.min.js',
-    ]
+]
 htmlhelp_basename = slug
 html_show_sourcelink = False
 html_favicon = '_static/favicon.png'
@@ -110,7 +107,7 @@
     'sphinx_inline_tabs',
 ]
 
-myst_enable_extensions = ['colon_fence', 'substitution']
+myst_enable_extensions = ['colon_fence', 'substitution', 'deflist']
 
 # -- Custom 404 page
 
@@ -128,7 +125,7 @@
 }
 notfound_no_urls_prefix = True
 
-apidoc_module_dir = repo_dir
+apidoc_module_dir = '../client'
 apidoc_output_dir = 'api'
 apidoc_excluded_paths = ['tests', 'legacy', 'hub', 'toy*', 'setup.py']
 apidoc_separate_modules = True
@@ -193,14 +190,18 @@ def add_server_address(app):
     js_text = "var server_address = '%s';" % server_address
     app.add_js_file(None, body=js_text)
 
+
 def configure_qa_bot_ui(app):
     # This sets the server address to
     server_address = app.config['server_address']
-    js_text = """
+    js_text = (
+        """
     document.addEventListener('DOMContentLoaded', function() {
         document.querySelector('qa-bot').setAttribute('server', '%s');
     });
-    """ % server_address
+    """
+        % server_address
+    )
     app.add_js_file(None, body=js_text)
@@ -235,4 +236,4 @@ def setup(app):
     #     default=os.getenv('JINA_DOCSBOT_SERVER', 'https://jina-ai-docarray.docsqa.jina.ai'),
     #     rebuild='',
     # )
-    # app.connect('builder-inited', configure_qa_bot_ui)
\ No newline at end of file
+    # app.connect('builder-inited', configure_qa_bot_ui)
diff --git a/docs/index.md b/docs/index.md
index 81e395d97..36fa1b19b 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -113,6 +113,15 @@ user-guides/faq
 ```
 
+```{toctree}
+:caption: Developer References
+:hidden:
+:maxdepth: 1
+
+api/clip_client
+changelog/index
+```
+
 ---
 
 {ref}`genindex` | {ref}`modindex`
diff --git a/docs/user-guides/client.md b/docs/user-guides/client.md
index 1ab187f1a..f748cf5c6 100644
--- a/docs/user-guides/client.md
+++ b/docs/user-guides/client.md
@@ -1,6 +1,6 @@
 # Client API
 
-CLIP-as-service is designed in a client-server architecture. A client sends images and texts to the server, and receives the embeddings from the server. Additionally, `clip_client` has many nice designs for speeding up the encoding on large amount of data:
+CLIP-as-service is designed in a client-server architecture. A client sends images and texts to the server, and receives the embeddings from the server. Additionally, {class}`~clip_client.client.Client` has many nice designs for speeding up the encoding of large amounts of data:
 - Streaming: request sending is *not* blocked by the response receiving. Sending and receiving are two separate streams that run in parallel. Both are independent and each have separate internal buffer.
 - Batching: large requests are segmented into small batches and send in a stream.
 - Low memory footprint: only load data when needed.
@@ -41,7 +41,19 @@
 scheme://netloc:port
 
 ## Encoding
-Client provides `.encode()` function that allows you to send sentences, images to the server in a streaming and sync/async manner. Encoding here means getting the fixed-length vector representation of a sentence or image.
+Client provides the {func}`~clip_client.client.Client.encode` function that allows you to send sentences and images to the server in a streaming and sync/async manner. Encoding here means getting the fixed-length vector representation of a sentence or image.
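+
+For example, here is a minimal sketch of a full round trip (text in, embeddings out), assuming a server is already running at `grpc://0.0.0.0:51000`; adjust the address to your deployment:
+
+```python
+from clip_client import Client
+
+c = Client('grpc://0.0.0.0:51000')
+
+# send an iterable of sentences; the result is one embedding vector per input
+vectors = c.encode(['a happy dog', 'a red bike', 'an avocado armchair'])
+print(vectors.shape)  # e.g. (3, 512), depending on the model served
+```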
 
 `.encode()` supports two basic input types:
 - **An iterable of `str`**, e.g. `List[str]`, `Tuple[str]`, `Generator[str]` are all acceptable.
@@ -213,7 +213,22 @@
 Here are some suggestions when encoding large number of Documents:
 
 ## Async encoding
-To encode Document in an asynchronous manner, one can use `.aencode()`.
+To encode Documents in an asynchronous manner, one can use {func}`~clip_client.client.Client.aencode`.
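+
+A minimal sketch of the pattern, again assuming a running server at `grpc://0.0.0.0:51000` (the encoding is awaited inside a coroutine, so other concurrent tasks can run in the meantime):
+
+```python
+import asyncio
+from clip_client import Client
+
+async def main():
+    c = Client('grpc://0.0.0.0:51000')
+    # the await point lets other concurrent coroutines run while encoding is in flight
+    embeddings = await c.aencode(['hello world', 'a photo of a cat'])
+    print(embeddings.shape)
+
+asyncio.run(main())
+```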
 
 ```{tip}
 Despite the sexy word "async", I often found many data scientists have a misconception about asynchronous. And their motivation of using async function is often wrong. _Async is not a silver bullet._ In a simple language, you will only need `.aencode()` when there is another concurrent task that is also async. Then you want to "overlap" the time spending of these two tasks.
@@ -252,7 +252,7 @@ The final time cost will be less than `3s + time(t2)`.
 
 (profiling)=
 ## Profiling
-You can use `client.profile()` to give a quick test on the server to make sure everything is good.
+You can use {func}`~clip_client.client.Client.profile` to run a quick test on the server to make sure everything is good.
 
 ```python
 from clip_client import Client
 
 client = Client('grpc://0.0.0.0:51000')
 client.profile()
 ```
@@ -272,7 +272,7 @@ This give you a tree-like table showing the latency and percentage.
 └── CLIP model 4ms 100%
 ```
 
-Underneath `.profile()` sends a single empty Document to the CLIP-server for encoding and calculates a summary of latency. The above tree can be read as follows:
+Under the hood, `.profile()` sends a single empty Document to the CLIP-server for encoding and calculates a summary of latency. The above tree can be read as follows:
 
 - From calling `client.encode()` to returning the results, everything counted, takes 16ms to finish.
 - Among them the time spent on the server is 4ms, the remaining 12ms is spent on the client-server communication, request packing, response unpacking.
diff --git a/docs/user-guides/faq.md b/docs/user-guides/faq.md
index ed2df6c8d..ecf45c875 100644
--- a/docs/user-guides/faq.md
+++ b/docs/user-guides/faq.md
@@ -3,56 +3,65 @@
 This is a list of Frequently Asked Questions about CLIP-as-service. Feel free to suggest new entries!
 
-#### What is CLIP model?
+What is the CLIP model?
+: Developed by OpenAI, CLIP (Contrastive Language-Image Pre-Training) is a neural network trained on a variety of (image, text) pairs. The original CLIP GitHub repository [is here](https://github.com/openai/CLIP). The introduction of the CLIP model can [be found here](https://openai.com/blog/clip/).
 
-Developed by OpenAI, CLIP (Contrastive Language-Image Pre-Training) is a neural network trained on a variety of (image, text) pairs. The original CLIP Github repository can be found [here](https://github.com/openai/CLIP).
+Do I need to install `clip-server` and `clip-client` together?
+: No. You can install them separately on different machines. For example, on a GPU server, you just need `clip-server`; on your laptop, you just need `clip-client`.
 
-#### Do I need to install `clip-server` and `clip-client` together?
+What is CLIP-as-service based on? The codebase seems quite small
+: CLIP-as-service leverages features from [Jina](https://github.com/jina-ai/jina), which itself utilizes [DocArray](https://github.com/jina-ai/docarray). Thanks to them, CLIP-as-service can be quickly built with solid infrastructure and rich features.
 
-No. You can install them separately on different machines. On a GPU server, you just need `clip-server`; on your laptop, you just need `clip-client`
+I had this AioRpcError, what should I do?
+: If you encounter the following errors, it means your client cannot connect to the server.
 
-#### What is CLIP-as-service powered by? The codebase seems quite small
-
-CLIP-as-service leverages features from [Jina](https://github.com/jina-ai/jina) and [DocArray](https://github.com/jina-ai/docarray). Thanks to them CLIP-as-service can be quickly built with solid infrastructure and rich features.
-
-#### I had this AioRpcError, what should I do?
-
-If you encounter the following errors, it means you client can not connect to the server.
-
-```text
+  ```text
 GRPCClient@28632[E]:gRPC error: StatusCode.UNAVAILABLE failed to connect to all addresses
-the ongoing request is terminated as the server is not available or closed already
-```
-
-```text
-AioRpcError: `. If it still throws the same error, then your connection is broken.
-
-While it is hard to pinpoint a network problem, also out of the scope of CLIP-as-service, we here provide you a checklist that may help you to diagnose the problem:
-- Are the IP address, port, and protocol all correct?
-- Is client and server under the same network, or a different network?
-- Is your server down?
-- Is server's port open to public?
-- Is there a firewall on the server side that restricts the port?
-- Is there a firewall on the client side that restricts the port?
-- Is the security group (on Cloud providers) correctly configured?
-
-#### Why "CLIP-as-service" why not "CLIP-as-a-service"
-
-Kind of pay homage to BERT-as-service. It is not about grammatically correct anyhow.
-
-#### What happened to the BERT-as-service.
-
-There has been no maintenance of BERT-as-service since Feb. 2019.
-
-CLIP-as-service is a huge upgrade of BERT-as-service, with more powerful universal embedding models that can handle both images and texts; and more solid and efficient microservice infrastructure developed in the last 2 years by Jina AI. The high-level API, especially the client side, is a drop-in replacement of the old BERT-as-service.
\ No newline at end of file
+  the ongoing request is terminated as the server is not available or closed already
+  ```
+
+  ```text
+  AioRpcError: `. If it still throws the same error, then your connection is broken.
+  ```
+
+  While it is hard to pinpoint a network problem, which is also out of the scope of CLIP-as-service, here is a checklist that may help you diagnose the problem (see also the quick check after the list):
+  - Are the IP address, port, and protocol all correct?
+  - Are the client and server on the same network, or on different networks?
+  - Is your server down?
+  - Is the server's port open to the public?
+  - Is there a firewall on the server side that restricts the port?
+  - Is there a firewall on the client side that restricts the port?
+  - Is the security group (on Cloud providers) correctly configured?
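+
+  As a quick first check, you can also run a one-line round trip from the client side. A minimal sketch, assuming the server address is `grpc://0.0.0.0:51000` (substitute your own):
+
+  ```python
+  from clip_client import Client
+
+  c = Client('grpc://0.0.0.0:51000')
+  # profile() sends a single empty Document; if it prints a latency summary,
+  # the client-server connection itself is working
+  c.profile()
+  ```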
+ +Why "CLIP-as-service" why not "CLIP-as-a-service" +: Kind of pay homage to BERT-as-service. It is not about grammatically correct anyhow. + +What happened to the BERT-as-service. +: There has been no maintenance of BERT-as-service since Feb. 2019. + + CLIP-as-service is a huge upgrade of BERT-as-service, with more powerful universal embedding models that can handle both images and texts; and more solid and efficient microservice infrastructure developed in the last 2 years by Jina AI. The high-level API, especially the client side, is a drop-in replacement of the old BERT-as-service. + +Where can I find the old codebase of BERT-as-service. +: In the [`bert-as-service` branch](https://github.com/jina-ai/clip-as-service/tree/bert-as-service) of the repository. \ No newline at end of file diff --git a/server/clip_server/model/clip.py b/server/clip_server/model/clip.py index 90fd51824..2112e6729 100644 --- a/server/clip_server/model/clip.py +++ b/server/clip_server/model/clip.py @@ -1,3 +1,5 @@ +# Originally from https://github.com/openai/CLIP. MIT License, Copyright (c) 2021 OpenAI + import os import urllib import warnings diff --git a/server/clip_server/model/model.py b/server/clip_server/model/model.py index 8a70ff6e8..2e1126f0e 100644 --- a/server/clip_server/model/model.py +++ b/server/clip_server/model/model.py @@ -1,3 +1,5 @@ +# Originally from https://github.com/openai/CLIP. MIT License, Copyright (c) 2021 OpenAI + from collections import OrderedDict from typing import Tuple, Union diff --git a/server/clip_server/model/simple_tokenizer.py b/server/clip_server/model/simple_tokenizer.py index af7e48964..de2a86659 100644 --- a/server/clip_server/model/simple_tokenizer.py +++ b/server/clip_server/model/simple_tokenizer.py @@ -1,3 +1,5 @@ +# Originally from https://github.com/openai/CLIP. MIT License, Copyright (c) 2021 OpenAI + import gzip import html import os