# Notebook dependencies, installed when this cell is evaluated.
Mix.install([
# NOTE(review): tracked from GitHub with no ref/tag given here — consider pinning.
{:sqlite_vec, github: "joelpaulkoch/sqlite_vec"},
{:ecto, "~> 3.12"},
{:ecto_sql, "~> 3.12"},
{:ecto_sqlite3, "~> 0.17.2"},
{:kino, "~> 0.14.1"},
{:nx, "~> 0.9.1"},
{:bumblebee, "~> 0.6.0"},
{:exla, "~> 0.9.0"},
{:axon, "~> 0.7.0"},
{:text_chunker, "~> 0.3.1"},
{:req, "~> 0.5.8"},
{:plug, "~> 1.16"}
])
# Make EXLA the default Nx backend for everything evaluated in this notebook.
Nx.global_default_backend(EXLA.Backend)
hex2context is a Livebook companion to hex2txt.
hex2context uses Retrieval-Augmented Generation (RAG) to include only the most relevant snippets of documentation from the llms.txt files generated by hex2txt.
(Excluding documentation that is not as relevant to the task at hand makes more efficient use of limited LLM context window sizes and reduces inference costs.)
First, generate embeddings for every combination of package/version that you will be interested in querying later. For example:
Hex2context.ingest_docs([
{"phoenix", "1.7.18"},
{"phoenix_html", "4.1.1"},
{"phoenix_live_view", "1.0.1"},
{"flop", "0.26.1"},
{"flop_phoenix", "0.23.1"},
{"ecto", "3.12.5"}
])
Embeddings are generated locally (i.e., on the computer running this Livebook) and durably written to a SQLite database in the same folder on disk that this notebook is running from. For larger packages (like many in the example above), generating these embeddings may take some time. However, once persisted, they do not need to be re-computed.
This notebook uses Kino.Proxy to expose the following HTTP API:
GET /proxy/sessions/:id/:package_name/:package_version?query=:query
Here:
- :id is the identifier of the current Livebook session (check your browser's address bar for the identifier of your current session; more info)
- :package_name is the name of the package to query documentation for
- :package_version is the version number of the package to query documentation for
- :query is the (URL-encoded) query used for similarity search
Documentation relevant to the provided :query (for the indicated package) will be returned from this endpoint, and can be fed directly to your AI coding assistant of choice.
Note that this endpoint will return an error if embeddings for the indicated package (name and version) have not been ingested yet.
For example:
curl "http://localhost:52039/proxy/sessions/6lq7fwziy23shg77c7vjzjtqnoy4hlpfkzhbsqsiuel7vaqr/phoenix_live_view/1.0.0/?query=phx-click"
With thanks to the following resources:
defmodule Hex2context.Repo do
  @moduledoc """
  Ecto repository for this notebook, backed by the SQLite3 adapter.
  """

  use Ecto.Repo, adapter: Ecto.Adapters.SQLite3, otp_app: :hex2context
end
# Start the repo through Kino. The database file sits next to this notebook
# (`__DIR__`) and the sqlite-vec extension is loaded on the connections.
repo_options = [
  database: Path.join(__DIR__, "hex2context.db"),
  load_extensions: [SqliteVec.path()]
]

Kino.start_child({Hex2context.Repo, repo_options})
defmodule Hex2context.Repo.Migrations.CreateEmbeddingsTable do
  @moduledoc """
  Creates the `embeddings` table that stores one row per documentation chunk,
  together with an index on `(package_name, package_version)`.
  """

  use Ecto.Migration

  @doc """
  Creates the `embeddings` table and the `idx_package` index.
  """
  def up do
    # One SQL statement per `execute/1` call: SQLite prepares a single
    # statement at a time, so a second statement appended to the same string
    # is not guaranteed to run (the index could silently never be created).
    execute("""
    CREATE TABLE embeddings(
      id INTEGER PRIMARY KEY,
      package_name TEXT NOT NULL,
      package_version TEXT NOT NULL,
      doc_chunk TEXT NOT NULL,
      embedding FLOAT[384] NOT NULL,
      UNIQUE(package_name, package_version, doc_chunk)
    )
    """)

    execute("CREATE INDEX idx_package ON embeddings(package_name, package_version)")
  end

  @doc """
  Drops the `embeddings` table.
  """
  def down do
    execute("DROP TABLE embeddings")
  end
end
defmodule Hex2context.Repo.Migrations do
  @moduledoc """
  Runs the notebook's single migration (version 1) up or down against
  `Hex2context.Repo`.
  """

  @repo Hex2context.Repo
  @version 1
  @migration Hex2context.Repo.Migrations.CreateEmbeddingsTable

  @doc "Applies the embeddings-table migration."
  def migrate do
    Ecto.Migrator.up(@repo, @version, @migration)
  end

  @doc "Reverts the embeddings-table migration."
  def rollback do
    Ecto.Migrator.down(@repo, @version, @migration)
  end
end
# Apply the embeddings-table migration (version 1) to Hex2context.Repo.
Hex2context.Repo.Migrations.migrate()
defmodule Hex2context.Serving do
  @moduledoc """
  Builds the text-embedding serving used for both documentation chunks and
  search queries.
  """

  @model_repo {:hf, "sentence-transformers/all-MiniLM-L6-v2"}

  @doc """
  Loads the model and tokenizer from `#{inspect(@model_repo)}` and returns a
  Bumblebee text-embedding serving configured with mean pooling over the
  hidden state and L2 normalisation, compiled with EXLA.
  """
  def build_serving_for_embeddings() do
    {:ok, model_info} = Bumblebee.load_model(@model_repo)
    {:ok, tokenizer} = Bumblebee.load_tokenizer(@model_repo)

    serving_options = [
      output_pool: :mean_pooling,
      output_attribute: :hidden_state,
      embedding_processor: :l2_norm,
      # NOTE(review): sequence_length is a token count; confirm 2000 is within
      # what this model supports.
      compile: [batch_size: 1, sequence_length: [2000]],
      defn_options: [compiler: EXLA]
    ]

    Bumblebee.Text.text_embedding(model_info, tokenizer, serving_options)
  end
end
# Build the serving once, then start it through Kino under the name that
# `Nx.Serving.batched_run/2` callers use.
embedding_serving = Hex2context.Serving.build_serving_for_embeddings()

Kino.start_child(
  {Nx.Serving,
   serving: embedding_serving, name: Hex2context.EmbeddingServing, batch_timeout: 100}
)
defmodule Hex2context.Embedding do
  @moduledoc """
  Ecto schema for a row of the `embeddings` table: one documentation chunk of
  a package version together with its embedding vector.
  """

  use Ecto.Schema

  schema "embeddings" do
    field :embedding, SqliteVec.Ecto.Float32
    field :package_name, :string
    field :package_version, :string
    field :doc_chunk, :string
  end
end
defmodule Hex2context do
  @moduledoc """
  Ingests package documentation (fetched as `llms.txt` from hex2txt) as
  embedded chunks, and retrieves the chunks closest to a query.
  """

  require Logger
  import Ecto.Query
  import SqliteVec.Ecto.Query
  alias Hex2context.Embedding

  @hex2txt "https://hex2txt.fly.dev"
  @req Req.new()
  @chunk_retrieval_limit 20

  @doc """
  Ingests documentation for every `{package_name, package_version}` tuple in
  `packages_list`, skipping packages that are already stored.

  Always returns `:ok`.
  """
  def ingest_docs(packages_list) when is_list(packages_list) do
    for {package_name, package_version} <- packages_list do
      ingest_docs(package_name, package_version)
    end

    :ok
  end

  @doc """
  Ingests documentation for a single package version.

  Nothing is fetched when chunks for this package/version already exist,
  unless `force: true` is passed in `opts`, in which case the stored chunks
  are replaced by a fresh fetch.

  Returns `:ok`. Raises if the documentation cannot be fetched.
  """
  def ingest_docs(package_name, package_version, opts \\ []) do
    force_refresh? = Keyword.get(opts, :force, false)

    case {has_docs?(package_name, package_version), force_refresh?} do
      {true, false} ->
        Logger.info(
          "Docs already ingested for #{package_name} v#{package_version}; specify `force: true` to refresh"
        )

      _ ->
        package_name
        |> fetch_docs_and_generate_embeddings(package_version)
        |> persist_embeddings(package_name, package_version)
    end

    :ok
  end

  @doc """
  Returns the stored documentation chunks closest to `query` for the given
  package version.

  Returns `{:ok, chunks}` with at most #{@chunk_retrieval_limit} chunk strings
  ordered by L2 distance to the query embedding, or `{:err, message}` when
  nothing has been ingested for that package/version. The `:err` tag is kept
  as-is because `Hex2context.API` matches on it.
  """
  def retrieve_docs(package_name, package_version, query) do
    if has_docs?(package_name, package_version) do
      %{embedding: tensor} = Nx.Serving.batched_run(Hex2context.EmbeddingServing, query)
      query_embedding = SqliteVec.Float32.new(tensor)

      doc_chunks =
        Hex2context.Repo.all(
          from(i in Embedding,
            where: i.package_name == ^package_name,
            where: i.package_version == ^package_version,
            order_by: vec_distance_L2(i.embedding, vec_f32(query_embedding)),
            limit: ^@chunk_retrieval_limit,
            select: i.doc_chunk
          )
        )

      {:ok, doc_chunks}
    else
      {:err,
       "No docs for #{package_name} v#{package_version}; must pre-generate with `Hex2context.ingest_docs/2`"}
    end
  end

  # True when at least one chunk is stored for this package/version.
  defp has_docs?(package_name, package_version) do
    Hex2context.Repo.exists?(
      from(i in Embedding,
        where: i.package_name == ^package_name,
        where: i.package_version == ^package_version
      )
    )
  end

  # Downloads the package's llms.txt, splits it into trimmed Markdown chunks and
  # embeds each chunk. Returns `{chunk_texts, embedding_tensors}`, index-aligned.
  defp fetch_docs_and_generate_embeddings(package_name, package_version) do
    url = @hex2txt <> "/" <> package_name <> "/" <> package_version <> "/llms.txt"
    docs = Req.get!(@req, url: url, http_errors: :raise).body

    chunks =
      docs
      |> TextChunker.split(format: :markdown)
      |> Enum.map(&String.trim(&1.text))

    chunk_count = length(chunks)

    Logger.info(
      "Fetched #{chunk_count} documentation chunks for #{package_name} v#{package_version}"
    )

    embeddings =
      chunks
      # Count from 1 so the progress log reads "1/N" .. "N/N".
      |> Enum.with_index(1)
      |> Enum.map(fn {chunk, index} ->
        Logger.info("Computing embedding for chunk #{index}/#{chunk_count}...")
        %{embedding: tensor} = Nx.Serving.batched_run(Hex2context.EmbeddingServing, chunk)
        tensor
      end)

    Logger.info("Finished computing embeddings for #{chunk_count} chunks")
    {chunks, embeddings}
  end

  # Replaces the stored chunks for this package/version with the given ones.
  # Runs in one transaction so an interrupted ingest never leaves a partial set
  # behind (which `has_docs?/2` would otherwise report as fully ingested), and
  # so a forced refresh drops chunks that no longer exist upstream.
  defp persist_embeddings({chunks, embeddings}, package_name, package_version) do
    {:ok, _inserted} =
      Hex2context.Repo.transaction(
        fn ->
          Hex2context.Repo.delete_all(
            from(i in Embedding,
              where: i.package_name == ^package_name,
              where: i.package_version == ^package_version
            )
          )

          for {text, tensor} <- Enum.zip(chunks, embeddings) do
            Hex2context.Repo.insert!(
              %Embedding{
                embedding: SqliteVec.Float32.new(tensor),
                package_name: package_name,
                package_version: package_version,
                doc_chunk: text
              },
              # The same chunk text can occur twice in one document; name the
              # unique key explicitly and only overwrite the embedding.
              on_conflict: {:replace, [:embedding]},
              conflict_target: [:package_name, :package_version, :doc_chunk]
            )
          end
        end,
        # Large packages insert many rows; do not cap the transaction duration.
        timeout: :infinity
      )

    :ok
  end
end
defmodule Hex2context.API do
  @moduledoc """
  HTTP interface for documentation retrieval.

  `GET /:package_name/:package_version?query=...` responds with the matching
  documentation chunks joined by blank lines. All responses are sent as
  `text/plain` with a UTF-8 charset.
  """

  use Plug.Router

  plug :match
  plug Plug.Parsers, parsers: [:urlencoded]
  plug :dispatch

  get "/:package_name/:package_version" do
    # A missing `query` parameter is treated as the empty string.
    query = conn.params["query"] || ""

    {status, body} =
      case Hex2context.retrieve_docs(package_name, package_version, query) do
        {:ok, doc_chunks} -> {200, Enum.join(doc_chunks, "\n\n")}
        {:err, message} -> {404, message}
        _ -> {500, "Unable to retrieve documentation"}
      end

    send_text(conn, status, body)
  end

  match _ do
    send_text(conn, 404, "Not found")
  end

  # Sends `body` as UTF-8 plain text so clients decode the documentation correctly.
  defp send_text(conn, status, body) do
    conn
    |> put_resp_content_type("text/plain")
    |> send_resp(status, body)
  end
end
# Serve Hex2context.API through Kino.Proxy.
Kino.Proxy.listen(Hex2context.API)
# Example: ingest two packages in one call.
Hex2context.ingest_docs([
{"flop", "0.26.1"},
{"flop_phoenix", "0.23.1"},
])
# Example: similarity search against an ingested package.
Hex2context.retrieve_docs("flop", "0.26.1", "filter date")
# Example: ingest a single package, re-fetching even if it is already stored.
Hex2context.ingest_docs("geo", "4.0.1", force: true)
Hex2context.retrieve_docs("geo", "4.0.1", "WKB")