Skip to content

Latest commit

 

History

History
358 lines (280 loc) · 9.55 KB

hex2context.livemd

File metadata and controls

358 lines (280 loc) · 9.55 KB

hex2context

# Notebook dependencies: SQLite (with the sqlite-vec extension) through Ecto for
# storage, Nx/Bumblebee/EXLA for computing embeddings, TextChunker for splitting
# documentation, Req for fetching it, and Plug/Kino for the HTTP endpoint.
Mix.install([
  {:sqlite_vec, github: "joelpaulkoch/sqlite_vec"},
  {:ecto, "~> 3.12"},
  {:ecto_sql, "~> 3.12"},
  {:ecto_sqlite3, "~> 0.17.2"},
  {:kino, "~> 0.14.1"},
  {:nx, "~> 0.9.1"},
  {:bumblebee, "~> 0.6.0"},
  {:exla, "~> 0.9.0"},
  {:axon, "~> 0.7.0"},
  {:text_chunker, "~> 0.3.1"},
  {:req, "~> 0.5.8"},
  {:plug, "~> 1.16"}
])

# Make EXLA the default backend for Nx tensors created in this notebook.
Nx.global_default_backend(EXLA.Backend)

Documentation

hex2context is a Livebook companion to hex2txt.

Introduction

hex2context uses Retrieval-Augmented Generation (RAG) to include only the most relevant snippets of documentation from the llms.txt files generated by hex2txt.

(Excluding documentation that is not relevant to the task at hand makes more efficient use of limited LLM context window sizes, and reduces inference costs.)

Usage

Step 1: Generate Embeddings

First, generate embeddings for every combination of package/version that you will be interested in querying later. For example:

Hex2context.ingest_docs([
  {"phoenix", "1.7.18"},
  {"phoenix_html", "4.1.1"},
  {"phoenix_live_view", "1.0.1"},
  {"flop", "0.26.1"},
  {"flop_phoenix", "0.23.1"},
  {"ecto", "3.12.5"}
])

Embeddings are generated locally (i.e., on the computer running this Livebook) and durably written to a SQLite database in the same folder on disk that this notebook is running from. For larger packages (like many in the example above), generating these embeddings may take some time. However, once persisted, they do not need to be re-computed.

Step 2: Query for Relevant Documentation Snippets

This notebook uses Kino.Proxy to expose the following HTTP API:

  • GET /proxy/sessions/:id/:package_name/:package_version?query=:query

Here:

  • :id is the identifier of the current Livebook session (check your browser's address bar for the identifier of your current session; more info)
  • :package_name is the name of the package to query documentation for
  • :package_version is the version number of the package to query documentation for
  • :query is the (URL-encoded) query used for similarity search

Documentation relevant to the provided :query (for the indicated package) will be returned from this endpoint, and can be fed directly into your AI coding assistant of choice.

Note that this endpoint will return an error if embeddings for the indicated package (name and version) have not been ingested yet.

For example:

curl "http://localhost:52039/proxy/sessions/6lq7fwziy23shg77c7vjzjtqnoy4hlpfkzhbsqsiuel7vaqr/phoenix_live_view/1.0.1/?query=phx-click"

Thanks

With thanks to the following resources:

Infrastructure (DB)

# Ecto repository for this notebook, using the SQLite3 adapter.
# Runtime options (database path, extensions to load) are not set here; they are
# passed in the child spec that starts the repo.
defmodule Hex2context.Repo do
  use Ecto.Repo,
    otp_app: :hex2context,
    adapter: Ecto.Adapters.SQLite3
end
# Start the repo as a Kino-managed child process. The database file is placed in
# the notebook's own directory, and the sqlite-vec extension path is handed to
# the adapter through `load_extensions`.
Kino.start_child(
  {Hex2context.Repo,
   database: Path.join(__DIR__, "hex2context.db"), load_extensions: [SqliteVec.path()]}
)
defmodule Hex2context.Repo.Migrations.CreateEmbeddingsTable do
  @moduledoc """
  Creates the `embeddings` table, which stores one row per documentation chunk
  (package name, package version, chunk text, and a 384-dimension embedding),
  plus an index for looking rows up by package name and version.
  """

  use Ecto.Migration

  @doc """
  Creates the `embeddings` table and the `idx_package` index.
  """
  def up do
    # One SQL statement per `execute/1` call: the SQLite driver prepares only the
    # first statement of a multi-statement string, so bundling `CREATE INDEX`
    # into the same string as `CREATE TABLE` would silently skip the index.
    execute("""
    CREATE TABLE embeddings(
      id INTEGER PRIMARY KEY,
      package_name TEXT NOT NULL,
      package_version TEXT NOT NULL,
      doc_chunk TEXT NOT NULL,
      embedding FLOAT[384] NOT NULL,
      UNIQUE(package_name, package_version, doc_chunk)
    )
    """)

    execute("CREATE INDEX idx_package ON embeddings(package_name, package_version)")
  end

  @doc """
  Drops the `embeddings` table (SQLite drops the table's indexes with it).
  """
  def down do
    execute("DROP TABLE embeddings")
  end
end
defmodule Hex2context.Repo.Migrations do
  @moduledoc """
  Entry points for applying or reverting the notebook's single migration.
  """

  alias Ecto.Migrator
  alias Hex2context.Repo
  alias Hex2context.Repo.Migrations.CreateEmbeddingsTable

  # Version number under which `CreateEmbeddingsTable` is recorded.
  @version 1

  @doc "Runs `CreateEmbeddingsTable` upwards against `Hex2context.Repo`."
  def migrate do
    Migrator.up(Repo, @version, CreateEmbeddingsTable)
  end

  @doc "Runs `CreateEmbeddingsTable` downwards against `Hex2context.Repo`."
  def rollback do
    Migrator.down(Repo, @version, CreateEmbeddingsTable)
  end
end
# Apply the embeddings-table migration whenever this cell is evaluated.
Hex2context.Repo.Migrations.migrate()

Infrastructure (ML)

defmodule Hex2context.Serving do
  @moduledoc """
  Builds the `Nx.Serving` that turns text into embedding vectors.
  """

  # all-MiniLM-L6-v2 is a BERT-style encoder with 512 position embeddings.
  # Compiling for a longer sequence pads every input far beyond what the model
  # can use (making each run much slower) and feeds over-long inputs positions
  # the model has no trained embedding for. Capping at 512 makes the tokenizer
  # truncate instead.
  @max_sequence_length 512

  @doc """
  Loads `sentence-transformers/all-MiniLM-L6-v2` and its tokenizer from Hugging
  Face and returns a text-embedding serving.

  Embeddings are mean-pooled over the model's hidden state and L2-normalised.
  The serving is compiled with EXLA for a batch size of 1 and inputs of up to
  #{@max_sequence_length} tokens.
  """
  def build_serving_for_embeddings() do
    repo = {:hf, "sentence-transformers/all-MiniLM-L6-v2"}

    {:ok, model_info} = Bumblebee.load_model(repo)
    {:ok, tokenizer} = Bumblebee.load_tokenizer(repo)

    Bumblebee.Text.text_embedding(
      model_info,
      tokenizer,
      output_pool: :mean_pooling,
      output_attribute: :hidden_state,
      embedding_processor: :l2_norm,
      compile: [batch_size: 1, sequence_length: [@max_sequence_length]],
      defn_options: [compiler: EXLA]
    )
  end
end
# Start the embedding serving as a Kino-managed child process, registered under
# the name `Hex2context.EmbeddingServing` so other cells can call it via
# `Nx.Serving.batched_run/2`.
Kino.start_child(
  {Nx.Serving,
   serving: Hex2context.Serving.build_serving_for_embeddings(),
   name: Hex2context.EmbeddingServing,
   batch_timeout: 100}
)

Schema

# Ecto schema for one row of the `embeddings` table: a single documentation
# chunk of a specific package version, together with its embedding vector.
defmodule Hex2context.Embedding do
  use Ecto.Schema
  
  schema "embeddings" do
    # Embedding vector, cast/dumped through sqlite-vec's float32 Ecto type.
    field(:embedding, SqliteVec.Ecto.Float32)
    # Package the chunk belongs to, and the exact version it was generated from.
    field(:package_name, :string)
    field(:package_version, :string)
    # The documentation text that was embedded.
    field(:doc_chunk, :string)
  end
end

Application (RAG)

defmodule Hex2context do
  @moduledoc """
  Ingests `llms.txt` documentation from hex2txt, stores one embedding per
  documentation chunk, and answers similarity queries against those embeddings.
  """

  require Logger
  import Ecto.Query
  import SqliteVec.Ecto.Query
  alias Hex2context.Embedding

  @hex2txt "https://hex2txt.fly.dev"
  @req Req.new()
  @chunk_retrieval_limit 20

  @doc """
  Ingests documentation for every `{package_name, package_version}` tuple in
  `packages_list`, skipping packages that were already ingested.

  Always returns `:ok`; failures while fetching or embedding raise.
  """
  @spec ingest_docs([{String.t(), String.t()}]) :: :ok
  def ingest_docs(packages_list) when is_list(packages_list) do
    for {package_name, package_version} <- packages_list do
      ingest_docs(package_name, package_version)
    end

    :ok
  end

  @doc """
  Fetches, embeds, and stores the documentation of one package version.

  Does nothing (besides logging) when embeddings for that package version
  already exist, unless `force: true` is given, in which case the stored
  embeddings for that package version are replaced by freshly computed ones.

  ## Options

    * `:force` - re-ingest even if embeddings already exist (default: `false`)

  Always returns `:ok`; failures while fetching or embedding raise.
  """
  @spec ingest_docs(String.t(), String.t(), keyword()) :: :ok
  def ingest_docs(package_name, package_version, opts \\ []) do
    force_refresh? = Keyword.get(opts, :force, false)
    already_loaded? = has_docs?(package_name, package_version)

    case {already_loaded?, force_refresh?} do
      {true, false} ->
        Logger.info("Docs already ingested for #{package_name} v#{package_version}; specify `force: true` to refresh")
        :ok

      _ ->
        package_name
        |> fetch_docs_and_generate_embeddings(package_version)
        |> persist_embeddings(package_name, package_version)

        :ok
    end
  end

  @doc """
  Returns the stored documentation chunks most similar to `query` for the given
  package version.

  The query is embedded with `Hex2context.EmbeddingServing` and compared to the
  stored embeddings by L2 distance; at most #{@chunk_retrieval_limit} chunks are
  returned, nearest first.

  Returns `{:ok, chunks}` on success, or `{:err, message}` when no embeddings
  have been ingested for that package version.
  """
  @spec retrieve_docs(String.t(), String.t(), String.t()) ::
          {:ok, [String.t()]} | {:err, String.t()}
  def retrieve_docs(package_name, package_version, query) do
    if has_docs?(package_name, package_version) do
      %{embedding: tensor} = Nx.Serving.batched_run(Hex2context.EmbeddingServing, query)

      query_embedding = SqliteVec.Float32.new(tensor)

      doc_chunks =
        Hex2context.Repo.all(
          from(i in Embedding,
            where: i.package_name == ^package_name,
            where: i.package_version == ^package_version,
            order_by: vec_distance_L2(i.embedding, vec_f32(query_embedding)),
            limit: ^@chunk_retrieval_limit,
            select: i.doc_chunk
          )
        )

      {:ok, doc_chunks}
    else
      # The `:err` tag (rather than `:error`) is what the HTTP API matches on.
      {:err,
       "No docs for #{package_name} v#{package_version}; must pre-generate with `Hex2context.ingest_docs/2`"}
    end
  end

  # True when at least one embedding row exists for the package version.
  defp has_docs?(package_name, package_version) do
    Hex2context.Repo.exists?(package_embeddings(package_name, package_version))
  end

  # Query selecting every embedding row of one package version.
  defp package_embeddings(package_name, package_version) do
    from(i in Embedding,
      where: i.package_name == ^package_name,
      where: i.package_version == ^package_version
    )
  end

  # Downloads the package's llms.txt from hex2txt, splits it into trimmed
  # markdown chunks, and computes one embedding per chunk. Returns
  # `{chunks, embeddings}` with both lists in the same order. Raises on HTTP
  # errors.
  defp fetch_docs_and_generate_embeddings(package_name, package_version) do
    url = @hex2txt <> "/" <> package_name <> "/" <> package_version <> "/llms.txt"
    docs = Req.get!(@req, url: url, http_errors: :raise).body

    chunks =
      docs
      |> TextChunker.split(format: :markdown)
      |> Enum.map(fn chunk ->
        %TextChunker.Chunk{chunk | text: String.trim(chunk.text)}
      end)

    chunk_count = Enum.count(chunks)

    Logger.info(
      "Fetched #{chunk_count} documentation chunks for #{package_name} v#{package_version}"
    )

    embeddings =
      chunks
      |> Enum.map(& &1.text)
      # Count from 1 so the progress log runs from "1/N" to "N/N".
      |> Enum.with_index(1)
      |> Enum.map(fn {chunk, position} ->
        Logger.info("Computing embedding for chunk #{position}/#{chunk_count}...")
        Nx.Serving.batched_run(Hex2context.EmbeddingServing, chunk)
      end)

    Logger.info("Finished computing embeddings for #{chunk_count} chunks")

    {chunks, embeddings}
  end

  # Replaces the stored embeddings of one package version with the given ones.
  #
  # Existing rows are deleted first so that a forced refresh does not leave
  # behind chunks that no longer exist upstream. Delete and inserts share one
  # transaction, so a failure part-way through keeps the previous rows.
  defp persist_embeddings({chunks, embeddings}, package_name, package_version) do
    {:ok, _inserted} =
      Hex2context.Repo.transaction(fn ->
        Hex2context.Repo.delete_all(package_embeddings(package_name, package_version))

        for {%TextChunker.Chunk{text: text}, %{embedding: tensor}} <-
              Enum.zip(chunks, embeddings) do
          Hex2context.Repo.insert(
            %Embedding{
              embedding: SqliteVec.Float32.new(tensor),
              package_name: package_name,
              package_version: package_version,
              doc_chunk: text
            },
            # Identical chunk texts within one document would otherwise violate
            # the table's UNIQUE constraint.
            on_conflict: :replace_all
          )
        end
      end)

    :ok
  end
end

HTTP API Server

defmodule Hex2context.API do
  @moduledoc """
  Plug router exposing documentation retrieval over HTTP.

  `GET /:package_name/:package_version?query=...` responds with the matching
  documentation chunks separated by blank lines, or with 404 and an explanatory
  message when `Hex2context.retrieve_docs/3` reports an error.
  """

  use Plug.Router

  plug :match
  plug Plug.Parsers, parsers: [:urlencoded]
  plug :dispatch

  get "/:package_name/:package_version" do
    # A missing `query` parameter is treated as an empty search string.
    search_terms = conn.params["query"] || ""

    {status, body} =
      package_name
      |> Hex2context.retrieve_docs(package_version, search_terms)
      |> to_response()

    send_resp(conn, status, body)
  end

  match _ do
    send_resp(conn, 404, "Not found")
  end

  # Translates a retrieval result into an HTTP status and response body.
  defp to_response({:ok, doc_chunks}), do: {200, Enum.join(doc_chunks, "\n\n")}
  defp to_response({:err, message}), do: {404, message}
  defp to_response(_unexpected), do: {500, "Unable to retrieve documentation"}
end
# Route requests arriving at this session's Kino proxy endpoint to the router.
Kino.Proxy.listen(Hex2context.API)

Playground

# Ingest two packages; each is skipped if its embeddings are already stored.
Hex2context.ingest_docs([
  {"flop", "0.26.1"},
  {"flop_phoenix", "0.23.1"},
])
# Similarity search over the stored flop documentation chunks.
Hex2context.retrieve_docs("flop", "0.26.1", "filter date")
# `force: true` re-ingests even when embeddings for this version already exist.
Hex2context.ingest_docs("geo", "4.0.1", force: true)
Hex2context.retrieve_docs("geo", "4.0.1", "WKB")