diff --git a/colabs/wandb-model-registry/New_Model_Logging_in_W&B.ipynb b/colabs/wandb-model-registry/New_Model_Logging_in_W&B.ipynb
new file mode 100644
index 00000000..449e9d7e
--- /dev/null
+++ b/colabs/wandb-model-registry/New_Model_Logging_in_W&B.ipynb
@@ -0,0 +1,416 @@
+{
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "colab": {
+      "provenance": [],
+      "gpuType": "T4",
+      "authorship_tag": "ABX9TyOlvkZjseUluPYEdqGH1TjK",
+      "include_colab_link": true
+    },
+    "kernelspec": {
+      "name": "python3",
+      "display_name": "Python 3"
+    },
+    "language_info": {
+      "name": "python"
+    },
+    "accelerator": "GPU"
+  },
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "view-in-github",
+        "colab_type": "text"
+      },
+      "source": [
+        "\"Open"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "# Logging and Registering Models in W&B\n",
+        "It's never been easier to log your model checkpoints, keep track of the best ones, and maintain the lineage of runs and results!\n",
+        "\n",
+        "W&B is introducing a few convenience methods to make logging models and linking them to the registry simple:\n",
+        "- `log_model`\n",
+        "- `use_model`\n",
+        "- `link_model`"
+      ],
+      "metadata": {
+        "id": "saeQIrcF155i"
+      }
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "## Imports"
+      ],
+      "metadata": {
+        "id": "_ewwM5T_OWTr"
+      }
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "BbkuyVKRwCbi"
+      },
+      "outputs": [],
+      "source": [
+        "!pip install -qqq wandb einops"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import torch\n",
+        "from torch import nn\n",
+        "from torch.optim import Adam\n",
+        "from torch.utils.data import DataLoader, Dataset\n",
+        "from torchvision import datasets, transforms\n",
+        "from einops import rearrange, repeat\n",
+        "from einops.layers.torch import Rearrange\n",
+        "\n",
+        "import wandb"
+      ],
+      "metadata": {
+        "id": "6OsMa64TOacf"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "## Log in to W&B\n",
+        "- You can log in explicitly using `wandb login` or `wandb.login()` (see below).\n",
+        "- Alternatively, you can set environment variables (see the example cell below). There are several env variables you can set to change the behavior of W&B logging. The most important are:\n",
+        "  - `WANDB_API_KEY` - find this in the \"Settings\" section of your profile\n",
+        "  - `WANDB_BASE_URL` - the URL of the W&B server\n",
+        "- Find your API token in \"Profile\" -> \"Settings\" in the W&B App\n",
+        "\n",
+        "![api_token](https://drive.google.com/uc?export=view&id=1Xn7hnn0rfPu_EW0A_-32oCXqDmpA0-kx)"
+      ],
+      "metadata": {
+        "id": "m3Q_QG14Ovvz"
+      }
+    },
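+    {
+      "cell_type": "markdown",
+      "source": [
+        "The next cell is a minimal sketch of the environment-variable approach; the values shown are placeholders, not required settings. Uncomment and fill in the lines if you prefer this over the interactive `wandb.login()` prompt below."
+      ],
+      "metadata": {}
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "import os\n",
+        "\n",
+        "# Sketch only: uncomment and fill in to configure W&B via environment variables.\n",
+        "# os.environ[\"WANDB_API_KEY\"] = \"<your-api-key>\"  # copied from the Settings page of your profile\n",
+        "# os.environ[\"WANDB_BASE_URL\"] = \"https://api.wandb.ai\"  # only change this for private/dedicated W&B servers"
+      ]
+    },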
+    {
+      "cell_type": "code",
+      "source": [
+        "wandb.login()"
+      ],
+      "metadata": {
+        "id": "5-r3O3gFwfcr"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "# Define the Model and Dataset\n",
+        "This is a simple implementation of a Vision Transformer (ViT) and utilizes a random dataset for training.\n",
+        "- Credit to https://github.com/lucidrains/vit-pytorch"
+      ],
+      "metadata": {
+        "id": "ZuL3yZ17qIGE"
+      }
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "## Define some config for the model and dataset"
+      ],
+      "metadata": {
+        "id": "WE4j_CI7Tt9O"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Define the number of samples, classes, and image size\n",
+        "num_samples = 100\n",
+        "num_classes = 10\n",
+        "image_size = 256\n",
+        "batch_size = 32"
+      ],
+      "metadata": {
+        "id": "D7sVekyVTlxN"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# helpers\n",
+        "def pair(t):\n",
+        "    return t if isinstance(t, tuple) else (t, t)\n",
+        "\n",
+        "# classes\n",
+        "class FeedForward(nn.Module):\n",
+        "    def __init__(self, dim, hidden_dim, dropout = 0.):\n",
+        "        super().__init__()\n",
+        "        self.net = nn.Sequential(\n",
+        "            nn.LayerNorm(dim),\n",
+        "            nn.Linear(dim, hidden_dim),\n",
+        "            nn.GELU(),\n",
+        "            nn.Dropout(dropout),\n",
+        "            nn.Linear(hidden_dim, dim),\n",
+        "            nn.Dropout(dropout)\n",
+        "        )\n",
+        "\n",
+        "    def forward(self, x):\n",
+        "        return self.net(x)\n",
+        "\n",
+        "class Attention(nn.Module):\n",
+        "    def __init__(self, dim, heads = 8, dim_head = 64, dropout = 0.):\n",
+        "        super().__init__()\n",
+        "        inner_dim = dim_head * heads\n",
+        "        project_out = not (heads == 1 and dim_head == dim)\n",
+        "\n",
+        "        self.heads = heads\n",
+        "        self.scale = dim_head ** -0.5\n",
+        "\n",
+        "        self.norm = nn.LayerNorm(dim)\n",
+        "\n",
+        "        self.attend = nn.Softmax(dim = -1)\n",
+        "        self.dropout = nn.Dropout(dropout)\n",
+        "\n",
+        "        self.to_qkv = nn.Linear(dim, inner_dim * 3, bias = False)\n",
+        "\n",
+        "        self.to_out = nn.Sequential(\n",
+        "            nn.Linear(inner_dim, dim),\n",
+        "            nn.Dropout(dropout)\n",
+        "        ) if project_out else nn.Identity()\n",
+        "\n",
+        "    def forward(self, x):\n",
+        "        x = self.norm(x)\n",
+        "\n",
+        "        qkv = self.to_qkv(x).chunk(3, dim = -1)\n",
+        "        q, k, v = map(lambda t: rearrange(t, 'b n (h d) -> b h n d', h = self.heads), qkv)\n",
+        "\n",
+        "        dots = torch.matmul(q, k.transpose(-1, -2)) * self.scale\n",
+        "\n",
+        "        attn = self.attend(dots)\n",
+        "        attn = self.dropout(attn)\n",
+        "\n",
+        "        out = torch.matmul(attn, v)\n",
+        "        out = rearrange(out, 'b h n d -> b n (h d)')\n",
+        "        return self.to_out(out)\n",
+        "\n",
+        "class Transformer(nn.Module):\n",
+        "    def __init__(self, dim, depth, heads, dim_head, mlp_dim, dropout = 0.):\n",
+        "        super().__init__()\n",
+        "        self.norm = nn.LayerNorm(dim)\n",
+        "        self.layers = nn.ModuleList([])\n",
+        "        for _ in range(depth):\n",
+        "            self.layers.append(nn.ModuleList([\n",
+        "                Attention(dim, heads = heads, dim_head = dim_head, dropout = dropout),\n",
+        "                FeedForward(dim, mlp_dim, dropout = dropout)\n",
+        "            ]))\n",
+        "\n",
+        "    def forward(self, x):\n",
+        "        for attn, ff in self.layers:\n",
+        "            x = attn(x) + x\n",
+        "            x = ff(x) + x\n",
+        "\n",
+        "        return self.norm(x)\n",
+        "\n",
+        "class ViT(nn.Module):\n",
+        "    def __init__(self, *, image_size, patch_size, num_classes, dim, depth, heads, mlp_dim, pool = 'cls', channels = 3, dim_head = 64, dropout = 0., emb_dropout = 0.):\n",
+        "        super().__init__()\n",
+        "        image_height, image_width = pair(image_size)\n",
+        "        patch_height, patch_width = pair(patch_size)\n",
+        "\n",
+        "        assert image_height % patch_height == 0 and image_width % patch_width == 0, 'Image dimensions must be divisible by the patch size.'\n",
+        "\n",
+        "        num_patches = (image_height // patch_height) * (image_width // patch_width)\n",
+        "        patch_dim = channels * patch_height * patch_width\n",
+        "        assert pool in {'cls', 'mean'}, 'pool type must be either cls (cls token) or mean (mean pooling)'\n",
+        "\n",
+        "        self.to_patch_embedding = nn.Sequential(\n",
+        "            Rearrange('b c (h p1) (w p2) -> b (h w) (p1 p2 c)', p1 = patch_height, p2 = patch_width),\n",
+        "            nn.LayerNorm(patch_dim),\n",
+        "            nn.Linear(patch_dim, dim),\n",
+        "            nn.LayerNorm(dim),\n",
+        "        )\n",
+        "\n",
+        "        self.pos_embedding = nn.Parameter(torch.randn(1, num_patches + 1, dim))\n",
+        "        self.cls_token = nn.Parameter(torch.randn(1, 1, dim))\n",
+        "        self.dropout = nn.Dropout(emb_dropout)\n",
+        "\n",
+        "        self.transformer = Transformer(dim, depth, heads, dim_head, mlp_dim, dropout)\n",
+        "\n",
+        "        self.pool = pool\n",
+        "        self.to_latent = nn.Identity()\n",
+        "\n",
+        "        self.mlp_head = nn.Linear(dim, num_classes)\n",
+        "\n",
+        "    def forward(self, img):\n",
+        "        x = self.to_patch_embedding(img)\n",
+        "        b, n, _ = x.shape\n",
+        "\n",
+        "        cls_tokens = repeat(self.cls_token, '1 1 d -> b 1 d', b = b)\n",
+        "        x = torch.cat((cls_tokens, x), dim=1)\n",
+        "        x += self.pos_embedding[:, :(n + 1)]\n",
+        "        x = self.dropout(x)\n",
+        "\n",
+        "        x = self.transformer(x)\n",
+        "\n",
+        "        x = x.mean(dim = 1) if self.pool == 'mean' else x[:, 0]\n",
+        "\n",
+        "        x = self.to_latent(x)\n",
+        "        return self.mlp_head(x)\n",
+        "\n",
+        "\n",
+        "# Define a custom dataset\n",
+        "class RandomImageDataset(Dataset):\n",
+        "    def __init__(self, num_samples, num_classes, image_size):\n",
+        "        self.num_samples = num_samples\n",
+        "        self.num_classes = num_classes\n",
+        "        self.image_size = image_size\n",
+        "\n",
+        "    def __len__(self):\n",
+        "        return self.num_samples\n",
+        "\n",
+        "    def __getitem__(self, idx):\n",
+        "        # Generate a random image tensor\n",
+        "        image = torch.randn(3, self.image_size, self.image_size)  # 3 channels, image_size x image_size\n",
+        "        # Generate a random label\n",
+        "        label = torch.randint(0, self.num_classes, (1,)).item()\n",
+        "        return image, label\n",
+        "\n",
+        "\n",
+        "# Create the dataset\n",
+        "dataset = RandomImageDataset(num_samples=num_samples, num_classes=num_classes, image_size=image_size)\n",
+        "\n",
+        "# Create a DataLoader\n",
+        "dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)"
+      ],
+      "metadata": {
+        "id": "TntUlCT2ppV2"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "# Log Model Checkpoints to W&B with 1 Line!\n",
+        "\n",
+        "Use the `log_model` method to log a model artifact containing the contents of `path` to a run; it also marks the artifact as an output of the run. You can see the full lineage graph of the model artifact in the [lineage](https://docs.wandb.ai/guides/artifacts/explore-and-traverse-an-artifact-graph#docusaurus_skipToContent_fallback) tab of the Artifacts view.\n",
+        "\n",
+        "`log_model()` takes as input:\n",
+        "\n",
+        "- `path`: A path to the model file(s), which can be a local file (of the form `/local/directory/file.txt`), a directory (of the form `/local/directory`), or a reference path to S3 (`s3://bucket/path`).\n",
+        "- `name`: An optional name for the model artifact the files will be logged to. If no name is specified, it defaults to the basename of the input path prepended with the run ID.\n",
+        "- `aliases`: An optional list of aliases, which can be thought of as semantic 'nicknames' or identifiers for a model version. For example, if this model yielded the best accuracy, you might add the alias 'highest-accuracy' or 'best'.\n",
+        "\n",
+        "The next cell shows a minimal, self-contained sketch of the call; the full training loop follows it."
+      ],
+      "metadata": {
+        "id": "ZkomIMW6VNky"
+      }
+    },
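+    {
+      "cell_type": "markdown",
+      "source": [
+        "Minimal sketch of `log_model` in isolation (optional). It reuses the `new_model_logging` project, and the checkpoint and artifact names below are placeholders chosen only for this illustration - the real calls happen inside the training loop in the next cell."
+      ],
+      "metadata": {}
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "# Sketch: save any checkpoint locally, then log it as a model artifact of the active run.\n",
+        "sketch_run = wandb.init(project=\"new_model_logging\", job_type=\"log-model-sketch\")\n",
+        "\n",
+        "torch.save(nn.Linear(4, 2).state_dict(), \"sketch_checkpoint.pth\")  # placeholder checkpoint\n",
+        "wandb.log_model(path=\"sketch_checkpoint.pth\",    # local file, directory, or s3:// reference\n",
+        "                name=f\"sketch-{sketch_run.id}\",  # optional; defaults to the run-ID-prefixed basename of path\n",
+        "                aliases=[\"demo\"])                # optional semantic labels for this version\n",
+        "\n",
+        "sketch_run.finish()"
+      ]
+    },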
+    {
+      "cell_type": "code",
+      "source": [
+        "run = wandb.init(project=\"new_model_logging\",\n",
+        "                 job_type=\"training\")\n",
+        "\n",
+        "v = ViT(\n",
+        "    image_size = image_size,\n",
+        "    patch_size = 32,\n",
+        "    num_classes = num_classes,\n",
+        "    dim = 128,\n",
+        "    depth = 3,\n",
+        "    heads = 2,\n",
+        "    mlp_dim = 256,\n",
+        "    dropout = 0.1,\n",
+        "    emb_dropout = 0.1\n",
+        ")\n",
+        "\n",
+        "# Define the loss function and optimizer\n",
+        "criterion = nn.CrossEntropyLoss()\n",
+        "optimizer = Adam(v.parameters(), lr=0.003)\n",
+        "\n",
+        "# Training loop\n",
+        "best_accuracy = 0\n",
+        "for epoch in range(5):  # number of epochs\n",
+        "    for images, labels in dataloader:\n",
+        "        # Forward pass\n",
+        "        preds = v(images)\n",
+        "        loss = criterion(preds, labels)\n",
+        "\n",
+        "        # Backward pass and optimization\n",
+        "        optimizer.zero_grad()\n",
+        "        loss.backward()\n",
+        "        optimizer.step()\n",
+        "\n",
+        "        wandb.log({\"train/loss\": loss})\n",
+        "\n",
+        "    # Model evaluation after each epoch (using a validation set)\n",
+        "    # Here you would write your validation loop and calculate accuracy\n",
+        "    val_accuracy = 0.5  # Assume this is the validation accuracy you compute\n",
+        "    model_path = 'model_vit.pth'\n",
+        "    torch.save(v.state_dict(), model_path)\n",
+        "\n",
+        "    # Check if this is the best model so far\n",
+        "    if val_accuracy > best_accuracy:\n",
+        "        best_accuracy = val_accuracy\n",
+        "        # Log the model to your W&B run\n",
+        "        wandb.log_model(name=f\"model_vit-{wandb.run.id}\", path=model_path, aliases=[\"best\", f\"epoch_{epoch}\"])\n",
+        "    else:\n",
+        "        wandb.log_model(name=f\"model_vit-{wandb.run.id}\", path=model_path, aliases=[f\"epoch_{epoch}\"])"
+      ],
+      "metadata": {
+        "id": "48khrxaKt-rm"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "# Link Your Best Models to the Model Registry\n",
+        "You can bookmark your best model checkpoints and centralize them across your team. The Model Registry lets you organize your best models by task, manage the model lifecycle, facilitate tracking and auditing throughout the ML lifecycle, and automate downstream actions with webhooks or jobs. You can do this via the API with `link_model()`, which takes as input:\n",
+        "\n",
+        "- `path`: A path to the model file(s), which can be a local file (of the form `/local/directory/file.txt`), a directory (of the form `/local/directory`), or a reference path to S3 (`s3://bucket/path`).\n",
+        "- `registered_model_name`: the name of the Registered Model - a collection of linked model versions in the Model Registry, typically representing a team's ML task - that the model should be linked to. If no Registered Model with the given name exists, a new one will be created with this name.\n",
+        "- `name`: An **optional** name for the model artifact the files will be logged to. If no name is specified, it defaults to the basename of the input path prepended with the run ID.\n",
+        "- `aliases`: An **optional** list of aliases, which can be thought of as semantic 'nicknames' or identifiers for a linked model version. For example, since this model is being linked, or published, to the Model Registry, you might add an alias \"staging\" or \"QA\"."
+      ],
+      "metadata": {
+        "id": "km2nU7IUVg63"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Retrieve the best checkpoint logged during training (after all epochs are finished)\n",
+        "artifact_name = f\"model_vit-{wandb.run.id}:best\"\n",
+        "best_model_path = wandb.use_model(artifact_name)\n",
+        "\n",
+        "# Link the best model to the registry\n",
+        "wandb.link_model(path=best_model_path,\n",
+        "                 registered_model_name=\"Industrial ViT\",\n",
+        "                 aliases=[\"staging\"])\n",
+        "run.finish()"
+      ],
+      "metadata": {
+        "id": "ckH-yexDVgJI"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
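+    {
+      "cell_type": "markdown",
+      "source": [
+        "`use_model` downloads the checkpoint and returns a local file path, so you can load the weights straight back into the architecture. Below is a minimal sketch, reusing `best_model_path` from the previous cell and the same hyperparameters used for training."
+      ],
+      "metadata": {}
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "# Sketch: rebuild the ViT with the training hyperparameters and restore the downloaded weights.\n",
+        "restored_vit = ViT(\n",
+        "    image_size = image_size,\n",
+        "    patch_size = 32,\n",
+        "    num_classes = num_classes,\n",
+        "    dim = 128,\n",
+        "    depth = 3,\n",
+        "    heads = 2,\n",
+        "    mlp_dim = 256\n",
+        ")\n",
+        "restored_vit.load_state_dict(torch.load(best_model_path))\n",
+        "restored_vit.eval()"
+      ]
+    }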
+  ]
+}
\ No newline at end of file