From b4fa02ff917665c9ae69893db087f61f46a97650 Mon Sep 17 00:00:00 2001 From: dante <45801863+alexander-camuto@users.noreply.github.com> Date: Wed, 11 Dec 2024 23:11:23 -0500 Subject: [PATCH] chore: neural bag of words example --- .github/workflows/rust.yml | 4 +- examples/notebooks/neural_bow.ipynb | 770 ++++++++++++++++++++++++++++ tests/py_integration_tests.rs | 12 + 3 files changed, 785 insertions(+), 1 deletion(-) create mode 100644 examples/notebooks/neural_bow.ipynb diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 6132dd1a9..c47a518c4 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -650,7 +650,9 @@ jobs: run: python -m venv .env --clear; source .env/bin/activate; pip install -r requirements.txt; python -m ensurepip --upgrade - name: Build python ezkl run: source .env/bin/activate; unset CONDA_PREFIX; maturin develop --features python-bindings --release - - name: Postgres tutorials + - name: Neural bow + run: source .env/bin/activate; cargo nextest run py_tests::tests::neural_bag_of_words_ --no-capture + - name: Felt conversion run: source .env/bin/activate; cargo nextest run py_tests::tests::felt_conversion_test_ --no-capture - name: Postgres tutorials run: source .env/bin/activate; cargo nextest run py_tests::tests::postgres_ --no-capture diff --git a/examples/notebooks/neural_bow.ipynb b/examples/notebooks/neural_bow.ipynb new file mode 100644 index 000000000..ed0ffdb01 --- /dev/null +++ b/examples/notebooks/neural_bow.ipynb @@ -0,0 +1,770 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "This is a zk version of the tutorial found [here](https://github.com/bentrevett/pytorch-sentiment-analysis/blob/main/1%20-%20Neural%20Bag%20of%20Words.ipynb). The original tutorial is part of the PyTorch Sentiment Analysis series by Ben Trevett.\n", + "\n", + "1 - NBoW\n", + "\n", + "In this series we'll be building a machine learning model to perform sentiment analysis -- a subset of text classification where the task is to detect if a given sentence is positive or negative -- using PyTorch and torchtext. The dataset used will be movie reviews from the IMDb dataset, which we'll obtain using the datasets library.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "Preparing Data\n", + "\n", + "Before we can implement our NBoW model, we first have to perform quite a few steps to get our data ready to use. NLP usually requires quite a lot of data wrangling beforehand, though libraries such as datasets and torchtext handle most of this for us.\n", + "\n", + "The steps to take are:\n", + "\n", + " 1. importing modules\n", + " 2. loading data\n", + " 3. tokenizing data\n", + " 4. creating data splits\n", + " 5. creating a vocabulary\n", + " 6. numericalizing data\n", + " 7. creating the data loaders\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "! 
pip install torchtext datasets"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import collections\n",
+    "\n",
+    "import datasets\n",
+    "import matplotlib.pyplot as plt\n",
+    "import numpy as np\n",
+    "import torch\n",
+    "import torch.nn as nn\n",
+    "import torch.optim as optim\n",
+    "import torchtext\n",
+    "import tqdm\n",
+    "\n",
+    "# It is usually good practice to run your experiments multiple times with different random seeds -- both to measure the variance of your model and to avoid reporting results obtained only with \"good\" or \"bad\" seeds, i.e. by being very lucky or unlucky with the randomness in the training process.\n",
+    "\n",
+    "seed = 1234\n",
+    "\n",
+    "np.random.seed(seed)\n",
+    "torch.manual_seed(seed)\n",
+    "torch.cuda.manual_seed(seed)\n",
+    "torch.backends.cudnn.deterministic = True\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "train_data, test_data = datasets.load_dataset(\"imdb\", split=[\"train\", \"test\"])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "We can check the features attribute of a split to get more information about the features. We can see that text is a Value of dtype=string -- in other words, it's a string -- and that label is a ClassLabel. A ClassLabel means the feature is an integer representation of which class the example belongs to. num_classes=2 means that our labels are one of two values, 0 or 1, and names=['neg', 'pos'] gives us the human-readable versions of those values. Thus, a label of 0 means the example is a negative review and a label of 1 means the example is a positive review."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "train_data.features\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "train_data[0]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "One of the first things we need to do to our data is tokenize it. Machine learning models aren't designed to handle strings, they're designed to handle numbers. So we need to break each string down into individual tokens, and then convert these tokens to numbers. We'll get to the conversion later, but first we'll look at tokenization.\n",
+    "\n",
+    "Tokenization involves using a tokenizer to process the strings in our dataset. A tokenizer is a function that goes from a string to a list of strings. There are many types of tokenizers available, but we're going to use a relatively simple one provided by torchtext called the basic_english tokenizer. We load our tokenizer as such:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "tokenizer = torchtext.data.utils.get_tokenizer(\"basic_english\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def tokenize_example(example, tokenizer, max_length):\n",
+    "    tokens = tokenizer(example[\"text\"])[:max_length]\n",
+    "    return {\"tokens\": tokens}\n",
+    "\n",
+    "\n",
+    "max_length = 256\n",
+    "\n",
+    "train_data = train_data.map(\n",
+    "    tokenize_example, fn_kwargs={\"tokenizer\": tokenizer, \"max_length\": max_length}\n",
+    ")\n",
+    "test_data = test_data.map(\n",
+    "    tokenize_example, fn_kwargs={\"tokenizer\": tokenizer, \"max_length\": max_length}\n",
+    ")\n",
+    "\n",
+    "\n",
+    "# create validation data\n",
+    "# Why have both a validation set and a test set? Your test set represents the real-world data that you'd see if you actually deployed this model. You won't be able to see what data your model will be fed once deployed, and your test set is supposed to reflect that. Every time we tune our model hyperparameters or training set-up to make it do a bit better on the test set, we leak information from the test set into the training process. If we do this too often, we begin to overfit on the test set. Hence, we need some data which can act as a \"proxy\" test set that we can look at more frequently to evaluate how well our model actually does on unseen data -- this is the validation set.\n",
+    "\n",
+    "test_size = 0.25\n",
+    "\n",
+    "train_valid_data = train_data.train_test_split(test_size=test_size)\n",
+    "train_data = train_valid_data[\"train\"]\n",
+    "valid_data = train_valid_data[\"test\"]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Next, we have to build a vocabulary. This is a look-up table where every unique token in your dataset has a corresponding index (an integer).\n",
+    "\n",
+    "We do this because machine learning models cannot operate on strings, only numerical values. Each index is used to construct a one-hot vector for each token. A one-hot vector is a vector where all the elements are 0, except one, which is 1, and the dimensionality is the total number of unique tokens in your vocabulary, commonly denoted by V."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "min_freq = 5\n",
+    "special_tokens = [\"<unk>\", \"<pad>\"]\n",
+    "\n",
+    "vocab = torchtext.vocab.build_vocab_from_iterator(\n",
+    "    train_data[\"tokens\"],\n",
+    "    min_freq=min_freq,\n",
+    "    specials=special_tokens,\n",
+    ")\n",
+    "\n",
+    "# We store the indices of the unknown and padding tokens (zero and one, respectively) in variables, as we'll use these further on in this notebook.\n",
+    "\n",
+    "unk_index = vocab[\"<unk>\"]\n",
+    "pad_index = vocab[\"<pad>\"]\n",
+    "\n",
+    "# any token not in the vocabulary is mapped to the unknown token's index\n",
+    "vocab.set_default_index(unk_index)\n",
+    "\n",
+    "# To look-up a list of tokens, we can use the vocabulary's lookup_indices method.\n",
+    "vocab.lookup_indices([\"hello\", \"world\", \"some_token\", \"<unk>\"])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Now that we have our vocabulary, we can numericalize our data. This involves converting the tokens within our dataset into indices. Similar to how we tokenized our data using the Dataset.map method, we'll define a function that takes an example and our vocabulary, gets the index for each token in each example, and then creates an ids field which contains the numericalized tokens."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def numericalize_example(example, vocab):\n",
+    "    ids = vocab.lookup_indices(example[\"tokens\"])\n",
+    "    return {\"ids\": ids}\n",
+    "\n",
+    "train_data = train_data.map(numericalize_example, fn_kwargs={\"vocab\": vocab})\n",
+    "valid_data = valid_data.map(numericalize_example, fn_kwargs={\"vocab\": vocab})\n",
+    "test_data = test_data.map(numericalize_example, fn_kwargs={\"vocab\": vocab})\n",
+    "\n",
+    "train_data = train_data.with_format(type=\"torch\", columns=[\"ids\", \"label\"])\n",
+    "valid_data = valid_data.with_format(type=\"torch\", columns=[\"ids\", \"label\"])\n",
+    "test_data = test_data.with_format(type=\"torch\", columns=[\"ids\", \"label\"])\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The final step of preparing the data is creating the data loaders. We can iterate over a data loader to retrieve batches of examples. This is also where we will perform any padding that is necessary.\n",
+    "\n",
+    "We first need to define a function to collate a batch, consisting of a list of examples, into what we want our data loader to output.\n",
+    "\n",
+    "Here, our desired output from the data loader is a dictionary with keys of \"ids\" and \"label\".\n",
+    "\n",
+    "The value of batch[\"ids\"] should be a tensor of shape [batch size, length], where length is the length of the longest sentence (in terms of tokens) within the batch, and all sentences shorter than this should be padded to that length.\n",
+    "\n",
+    "The value of batch[\"label\"] should be a tensor of shape [batch size] consisting of the label for each sentence in the batch.\n",
+    "\n",
+    "We define a function, get_collate_fn, which is passed the pad token index and returns the actual collate function. Within the actual collate function, collate_fn, we get a list of \"ids\" tensors for each example in the batch, and then use the pad_sequence function, which converts the list of tensors into the desired [batch size, length] shaped tensor and performs padding using the specified pad_index. By default, pad_sequence will return a [length, batch size] shaped tensor, but by setting batch_first=True, these two dimensions are switched. Finally, we get a list of \"label\" tensors and stack them into a single [batch size] shaped tensor.\n",
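+    "\n",
+    "For instance, here is a minimal sketch of what pad_sequence does on its own, using two hypothetical token-id sequences and pad_index = 1 (the index of the pad token from earlier):\n",
+    "\n",
+    "```python\n",
+    "import torch\n",
+    "import torch.nn as nn\n",
+    "\n",
+    "# two sequences of different lengths\n",
+    "seqs = [torch.tensor([4, 7, 9]), torch.tensor([5, 2])]\n",
+    "nn.utils.rnn.pad_sequence(seqs, padding_value=1, batch_first=True)\n",
+    "# tensor([[4, 7, 9],\n",
+    "#         [5, 2, 1]])\n",
+    "```"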
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def get_collate_fn(pad_index):\n",
+    "    def collate_fn(batch):\n",
+    "        batch_ids = [i[\"ids\"] for i in batch]\n",
+    "        batch_ids = nn.utils.rnn.pad_sequence(\n",
+    "            batch_ids, padding_value=pad_index, batch_first=True\n",
+    "        )\n",
+    "        batch_label = [i[\"label\"] for i in batch]\n",
+    "        batch_label = torch.stack(batch_label)\n",
+    "        batch = {\"ids\": batch_ids, \"label\": batch_label}\n",
+    "        return batch\n",
+    "\n",
+    "    return collate_fn\n",
+    "\n",
+    "def get_data_loader(dataset, batch_size, pad_index, shuffle=False):\n",
+    "    collate_fn = get_collate_fn(pad_index)\n",
+    "    data_loader = torch.utils.data.DataLoader(\n",
+    "        dataset=dataset,\n",
+    "        batch_size=batch_size,\n",
+    "        collate_fn=collate_fn,\n",
+    "        shuffle=shuffle,\n",
+    "    )\n",
+    "    return data_loader\n",
+    "\n",
+    "\n",
+    "batch_size = 512\n",
+    "\n",
+    "train_data_loader = get_data_loader(train_data, batch_size, pad_index, shuffle=True)\n",
+    "valid_data_loader = get_data_loader(valid_data, batch_size, pad_index)\n",
+    "test_data_loader = get_data_loader(test_data, batch_size, pad_index)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class NBoW(nn.Module):\n",
+    "    def __init__(self, vocab_size, embedding_dim, output_dim, pad_index):\n",
+    "        super().__init__()\n",
+    "        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=pad_index)\n",
+    "        self.fc = nn.Linear(embedding_dim, output_dim)\n",
+    "\n",
+    "    def forward(self, ids):\n",
+    "        # ids = [batch size, seq len]\n",
+    "        embedded = self.embedding(ids)\n",
+    "        # embedded = [batch size, seq len, embedding dim]\n",
+    "        pooled = embedded.mean(dim=1)\n",
+    "        # pooled = [batch size, embedding dim]\n",
+    "        prediction = self.fc(pooled)\n",
+    "        # prediction = [batch size, output dim]\n",
+    "        return prediction\n",
+    "\n",
+    "\n",
+    "vocab_size = len(vocab)\n",
+    "embedding_dim = 300\n",
+    "output_dim = len(train_data.unique(\"label\"))\n",
+    "\n",
+    "model = NBoW(vocab_size, embedding_dim, output_dim, pad_index)\n",
+    "\n",
+    "def count_parameters(model):\n",
+    "    return sum(p.numel() for p in model.parameters() if p.requires_grad)\n",
+    "\n",
+    "\n",
+    "print(f\"The model has {count_parameters(model):,} trainable parameters\")\n",
+    "\n",
+    "# load pretrained GloVe vectors and copy them into our embedding layer\n",
+    "vectors = torchtext.vocab.GloVe()\n",
+    "pretrained_embedding = vectors.get_vecs_by_tokens(vocab.get_itos())\n",
+    "model.embedding.weight.data = pretrained_embedding\n",
+    "\n",
+    "optimizer = optim.Adam(model.parameters())\n",
+    "\n",
+    "criterion = nn.CrossEntropyLoss()\n",
+    "\n",
+    "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
+    "\n",
+    "model = model.to(device)\n",
+    "criterion = criterion.to(device)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def train(data_loader, model, criterion, optimizer, device):\n",
+    "    model.train()\n",
+    "    epoch_losses = []\n",
+    "    epoch_accs = []\n",
+    "    for batch in tqdm.tqdm(data_loader, desc=\"training...\"):\n",
+    "        ids = batch[\"ids\"].to(device)\n",
+    "        label = batch[\"label\"].to(device)\n",
+    "        prediction = model(ids)\n",
+    "        loss = criterion(prediction, label)\n",
+    "        accuracy = get_accuracy(prediction, label)\n",
+    "        optimizer.zero_grad()\n",
+    "        loss.backward()\n",
+    "        optimizer.step()\n",
+    "        epoch_losses.append(loss.item())\n",
+    "        epoch_accs.append(accuracy.item())\n",
+    "    return np.mean(epoch_losses), np.mean(epoch_accs)"
+   ]
+  },
"cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def evaluate(data_loader, model, criterion, device):\n", + " model.eval()\n", + " epoch_losses = []\n", + " epoch_accs = []\n", + " with torch.no_grad():\n", + " for batch in tqdm.tqdm(data_loader, desc=\"evaluating...\"):\n", + " ids = batch[\"ids\"].to(device)\n", + " label = batch[\"label\"].to(device)\n", + " prediction = model(ids)\n", + " loss = criterion(prediction, label)\n", + " accuracy = get_accuracy(prediction, label)\n", + " epoch_losses.append(loss.item())\n", + " epoch_accs.append(accuracy.item())\n", + " return np.mean(epoch_losses), np.mean(epoch_accs)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def get_accuracy(prediction, label):\n", + " batch_size, _ = prediction.shape\n", + " predicted_classes = prediction.argmax(dim=-1)\n", + " correct_predictions = predicted_classes.eq(label).sum()\n", + " accuracy = correct_predictions / batch_size\n", + " return accuracy" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "n_epochs = 10\n", + "best_valid_loss = float(\"inf\")\n", + "\n", + "metrics = collections.defaultdict(list)\n", + "\n", + "for epoch in range(n_epochs):\n", + " train_loss, train_acc = train(\n", + " train_data_loader, model, criterion, optimizer, device\n", + " )\n", + " valid_loss, valid_acc = evaluate(valid_data_loader, model, criterion, device)\n", + " metrics[\"train_losses\"].append(train_loss)\n", + " metrics[\"train_accs\"].append(train_acc)\n", + " metrics[\"valid_losses\"].append(valid_loss)\n", + " metrics[\"valid_accs\"].append(valid_acc)\n", + " if valid_loss < best_valid_loss:\n", + " best_valid_loss = valid_loss\n", + " torch.save(model.state_dict(), \"nbow.pt\")\n", + " print(f\"epoch: {epoch}\")\n", + " print(f\"train_loss: {train_loss:.3f}, train_acc: {train_acc:.3f}\")\n", + " print(f\"valid_loss: {valid_loss:.3f}, valid_acc: {valid_acc:.3f}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig = plt.figure(figsize=(10, 6))\n", + "ax = fig.add_subplot(1, 1, 1)\n", + "ax.plot(metrics[\"train_losses\"], label=\"train loss\")\n", + "ax.plot(metrics[\"valid_losses\"], label=\"valid loss\")\n", + "ax.set_xlabel(\"epoch\")\n", + "ax.set_ylabel(\"loss\")\n", + "ax.set_xticks(range(n_epochs))\n", + "ax.legend()\n", + "ax.grid()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig = plt.figure(figsize=(10, 6))\n", + "ax = fig.add_subplot(1, 1, 1)\n", + "ax.plot(metrics[\"train_accs\"], label=\"train accuracy\")\n", + "ax.plot(metrics[\"valid_accs\"], label=\"valid accuracy\")\n", + "ax.set_xlabel(\"epoch\")\n", + "ax.set_ylabel(\"loss\")\n", + "ax.set_xticks(range(n_epochs))\n", + "ax.legend()\n", + "ax.grid()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model.load_state_dict(torch.load(\"nbow.pt\"))\n", + "\n", + "test_loss, test_acc = evaluate(test_data_loader, model, criterion, device)\n", + "\n", + "print(f\"test_loss: {test_loss:.3f}, test_acc: {test_acc:.3f}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def predict_sentiment(text, model, tokenizer, vocab, device):\n", + " tokens = tokenizer(text)\n", + " ids = 
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "text = \"This film is terrible!\"\n",
+    "\n",
+    "predict_sentiment(text, model, tokenizer, vocab, device)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "text = \"This film is great!\"\n",
+    "\n",
+    "predict_sentiment(text, model, tokenizer, vocab, device)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "text = \"This film is not terrible, it's great!\"\n",
+    "\n",
+    "predict_sentiment(text, model, tokenizer, vocab, device)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "text = \"This film is not great, it's terrible!\"\n",
+    "\n",
+    "predict_sentiment(text, model, tokenizer, vocab, device)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def text_to_tensor(text, tokenizer, vocab, device):\n",
+    "    tokens = tokenizer(text)\n",
+    "    ids = vocab.lookup_indices(tokens)\n",
+    "    tensor = torch.LongTensor(ids).unsqueeze(dim=0).to(device)\n",
+    "    return tensor\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Now we move on to the zero-knowledge part: we export the trained model to ONNX and save an example input as JSON, then use ezkl to generate circuit settings, compile the circuit, produce a witness, and finally set up keys, prove, and verify a proof of the model's inference."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import json\n",
+    "\n",
+    "text = \"This film is terrible!\"\n",
+    "x = text_to_tensor(text, tokenizer, vocab, device)\n",
+    "\n",
+    "# Flip the neural net into inference mode\n",
+    "model.eval()\n",
+    "model.to('cpu')\n",
+    "\n",
+    "model_path = \"network.onnx\"\n",
+    "data_path = \"input.json\"\n",
+    "\n",
+    "# Export the model\n",
+    "torch.onnx.export(model,                     # model being run\n",
+    "                  x,                         # model input (or a tuple for multiple inputs)\n",
+    "                  model_path,                # where to save the model (can be a file or file-like object)\n",
+    "                  export_params=True,        # store the trained parameter weights inside the model file\n",
+    "                  opset_version=10,          # the ONNX version to export the model to\n",
+    "                  do_constant_folding=True,  # whether to execute constant folding for optimization\n",
+    "                  input_names=['input'],     # the model's input names\n",
+    "                  output_names=['output'],   # the model's output names\n",
+    "                  dynamic_axes={'input': {0: 'batch_size'},    # variable length axes\n",
+    "                                'output': {0: 'batch_size'}})\n",
+    "\n",
+    "data_array = x.detach().numpy().reshape([-1]).tolist()\n",
+    "\n",
+    "data_json = dict(input_data=[data_array])\n",
+    "\n",
+    "print(data_json)\n",
+    "\n",
+    "# Serialize data into file\n",
+    "json.dump(data_json, open(data_path, 'w'))\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import ezkl\n",
+    "\n",
+    "run_args = ezkl.PyRunArgs()\n",
+    "run_args.logrows = 23\n",
+    "run_args.scale_rebase_multiplier = 10\n",
+    "# inputs should be auditable by all\n",
+    "run_args.input_visibility = \"public\"\n",
+    "# same with outputs\n",
+    "run_args.output_visibility = \"public\"\n",
+    "# for simplicity, we'll just use fixed model visibility: i.e. the params are public and can't be changed by the prover\n",
+    "run_args.param_visibility = \"fixed\"\n",
+    "\n",
+    "res = ezkl.gen_settings(py_run_args=run_args)\n",
+    "assert res == True\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "res = ezkl.compile_circuit()\n",
+    "assert res == True"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# fetch the structured reference string (SRS)\n",
+    "res = await ezkl.get_srs()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# now generate the witness file\n",
+    "res = await ezkl.gen_witness()\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# sanity-check the circuit against the witness with a mock proof\n",
+    "res = ezkl.mock()\n",
+    "assert res == True"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# HERE WE SETUP THE CIRCUIT PARAMS\n",
+    "# WE GOT KEYS\n",
+    "# WE GOT CIRCUIT PARAMETERS\n",
+    "# EVERYTHING ANYONE HAS EVER NEEDED FOR ZK\n",
+    "\n",
+    "res = ezkl.setup()\n",
+    "\n",
+    "assert res == True"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# GENERATE A PROOF\n",
+    "res = ezkl.prove()\n",
+    "\n",
+    "print(res)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# VERIFY IT\n",
+    "res = ezkl.verify()\n",
+    "\n",
+    "assert res == True\n",
+    "print(\"verified\")\n"
+   ]
+  },
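+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "As a sanity check, we can compare the circuit's de-quantized outputs against the PyTorch model's logits. The cell below is a minimal sketch rather than a definitive recipe: it assumes ezkl's default file names (witness.json and settings.json), that the witness's outputs field holds the quantized output felts, and that ezkl.felt_to_float together with the model_output_scales entry in settings.json recovers approximate floating-point values -- check these names against your ezkl version."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# minimal sanity-check sketch -- file names and helpers assumed as described above\n",
+    "witness = json.load(open(\"witness.json\"))\n",
+    "settings = json.load(open(\"settings.json\"))\n",
+    "\n",
+    "# rescale the quantized circuit outputs back to floats\n",
+    "out_scale = settings[\"model_output_scales\"][0]\n",
+    "circuit_logits = [ezkl.felt_to_float(f, out_scale) for f in witness[\"outputs\"][0]]\n",
+    "\n",
+    "# logits from the floating-point pytorch model for the same input\n",
+    "with torch.no_grad():\n",
+    "    torch_logits = model(x).squeeze(dim=0).tolist()\n",
+    "\n",
+    "print(\"circuit logits:\", circuit_logits)\n",
+    "print(\"torch logits:  \", torch_logits)"
+   ]
+  },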
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "We can also verify the proof on chain by creating an on-chain verifier contract."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "\n",
+    "# check if notebook is in colab\n",
+    "try:\n",
+    "    import google.colab\n",
+    "    import subprocess\n",
+    "    import sys\n",
+    "    subprocess.check_call([sys.executable, \"-m\", \"pip\", \"install\", \"solc-select\"])\n",
+    "    !solc-select install 0.8.20\n",
+    "    !solc-select use 0.8.20\n",
+    "    !solc --version\n",
+    "\n",
+    "# rely on a local solc installation if the notebook is not in colab\n",
+    "except:\n",
+    "    pass"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "sol_code_path = os.path.join('Verifier.sol')\n",
+    "abi_path = os.path.join('Verifier.abi')\n",
+    "\n",
+    "res = await ezkl.create_evm_verifier(sol_code_path=sol_code_path, abi_path=abi_path)\n",
+    "\n",
+    "assert res == True\n",
+    "assert os.path.isfile(sol_code_path)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "You should now see a `Verifier.sol` file. Right-click and save it locally.\n",
+    "\n",
+    "Now go to [https://remix.ethereum.org](https://remix.ethereum.org).\n",
+    "\n",
+    "Create a new file within Remix and copy the verifier code over.\n",
+    "\n",
+    "Finally, compile the code and deploy. For the demo you can deploy to the test environment within Remix.\n",
+    "\n",
+    "If everything works, you will have deployed your verifier on chain! Copy the proof values printed earlier into the respective fields to test that the verifier is working."
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": ".env",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.7"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/tests/py_integration_tests.rs b/tests/py_integration_tests.rs
index abb6c9129..30f5e41ba 100644
--- a/tests/py_integration_tests.rs
+++ b/tests/py_integration_tests.rs
@@ -68,6 +68,8 @@ mod py_tests {
         "install",
         "torch-geometric==2.5.2",
         "torch==2.2.2",
+        "datasets==3.2.0",
+        "torchtext==0.17.2",
         "torchvision==0.17.2",
         "pandas==2.2.1",
         "numpy==1.26.4",
@@ -190,6 +192,16 @@ mod py_tests {
         }
     });
 
+    #[test]
+    fn neural_bag_of_words_notebook() {
+        crate::py_tests::init_binary();
+        let test_dir: TempDir = TempDir::new("neural_bow").unwrap();
+        let path = test_dir.path().to_str().unwrap();
+        crate::py_tests::mv_test_(path, "neural_bow.ipynb");
+        run_notebook(path, "neural_bow.ipynb");
+        test_dir.close().unwrap();
+    }
+
     #[test]
     fn felt_conversion_test_notebook() {
         crate::py_tests::init_binary();