diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 819a832..5a68682 100755 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -51,6 +51,7 @@ jobs: run: make test env: POVERTYTRACKER_RAW_URL: ${{ secrets.POVERTYTRACKER_RAW_URL }} + POLICYENGINE_GITHUB_MICRODATA_AUTH_TOKEN: ${{ secrets.POLICYENGINE_GITHUB_MICRODATA_AUTH_TOKEN}} - uses: codecov/codecov-action@v3 - name: Build package run: make diff --git a/.github/workflows/push.yaml b/.github/workflows/push.yaml index 18ea3fb..da0d25f 100755 --- a/.github/workflows/push.yaml +++ b/.github/workflows/push.yaml @@ -66,6 +66,7 @@ jobs: run: make test env: POVERTYTRACKER_RAW_URL: ${{ secrets.POVERTYTRACKER_RAW_URL }} + POLICYENGINE_GITHUB_MICRODATA_AUTH_TOKEN: ${{ secrets.POLICYENGINE_GITHUB_MICRODATA_AUTH_TOKEN}} - uses: codecov/codecov-action@v3 - name: Test documentation builds if: matrix.os == 'ubuntu-latest' diff --git a/CHANGELOG.md b/CHANGELOG.md index 1a46321..486c255 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
+## [0.3.0] - 2024-07-16 07:44:29 + +### Added + +- PolicyEngine UK testing + ## [0.2.0] - 2024-07-02 13:04:56 ### Added @@ -17,7 +23,5 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Basic repo - - +[1.0.0]: https://github.com/PolicyEngine/reweight/compare/0.2.0...1.0.0 [0.2.0]: https://github.com/PolicyEngine/reweight/compare/0.1.0...0.2.0 - diff --git a/changelog.yaml b/changelog.yaml index 0461e16..5cfcb68 100644 --- a/changelog.yaml +++ b/changelog.yaml @@ -1,10 +1,15 @@ - changes: added: - - Basic repo + - Basic repo date: 2024-06-26 00:00:00 version: 0.1.0 - bump: minor changes: added: - - PyTorch testing with Microsimulation datasets + - PyTorch testing with Microsimulation datasets date: 2024-07-02 13:04:56 +- bump: minor + changes: + added: + - PolicyEngine UK testing + date: 2024-07-16 07:44:29 diff --git a/changelog_entry.yaml b/changelog_entry.yaml index e69de29..506ffb5 100644 --- a/changelog_entry.yaml +++ b/changelog_entry.yaml @@ -0,0 +1,4 @@ +- bump: minor + changes: + fixed: + - PolicyEngine UK secrets issues diff --git a/reweight/tests/test_installation.py b/reweight/tests/test_installation.py index 29f06ec..b0ec532 100644 --- a/reweight/tests/test_installation.py +++ b/reweight/tests/test_installation.py @@ -13,3 +13,11 @@ def test_install(): import reweight except: raise AssertionError("Failed to build reweight") + + +def test_secret_usage(): + import os + + token = os.environ["POLICYENGINE_GITHUB_MICRODATA_AUTH_TOKEN"] + token_not_none = token != None + assert token_not_none, "Authentication token is None" diff --git a/reweight/tests/test_uk_prototype.py b/reweight/tests/test_uk_prototype.py new file mode 100644 index 0000000..35ffbce --- /dev/null +++ b/reweight/tests/test_uk_prototype.py @@ -0,0 +1,5 @@ +def test_uk_microsimulation(): + from policyengine_uk import Microsimulation + + # Create a Microsimulation instance + sim = Microsimulation() diff --git a/setup.py b/setup.py index 0d37ef7..882a501 
100644 --- a/setup.py +++ b/setup.py @@ -34,6 +34,7 @@ "pytest", "policyengine-core~=2.21.8", "policyengine-us~=0.794.1", + "policyengine-uk", ], extras_require={ "dev": [ diff --git a/test.ipynb b/test.ipynb new file mode 100644 index 0000000..58f43c3 --- /dev/null +++ b/test.ipynb @@ -0,0 +1,191 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from policyengine_uk import Microsimulation" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "token = os.environ[\"POLICYENGINE_GITHUB_MICRODATA_AUTH_TOKEN\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "sim = Microsimulation()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "from policyengine_uk.data import RawFRS_2021_22\n", + "RawFRS_2021_22().download()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "from policyengine_uk.data.datasets.frs.calibration.calibrate import generate_model_variables\n", + "\n", + "(\n", + " household_weights,\n", + " weight_adjustment,\n", + " values_df,\n", + " targets,\n", + " targets_array,\n", + " equivalisation_factors_array\n", + ") = generate_model_variables(\"frs_2021\", 2025)\n", + "\n", + "#This returns a set of household weights, a random tensor of the same size as the weights tensor,\n", + "#a Pandas dataframe to transform weights into statistical predictions, a dictionary of target values,\n", + "#an array of target values, and some equivalisation factors I don't understand." 
def calibrate(
    household_weights,
    weight_adjustment,
    values_df,
    targets,
    targets_array,
    equivalisation_factors_array,
    num_epochs=1000,
    log_every=100,
    writer=None,
):
    """Fit household weights by gradient descent so weighted totals hit targets.

    The weights are optimised in log space with Adam; the loss is the mean of
    ``((estimate - target) / equivalisation_factor) ** 2`` where
    ``estimate = exp(log_weights) @ values``.

    Args:
        household_weights: torch tensor of starting weights (passed to
            ``torch.log``). Assumed to be strictly positive, one entry per
            row of ``values_df`` -- TODO confirm against the data source.
        weight_adjustment: unused; accepted so the tuple returned by
            ``generate_model_variables`` can be splatted straight in.
        values_df: pandas DataFrame converted with ``to_numpy()`` and
            right-multiplied by the weights. Presumably one row per household
            and one column per target -- verify against the caller.
        targets: unused; accepted for signature compatibility.
        targets_array: target values subtracted from the estimates. Must
            broadcast against the estimate vector.
        equivalisation_factors_array: per-target divisor applied to the error
            before squaring.
        num_epochs: number of optimiser steps (default 1000, the value that
            was previously hard-coded).
        log_every: print the loss every ``log_every`` epochs; ``0`` or
            ``None`` disables printing (default 100).
        writer: optional object with ``add_scalar(tag, value, step)`` and
            ``flush()``. When omitted, a TensorBoard ``SummaryWriter`` is
            created and closed by this function.

    Returns:
        numpy.ndarray: the calibrated weights, ``exp(log_weights)``.
        (The function previously computed this and discarded it.)
    """
    owns_writer = writer is None
    if owns_writer:
        # Imported lazily so callers that inject their own writer do not need
        # TensorBoard installed.
        from torch.utils.tensorboard import SummaryWriter

        writer = SummaryWriter()

    # Optimise log-weights so exp(log_weights) can never become negative.
    # detach() guarantees a leaf tensor, which the optimiser requires.
    log_weights = torch.log(household_weights).detach().requires_grad_()

    # exp(log_weights) @ sim_matrix should approximate targets_array.
    sim_matrix = torch.tensor(values_df.to_numpy(), dtype=torch.float32)

    optimizer = torch.optim.Adam([log_weights])

    try:
        for epoch in range(num_epochs):
            # Estimate the targets from the current weights.
            targets_estimate = torch.exp(log_weights) @ sim_matrix

            # Mean squared error, scaled per target.
            loss = torch.mean(
                (
                    (targets_estimate - targets_array)
                    / equivalisation_factors_array
                )
                ** 2
            )

            writer.add_scalar("Loss/train", loss.item(), epoch)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Print the loss every `log_every` epochs.
            if log_every and epoch % log_every == 0:
                print(f"Epoch {epoch}, Loss: {loss.item()}")
    finally:
        writer.flush()
        # Only release a writer this function created; an injected writer
        # belongs to the caller.
        if owns_writer:
            writer.close()

    return np.exp(log_weights.detach().numpy())