Add notebook to convert excel data dictionary to word

SANDAG · Aug 29, 2024 · 37a3340 · 37a3340
1 parent 32f40ac
commit 37a3340
Show file tree

Hide file tree

Showing 2 changed files with 158 additions and 0 deletions.
diff --git a/notebooks/make-word-document-from-excel-data-dictionary.ipynb b/notebooks/make-word-document-from-excel-data-dictionary.ipynb
@@ -0,0 +1,158 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### This code converts the excel data dictionary into word document"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "from docx import Document\n",
+    "from docx.shared import Pt"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Inputs/Outputs"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#Input filename\n",
+    "input_data_dictionary_excel = '../reports/data_dictionary.xlsx'\n",
+    "#Output filename\n",
+    "output_data_dicitionary_word = '../reports/data_dictionary.docx'"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Data Loading"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Load the Excel sheets into \n",
+    "df_fields = pd.read_excel(input_data_dictionary_excel, sheet_name='Pydantic Class Documentation')\n",
+    "df_enums = pd.read_excel(input_data_dictionary_excel, sheet_name='Enum Mappings')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Script/Method"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Create a new Word document\n",
+    "doc = Document()\n",
+    "doc.add_heading('Data Dictionary', 0)\n",
+    "\n",
+    "\n",
+    "# Loop through each row in the fields DataFrame\n",
+    "for index, row in df_fields.iterrows():\n",
+    "    field_name = row['Field']\n",
+    "    data_type = row['Data Type']\n",
+    "    description = row['Description']\n",
+    "    \n",
+    "    # Skip rows where the Field (variable name) is blank\n",
+    "    if pd.isna(field_name):\n",
+    "        continue\n",
+    "\n",
+    "    # Handle 'Data Model' differently\n",
+    "    if data_type == 'Data Model':\n",
+    "        doc.add_heading(f'Data Model: {field_name}', level=2)\n",
+    "        doc.add_paragraph(f'Description: {description}')\n",
+    "    else:\n",
+    "        doc.add_heading(f'{field_name}', level=3)\n",
+    "        doc.add_paragraph(f'Description: {description}')\n",
+    "        doc.add_paragraph(f'Response Type: {data_type}')\n",
+    "        \n",
+    "        # Check if the data type is not int, str, bool, or float, and handle enums\n",
+    "        if data_type not in ['int', 'str', 'bool', 'float']:\n",
+    "            doc.add_paragraph('Response Options:')\n",
+    "            \n",
+    "            # Filter the enums for the specific Data Type\n",
+    "            enum_rows = df_enums[df_enums['Enum'] == data_type]\n",
+    "\n",
+    "            # Create a table for enum mappings/ response options if there are any\n",
+    "            if not enum_rows.empty:\n",
+    "                table = doc.add_table(rows=1, cols=2)\n",
+    "                table.style = 'Table Grid'\n",
+    "\n",
+    "                # Add header row\n",
+    "                hdr_cells = table.rows[0].cells\n",
+    "                hdr_cells[0].text = 'Code'\n",
+    "                hdr_cells[1].text = 'Label'\n",
+    "\n",
+    "                # Add enum mappings to the table\n",
+    "                for _, enum_row in enum_rows.iterrows():\n",
+    "                    row_cells = table.add_row().cells\n",
+    "                    row_cells[0].text = str(enum_row['Codes'])\n",
+    "                    row_cells[1].text = str(enum_row['Labels'])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Saving the document"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Save the document\n",
+    "doc.save(output_data_dicitionary_word)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.4"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/reports/data_dictionary.docx b/reports/data_dictionary.docx