Skip to content

Commit

Permalink
Add notebook to convert excel data dictionary to word
Browse files Browse the repository at this point in the history
  • Loading branch information
vivverma9 committed Aug 29, 2024
1 parent 32f40ac commit 37a3340
Show file tree
Hide file tree
Showing 2 changed files with 158 additions and 0 deletions.
158 changes: 158 additions & 0 deletions notebooks/make-word-document-from-excel-data-dictionary.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### This notebook converts the Excel data dictionary into a Word document"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"from docx import Document\n",
"from docx.shared import Pt"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Inputs/Outputs"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
"# Excel workbook that holds the data dictionary (read by the data-loading cell)\n",
"input_data_dictionary_excel = '../reports/data_dictionary.xlsx'\n",
"# Word document to generate (written by the final cell).\n",
"# NOTE: 'dicitionary' is a misspelling; kept as-is because the save cell uses this exact name.\n",
"output_data_dicitionary_word = '../reports/data_dictionary.docx'"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Data Loading"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Read the two worksheets of the data dictionary workbook into DataFrames:\n",
"# the field documentation first, then the enum code/label mappings.\n",
"df_fields, df_enums = (\n",
"    pd.read_excel(input_data_dictionary_excel, sheet_name=sheet)\n",
"    for sheet in ('Pydantic Class Documentation', 'Enum Mappings')\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Script/Method"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Build the Word document: one heading per data model / field, followed by a\n",
"# table of response options for every field whose data type has enum mappings.\n",
"\n",
"# Data types that never have enum mappings to list\n",
"PRIMITIVE_TYPES = ('int', 'str', 'bool', 'float')\n",
"\n",
"\n",
"def _cell_text(value):\n",
"    \"\"\"Return a spreadsheet cell as text; blank (NaN) cells become ''.\n",
"\n",
"    pandas loads empty Excel cells as NaN, which would otherwise be written\n",
"    into the document as the literal text 'nan'.\n",
"    \"\"\"\n",
"    return '' if pd.isna(value) else str(value)\n",
"\n",
"\n",
"# Create a new Word document\n",
"doc = Document()\n",
"doc.add_heading('Data Dictionary', 0)\n",
"\n",
"# Loop through each row in the fields DataFrame\n",
"for _, row in df_fields.iterrows():\n",
"    field_name = _cell_text(row['Field'])\n",
"    data_type = _cell_text(row['Data Type'])\n",
"    description = _cell_text(row['Description'])\n",
"\n",
"    # Skip rows where the Field (variable name) is blank or whitespace only\n",
"    if not field_name.strip():\n",
"        continue\n",
"\n",
"    # 'Data Model' rows get a higher-level heading and a description only\n",
"    if data_type == 'Data Model':\n",
"        doc.add_heading(f'Data Model: {field_name}', level=2)\n",
"        doc.add_paragraph(f'Description: {description}')\n",
"        continue\n",
"\n",
"    doc.add_heading(f'{field_name}', level=3)\n",
"    doc.add_paragraph(f'Description: {description}')\n",
"    doc.add_paragraph(f'Response Type: {data_type}')\n",
"\n",
"    # Primitive types have no response options to list\n",
"    if data_type in PRIMITIVE_TYPES:\n",
"        continue\n",
"\n",
"    # Filter the enum mappings for this field's data type\n",
"    enum_rows = df_enums[df_enums['Enum'] == data_type]\n",
"\n",
"    # Only announce response options when there is a table to show;\n",
"    # otherwise the document would contain a dangling 'Response Options:' label.\n",
"    if enum_rows.empty:\n",
"        continue\n",
"\n",
"    doc.add_paragraph('Response Options:')\n",
"    table = doc.add_table(rows=1, cols=2)\n",
"    table.style = 'Table Grid'\n",
"\n",
"    # Add header row\n",
"    hdr_cells = table.rows[0].cells\n",
"    hdr_cells[0].text = 'Code'\n",
"    hdr_cells[1].text = 'Label'\n",
"\n",
"    # Add one table row per enum code/label pair\n",
"    for _, enum_row in enum_rows.iterrows():\n",
"        row_cells = table.add_row().cells\n",
"        row_cells[0].text = _cell_text(enum_row['Codes'])\n",
"        row_cells[1].text = _cell_text(enum_row['Labels'])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Saving the document"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Write the assembled Word document to the configured output path\n",
"doc.save(output_data_dicitionary_word)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Binary file added reports/data_dictionary.docx
Binary file not shown.

0 comments on commit 37a3340

Please sign in to comment.