-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add notebook to convert excel data dictionary to word
- Loading branch information
Showing
2 changed files
with
158 additions
and
0 deletions.
There are no files selected for viewing
158 changes: 158 additions & 0 deletions
158
notebooks/make-word-document-from-excel-data-dictionary.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,158 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"### This notebook converts the Excel data dictionary into a Word document" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 6, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"import pandas as pd\n", | ||
"from docx import Document\n", | ||
"from docx.shared import Pt" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"### Inputs/Outputs" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 19, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"#Input filename\n", | ||
"input_data_dictionary_excel = '../reports/data_dictionary.xlsx'\n", | ||
"#Output filename\n", | ||
"output_data_dicitionary_word = '../reports/data_dictionary.docx'" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"### Data Loading" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Load the Excel sheets into pandas DataFrames\n", | ||
"df_fields = pd.read_excel(input_data_dictionary_excel, sheet_name='Pydantic Class Documentation')\n", | ||
"df_enums = pd.read_excel(input_data_dictionary_excel, sheet_name='Enum Mappings')" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"### Script/Method" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 18, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Create a new Word document\n", | ||
"doc = Document()\n", | ||
"doc.add_heading('Data Dictionary', 0)\n", | ||
"\n", | ||
"\n", | ||
"# Loop through each row in the fields DataFrame\n", | ||
"for index, row in df_fields.iterrows():\n", | ||
" field_name = row['Field']\n", | ||
" data_type = row['Data Type']\n", | ||
" description = row['Description']\n", | ||
" \n", | ||
" # Skip rows where the Field (variable name) is blank\n", | ||
" if pd.isna(field_name):\n", | ||
" continue\n", | ||
"\n", | ||
" # Handle 'Data Model' differently\n", | ||
" if data_type == 'Data Model':\n", | ||
" doc.add_heading(f'Data Model: {field_name}', level=2)\n", | ||
" doc.add_paragraph(f'Description: {description}')\n", | ||
" else:\n", | ||
" doc.add_heading(f'{field_name}', level=3)\n", | ||
" doc.add_paragraph(f'Description: {description}')\n", | ||
" doc.add_paragraph(f'Response Type: {data_type}')\n", | ||
" \n", | ||
" # Check if the data type is not int, str, bool, or float, and handle enums\n", | ||
" if data_type not in ['int', 'str', 'bool', 'float']:\n", | ||
" doc.add_paragraph('Response Options:')\n", | ||
" \n", | ||
" # Filter the enums for the specific Data Type\n", | ||
" enum_rows = df_enums[df_enums['Enum'] == data_type]\n", | ||
"\n", | ||
" # Create a table for enum mappings/ response options if there are any\n", | ||
" if not enum_rows.empty:\n", | ||
" table = doc.add_table(rows=1, cols=2)\n", | ||
" table.style = 'Table Grid'\n", | ||
"\n", | ||
" # Add header row\n", | ||
" hdr_cells = table.rows[0].cells\n", | ||
" hdr_cells[0].text = 'Code'\n", | ||
" hdr_cells[1].text = 'Label'\n", | ||
"\n", | ||
" # Add enum mappings to the table\n", | ||
" for _, enum_row in enum_rows.iterrows():\n", | ||
" row_cells = table.add_row().cells\n", | ||
" row_cells[0].text = str(enum_row['Codes'])\n", | ||
" row_cells[1].text = str(enum_row['Labels'])" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"### Saving the document" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Save the document\n", | ||
"doc.save(output_data_dicitionary_word)" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.12.4" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |
Binary file not shown.