-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
350 changed files
with
13,608 additions
and
1,702 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,6 @@ | ||
# Project specific | ||
data/models/** | ||
write-up/resources/** | ||
write-up/** | ||
|
||
*.bak | ||
.gitattributes | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,310 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 1, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"#default_exp utils" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"# Utilities\n", | ||
"\n", | ||
"This notebook documents the general utility functions developed in this research.\n", | ||
"\n", | ||
"<br>\n", | ||
"\n", | ||
"### Imports" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 2, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"#exports\n", | ||
"import json\n", | ||
"import numpy as np\n", | ||
"import pandas as pd\n", | ||
"\n", | ||
"import junix\n", | ||
"from html.parser import HTMLParser\n", | ||
"from nbdev.export2html import convert_md\n", | ||
"\n", | ||
"import os\n", | ||
"import codecs\n", | ||
"from ipypb import track\n", | ||
"from warnings import warn" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"<br>\n", | ||
"\n", | ||
"### User Inputs" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 3, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Directory scanned for the source notebooks (passed to convert_nbs_to_md as nbs_dir)\n", | ||
"dev_nbs_dir = '../nbs'\n", | ||
"# Directory the markdown files are written to and later post-processed in\n", | ||
"docs_dir = '.'\n", | ||
"# NOTE(review): not referenced anywhere else in this notebook - presumably the user-guide docs folder; confirm it is still needed\n", | ||
"ug_docs_dir = 'user-guide'\n", | ||
"# Directory the notebook images are exported to\n", | ||
"docs_nb_img_dir = f'{docs_dir}/img/nbs'" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"<br>\n", | ||
"\n", | ||
"### Converting the Notebooks to Documentation\n", | ||
"\n", | ||
"We'll first convert the notebooks to markdown" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 4, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"#exports\n", | ||
"def convert_file_to_json(filepath):\n", | ||
"    \"\"\"Read the file at `filepath` as UTF-8 text and return its parsed JSON content.\"\"\"\n", | ||
"    with open(filepath, 'r', encoding='utf8') as json_file:\n", | ||
"        return json.load(json_file)\n", | ||
"\n", | ||
"# Monkey-patch junix so that it loads notebooks through the UTF-8 reader defined above\n", | ||
"# NOTE(review): presumably junix's own reader relies on the platform default encoding - confirm\n", | ||
"junix.exporter.convert_file_to_json = convert_file_to_json\n", | ||
"\n", | ||
"def encode_file_as_utf8(fp):\n", | ||
"    \"\"\"Re-save the text file at `fp` with UTF-8 encoding, in place.\n", | ||
"\n", | ||
"    The file is opened without an explicit encoding (so it is decoded with the\n", | ||
"    platform default) and its full contents are written back as UTF-8.\n", | ||
"    Empty files are left untouched.\n", | ||
"\n", | ||
"    Parameters\n", | ||
"    ----------\n", | ||
"    fp : str\n", | ||
"        Path of the file to re-encode.\n", | ||
"    \"\"\"\n", | ||
"    # Read the whole file: the previous 1 MiB read limit meant that anything\n", | ||
"    # beyond the first 1,048,576 characters was lost when the file was rewritten\n", | ||
"    with codecs.open(fp, 'r') as file:\n", | ||
"        contents = file.read()\n", | ||
"\n", | ||
"    if not contents:\n", | ||
"        # Nothing to re-encode, avoid touching the file\n", | ||
"        return\n", | ||
"\n", | ||
"    with codecs.open(fp, 'w', 'utf-8') as file:\n", | ||
"        file.write(contents)\n", | ||
" \n", | ||
"def convert_nbs_to_md(nbs_dir, docs_nb_img_dir, docs_dir):\n", | ||
"    \"\"\"Convert every `.ipynb` file found in `nbs_dir` into a markdown file in `docs_dir`.\n", | ||
"\n", | ||
"    For each notebook the images are exported to `docs_nb_img_dir` with junix, the\n", | ||
"    markdown is generated with nbdev's `convert_md`, and the resulting `.md` file is\n", | ||
"    then re-saved as UTF-8.\n", | ||
"    \"\"\"\n", | ||
"    notebook_names = [name for name in os.listdir(nbs_dir) if name.endswith('.ipynb')]\n", | ||
"\n", | ||
"    for notebook_name in track(notebook_names):\n", | ||
"        notebook_path = f'{nbs_dir}/{notebook_name}'\n", | ||
"        junix.export_images(notebook_path, docs_nb_img_dir)\n", | ||
"        convert_md(notebook_path, docs_dir, img_path=f'{docs_nb_img_dir}/', jekyll=False)\n", | ||
"\n", | ||
"        # The markdown file is expected to sit in docs_dir under the notebook's name\n", | ||
"        notebook_stem = notebook_name.replace('.ipynb', '')\n", | ||
"        markdown_path = docs_dir + '/' + notebook_stem + '.md'\n", | ||
"        encode_file_as_utf8(markdown_path)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 5, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/html": [ | ||
"<div><span class=\"Text-label\" style=\"display:inline-block; overflow:hidden; white-space:nowrap; text-overflow:ellipsis; min-width:0; max-width:15ex; vertical-align:middle; text-align:right\"></span>\n", | ||
"<progress style=\"width:60ex\" max=\"14\" value=\"14\" class=\"Progress-main\"/></progress>\n", | ||
"<span class=\"Progress-label\"><strong>100%</strong></span>\n", | ||
"<span class=\"Iteration-label\">14/14</span>\n", | ||
"<span class=\"Time-label\">[00:15<00:00, 1.05s/it]</span></div>" | ||
], | ||
"text/plain": [ | ||
"\u001b[A\u001b[2K\r", | ||
" [████████████████████████████████████████████████████████████] 14/14 [00:15<00:00, 1.05s/it]" | ||
] | ||
}, | ||
"metadata": {}, | ||
"output_type": "display_data" | ||
} | ||
], | ||
"source": [ | ||
"# Export every development notebook to markdown (images are written to docs_nb_img_dir)\n", | ||
"convert_nbs_to_md(dev_nbs_dir, docs_nb_img_dir, docs_dir)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"<br>\n", | ||
"\n", | ||
"We'll then parse the HTML tables into markdown" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 6, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"#exports\n", | ||
"class MyHTMLParser(HTMLParser):\n", | ||
"    \"\"\"HTML parser that records every start and end tag it meets, in order, in `self.tags`.\"\"\"\n", | ||
"\n", | ||
"    def __init__(self):\n", | ||
"        HTMLParser.__init__(self)\n", | ||
"        self.tags = []\n", | ||
"\n", | ||
"    def handle_starttag(self, tag, attrs):\n", | ||
"        # Store the start tag exactly as written in the source, attributes included\n", | ||
"        raw_start_tag = self.get_starttag_text()\n", | ||
"        self.tags.append(raw_start_tag)\n", | ||
"\n", | ||
"    def handle_endtag(self, tag):\n", | ||
"        closing_tag = f'</{tag}>'\n", | ||
"        self.tags.append(closing_tag)\n", | ||
" \n", | ||
"def get_substring_idxs(string, substring):\n", | ||
"    \"\"\"Return the start index of every (possibly overlapping) occurrence of `substring` in `string`.\"\"\"\n", | ||
"    width = len(substring)\n", | ||
"    last_start = len(string) - width\n", | ||
"    return [start for start in range(last_start + 1) if string[start:start + width] == substring]\n", | ||
"\n", | ||
"def convert_df_to_md(df):\n", | ||
"    \"\"\"Render `df` as a markdown table, using its first column as the index.\n", | ||
"\n", | ||
"    Non-string labels (e.g. the tuples of a multi-level header) are flattened: the\n", | ||
"    index name keeps its last element and every remaining column keeps its first element.\n", | ||
"    \"\"\"\n", | ||
"    indexed_df = df.set_index(df.columns[0])\n", | ||
"\n", | ||
"    index_label = indexed_df.index.name\n", | ||
"    if not isinstance(index_label, str):\n", | ||
"        indexed_df.index.name = index_label[-1]\n", | ||
"\n", | ||
"    flat_columns = []\n", | ||
"    for col in indexed_df.columns:\n", | ||
"        flat_columns.append(col if isinstance(col, str) else col[0])\n", | ||
"    indexed_df.columns = flat_columns\n", | ||
"\n", | ||
"    return indexed_df.to_markdown()\n", | ||
"\n", | ||
"def extract_div_to_md_table(start_idx, end_idx, table_and_div_tags, file_txt):\n", | ||
"    \"\"\"Extract the `<div>` block that wraps one HTML table and convert the table to markdown.\n", | ||
"\n", | ||
"    Parameters\n", | ||
"    ----------\n", | ||
"    start_idx, end_idx : int\n", | ||
"        Positions of the table's opening and closing tags within `table_and_div_tags`.\n", | ||
"    table_and_div_tags : list of str\n", | ||
"        The `<div>`, `</div>` and table tags of the file, in document order.\n", | ||
"    file_txt : str\n", | ||
"        Full text of the file the tags were parsed from.\n", | ||
"\n", | ||
"    Returns\n", | ||
"    -------\n", | ||
"    tuple of (str, str)\n", | ||
"        The text running from the last `<div>` before the table up to (but not\n", | ||
"        including) the first `</div>` after it, and the table rendered as markdown.\n", | ||
"\n", | ||
"    Raises\n", | ||
"    ------\n", | ||
"    AssertionError\n", | ||
"        If the extracted text does not contain exactly one table.\n", | ||
"    \"\"\"\n", | ||
"    from io import StringIO\n", | ||
"\n", | ||
"    n_start_divs_before = table_and_div_tags[:start_idx].count('<div>')\n", | ||
"    n_end_divs_before = table_and_div_tags[:end_idx].count('</div>')\n", | ||
"\n", | ||
"    # Map the tag counts back onto character offsets within the raw text\n", | ||
"    # NOTE(review): if no '<div>' precedes the table the -1 index selects the last '<div>' in the file - confirm this cannot happen\n", | ||
"    div_start_idx = get_substring_idxs(file_txt, '<div>')[n_start_divs_before-1]\n", | ||
"    div_end_idx = get_substring_idxs(file_txt, '</div>')[n_end_divs_before]\n", | ||
"\n", | ||
"    div_txt = file_txt[div_start_idx:div_end_idx]\n", | ||
"\n", | ||
"    # Wrap the text in StringIO: passing literal HTML strings to read_html is deprecated since pandas 2.1\n", | ||
"    potential_dfs = pd.read_html(StringIO(div_txt))\n", | ||
"\n", | ||
"    assert len(potential_dfs) == 1, 'Multiple tables were found when there should be only one'\n", | ||
"    df = potential_dfs[0]\n", | ||
"    md_table = convert_df_to_md(df)\n", | ||
"\n", | ||
"    return div_txt, md_table\n", | ||
"\n", | ||
"def extract_div_to_md_tables(md_fp):\n", | ||
"    \"\"\"Find every HTML dataframe table in the file at `md_fp` and pair its wrapping div text with a markdown table.\n", | ||
"\n", | ||
"    Returns a list of `(div_txt, md_table)` tuples, one per table, in document order.\n", | ||
"    \"\"\"\n", | ||
"    table_open_tag = '<table border=\"1\" class=\"dataframe\">'\n", | ||
"    tags_of_interest = ['<div>', '</div>', table_open_tag, '</table>']\n", | ||
"\n", | ||
"    with open(md_fp, 'r') as md_file:\n", | ||
"        file_txt = md_file.read()\n", | ||
"\n", | ||
"    parser = MyHTMLParser()\n", | ||
"    parser.feed(file_txt)\n", | ||
"    table_and_div_tags = [tag for tag in parser.tags if tag in tags_of_interest]\n", | ||
"\n", | ||
"    # First locate every (table start, table end) pair within the filtered tag list ...\n", | ||
"    table_spans = []\n", | ||
"    for start_idx, tag in enumerate(table_and_div_tags):\n", | ||
"        if tag == table_open_tag:\n", | ||
"            end_idx = start_idx + table_and_div_tags[start_idx:].index('</table>')\n", | ||
"            table_spans.append((start_idx, end_idx))\n", | ||
"\n", | ||
"    # ... then convert each of them\n", | ||
"    div_to_md_tables = []\n", | ||
"    for start_idx, end_idx in table_spans:\n", | ||
"        div_txt, md_table = extract_div_to_md_table(start_idx, end_idx, table_and_div_tags, file_txt)\n", | ||
"        div_to_md_tables.append((div_txt, md_table))\n", | ||
"\n", | ||
"    return div_to_md_tables\n", | ||
"\n", | ||
"def clean_md_file_tables(md_fp):\n", | ||
"    \"\"\"Replace each HTML dataframe block in the markdown file at `md_fp` with its markdown table, in place.\"\"\"\n", | ||
"    replacements = extract_div_to_md_tables(md_fp)\n", | ||
"\n", | ||
"    with open(md_fp, 'r') as md_file:\n", | ||
"        updated_text = md_file.read()\n", | ||
"\n", | ||
"    for html_block, markdown_table in replacements:\n", | ||
"        updated_text = updated_text.replace(html_block, markdown_table)\n", | ||
"\n", | ||
"    with open(md_fp, 'w') as md_file:\n", | ||
"        md_file.write(updated_text)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 7, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Collect every markdown file in the docs directory, skipping '00-utilities.md'\n", | ||
"# NOTE(review): presumably that is the page generated from this notebook - confirm\n", | ||
"md_fps = [f'{docs_dir}/{f}' for f in os.listdir(docs_dir) if f.endswith('.md') and f != '00-utilities.md']\n", | ||
"\n", | ||
"for md_fp in md_fps:\n", | ||
"    # clean_md_file_tables edits the file in place and returns None, so there is nothing to keep\n", | ||
"    clean_md_file_tables(md_fp)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"<br>\n", | ||
"\n", | ||
"And finally change the filepaths for any images in the notebooks" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 8, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"#exports\n", | ||
"def clean_md_file_img_fps(md_fp):\n", | ||
"    \"\"\"Rewrite the markdown file at `md_fp` in place, replacing '../docs/img/nbs' with 'img/nbs'.\"\"\"\n", | ||
"    with open(md_fp, 'r') as md_file:\n", | ||
"        original_text = md_file.read()\n", | ||
"\n", | ||
"    fixed_text = original_text.replace('../docs/img/nbs', 'img/nbs')\n", | ||
"\n", | ||
"    with open(md_fp, 'w') as md_file:\n", | ||
"        md_file.write(fixed_text)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 9, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Apply the image-path fix to every markdown file collected in md_fps\n", | ||
"for md_fp in md_fps:\n", | ||
" clean_md_file_img_fps(md_fp)" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "batopt", | ||
"language": "python", | ||
"name": "batopt" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.9.1" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 5 | ||
} |
Oops, something went wrong.