Skip to content

Commit

Permalink
Merge pull request tudat-team#72 from larshinueber/feature/create-scr…
Browse files Browse the repository at this point in the history
…ipts

Convert jupyter notebooks to python files
  • Loading branch information
DominicDirkx authored Oct 20, 2024
2 parents efb80a2 + c824ad2 commit f306933
Show file tree
Hide file tree
Showing 34 changed files with 3,175 additions and 2,836 deletions.
26 changes: 24 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,28 @@ conda activate tudat-examples
The following guidelines should be followed when creating a new example application.

1. Any modification or addition to this set of examples should be made in a personal fork of the current repository. No changes are to be done directly on a local clone of this repo.
2. The example should be written directly on a Jupyter notebook (`.ipynb` file). Then, the following command can be run from the CLI to create a `.py` file with the same code as the notebook file: `jupyter nbconvert --to python mynotebook.ipynb`. Make sure to change `mynotebook` to the name of the notebook file.
3. The markdown blocks are not optimally converted. Thus, once the `.py` file is created as described above, the script `create_scripts.py` is to be executed. This file reformats the markdown blocks in the `.py` files into a more readable look. Sometimes this cleanup is not perfect, so manually check the `.py` file to make sure everything is fine and correct anything that is not.
2. The example should be written directly in a Jupyter notebook (`.ipynb` file).
3. Convert the finished `.ipynb` example to a `.py` file with the `create_scripts.py` CLI utility:
1. Activate the virtual environment:

```bash
conda activate tudat-examples
```

2. Use the `create_scripts.py` CLI application to convert your notebook:

```bash
python create_scripts.py path/to/your/notebook.ipynb
```

By default, this converts the `.ipynb` notebook to a `.py` file, cleans it, checks for syntax errors and runs it.

3. Use the `-h` flag to see the available options of the CLI utility. A common set of options is:

```bash
python create_scripts.py -a --no-run
```

This converts all `.ipynb` files to `.py` files, cleans them and checks them for syntax errors, but does not run them.

4. At this point, the example is complete. You are ready to create a pull request from your personal fork to the current repository, and the admins will take it from there.
262 changes: 136 additions & 126 deletions create_scripts.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,160 +5,170 @@
a copy of the license with this file. If not, please visit:
http://tudat.tudelft.nl/LICENSE.
"""

# PLEASE NOTE:
# This script is NOT a tudatpy example.
# It is a script to clean the .py files that are generated from Jupyter notebooks, using the following option: File > Download as > Python (.py)
# It is a script to create and clean .py files from Jupyter notebooks.
# See python create_scripts.py -h for more information.
# Running it will automatically edit all the .py example files (please check the changes made before pushing them to the repository).

# Standard library imports
import re
import argparse
import glob
import re
import subprocess
import sys
from pathlib import Path

# Other imports
from tqdm import tqdm


class ErrorCatchingArgumentParser(argparse.ArgumentParser):
    """
    Argument parser that shows the full help text when parsing fails.

    Instantiating this class will print the help message and
    exit if an error occurs while parsing the arguments.
    """

    def error(self, message):
        """
        Report a parsing error, print the help message and terminate.

        Parameters
        ----------
        message : str
            Description of the parsing problem.

        Raises
        ------
        SystemExit
            Always raised, with exit status 2.
        """
        print(f"Error occurred while parsing arguments: {message}\n")
        self.print_help()
        # Terminate through argparse's own exit hook instead of the `exit`
        # builtin: that name is injected by the `site` module for interactive
        # sessions and is not available under `python -S` or in embedded
        # interpreters.
        self.exit(2)


def parse_cli_arguments(argv=None) -> argparse.Namespace:
    """
    Parse the command line options of the notebook-to-script converter.

    Parameters
    ----------
    argv : list of str, optional
        Arguments to parse. When None (the default), argparse reads them
        from ``sys.argv``.

    Returns
    -------
    argparse.Namespace
        Namespace with the attributes ``notebook_path``, ``all``,
        ``no_clean``, ``no_check`` and ``no_run``.

    Raises
    ------
    SystemExit
        Raised through ``parser.error`` when neither a notebook path nor
        the ``-a``/``--all`` flag is supplied.
    """

    parser = ErrorCatchingArgumentParser(
        description="Create and clean .py files from Jupyter notebooks.",
        exit_on_error=False,
    )

    # either provide a notebook path or use the --all flag
    group = parser.add_mutually_exclusive_group()
    group.add_argument(
        "notebook_path", nargs="?", help="Path to the notebook to convert to a script"
    )
    group.add_argument(
        "-a",
        "--all",
        action="store_true",
        help="Create scripts for all notebooks. Ignores the path argument",
    )

    parser.add_argument(
        "--no-clean",
        action="store_true",
        help="Do not clean the scripts after conversion",
    )
    parser.add_argument(
        "--no-check",
        action="store_true",
        help="Do not check the scripts for syntax errors",
    )
    parser.add_argument(
        "--no-run", action="store_true", help="Do not run the scripts after conversion"
    )

    args = parser.parse_args(argv)

    # The exclusive group only forbids giving both options; it does not force
    # one of them. Reject an empty selection here so that callers never receive
    # notebook_path=None together with all=False.
    if not args.all and args.notebook_path is None:
        parser.error("provide a notebook path or use the -a/--all flag")

    return args


# Utilities
def request_confirmation(message):
    """
    Ask a yes/no question on the console inside a banner of '=' characters.

    The question is suffixed with ' [y/N]' and centred in a banner that is at
    least 60 characters wide. Returns True only when the stripped,
    lower-cased reply is exactly 'y'.
    """
    prompt = f'{message} [y/N]'
    banner_width = max(60, len(prompt) + 20)
    rule = "=" * banner_width
    reply = input(f'{rule}\n{prompt:^{banner_width}}\n{rule}\n')
    return reply.strip().lower() == 'y'
# NOTE(review): this span interleaves two variants of the conversion logic: a
# generate_script(notebook, clean_script) function that builds `command_args`,
# and a module-level flow driven by request_confirmation with a one-argument
# generate_script(notebook). It looks like an unmarked diff view; confirm
# against the repository which lines are current before editing.
def generate_script(notebook, clean_script: bool = True):
"""
Transform each notebook into a python script using
the jupyter nbconvert command line utility.
Use custom clean_py template in conversion if clean_script is True.
See https://nbconvert.readthedocs.io/en/latest/customizing.html for nbconvert template documentation.
"""

"""
Use the find command line utility to find the paths of all
notebooks in this repository and store them in a list
"""
# Both globs use relative patterns, so they search below the current working directory.
example_notebooks = glob.glob('**/*.ipynb', recursive=True)
example_scripts = [notebook.replace('.ipynb', '.py') for notebook in example_notebooks]
# NOTE(review): this list is built before any conversion runs, so the assert
# further down compares against the pre-conversion set of .py files; confirm
# whether that is intended.
all_python_files = glob.glob('**/*.py', recursive=True)
# convert to python instead of generic script
# see https://stackoverflow.com/questions/48568388/nbconvert-suddenly-producing-txt-instead-of-py
command_args = ["jupyter", "nbconvert", notebook, "--to", "python"]

"""
Transform each notebook into a python script using
the jupyter nbconvert command line utility
"""
if clean_script:
# NOTE(review): "templates/clean_py" is a relative path; presumably the script
# has to be started from the repository root for it to be found. Confirm.
command_args += ["--template", "templates/clean_py"]

def generate_script(notebook):
subprocess.run(['jupyter', 'nbconvert', '--to', 'script', notebook])

if request_confirmation('Regenerate Python scripts from Jupyter notebooks?'):
# Generate the python scripts
for notebook in tqdm(example_notebooks): generate_script(notebook)
# Assert that all the notebooks were converted to python scripts
assert all([script in all_python_files for script in example_scripts]), \
f'Unsuccessful: not all notebooks were converted to python scripts. Failed conversions:\n' + \
'\n'.join([script for script in example_scripts if script not in all_python_files])
else:
# If there are missing scripts
if not all([script in all_python_files for script in example_scripts]):
# Generate the missing python scripts
for script in [script for script in example_scripts if script not in all_python_files]:
# NOTE(review): `script` is the .py name derived above, not the notebook
# path, yet it is handed to generate_script; looks unintended, confirm.
generate_script(script)
# The CompletedProcess returned by subprocess.run is discarded, so a failed
# nbconvert call is not detected at this point.
subprocess.run(command_args)

"""
Clean up the python scripts
"""

for example_python_script in example_scripts:

print(f'Cleaning example: {example_python_script}')

with open(example_python_script, "r+") as file:
def clean_script(script):
    """
    Tidy, in place, a ``.py`` file that was converted from a notebook.

    The following clean-up steps are applied to the file:

    * the leading shebang block (first three lines) is dropped when the first
      line contains ``!/usr/bin/env python``;
    * lines containing ``In[`` are removed together with the two lines after;
    * comment blocks that start with a ``# #`` title line are rewritten as a
      ``# Title`` comment followed by a triple-quoted string holding the text;
    * a ``# make plots interactive`` line is removed with the line after it;
    * ``plt.show()`` is appended when no line of the file contains it.

    Parameters
    ----------
    script : str or path-like
        Path of the Python file to rewrite.

    Raises
    ------
    OSError
        If the file cannot be opened for reading and writing.
    """
    print(f"Cleaning example: {script}")

    with open(script, "r+") as file:

        # Read example
        example_content = file.readlines()

        # Remove file type and encoding (guarded so an empty file does not
        # raise IndexError)
        if example_content and "!/usr/bin/env python" in example_content[0]:
            example_content = example_content[3:]

        # State
        checking_comment_end = False
        skip_next = False

        # Go through each line in the example.
        # NOTE: the list is deliberately edited while it is enumerated; the
        # index arithmetic below relies on enumerate() reading the live list,
        # so the order of the insert/delete operations must not be changed.
        for i, line in enumerate(example_content):

            if skip_next:
                skip_next = False
                continue

            # --> Remove the "In[x]" notebook inputs
            if "In[" in line:
                # Also remove the two lines after
                del example_content[i:i + 3]

            # --> End of MD cell
            elif checking_comment_end:
                # --> End of cell: if the line is empty, the markdown cell is finished
                if line == "\n":
                    # Add """ to close the string comment, then an empty line
                    example_content[i] = "\"\"\"\n"
                    example_content.insert(i + 1, "\n")
                    checking_comment_end = False
                # --> Second title: detect if we have a second title in the same markdown cell, and mark the separation
                elif "##" in line:
                    example_content[i] = line.replace("# ", "", 1)
                    example_content.insert(i, "\"\"\"\n\n")
                    example_content.insert(i + 2, "\"\"\"\n")
                    # The title line moved to i + 1; do not process it again
                    skip_next = True
                # If we are still in the markdown cell, remove the simple # that indicates a comment line
                else:
                    example_content[i] = line.replace("# ", "", 1)

            # --> Start of MD cell: if the line contains "# #", we are in a markdown cell that starts with a title
            elif "# #" in line:
                # Replace the first line to keep the title with a comment #
                example_content[i] = line.replace("# ", "", 1)
                example_content.insert(i + 1, "\"\"\"\n")
                # Drop the blank line after the title, if there is one
                if i + 2 < len(example_content) and example_content[i + 2] == "\n":
                    example_content.pop(i + 2)
                checking_comment_end = True  # Start looking for the end of the cell

            # --> Remove the lines that made the plots interactive
            elif "# make plots interactive" in line:
                del example_content[i:i + 2]

        # Look inside every line: the entries keep their trailing newline and
        # indentation, so comparing whole lines against "plt.show()" would miss
        # a call that is already present and append a duplicate.
        if not any("plt.show()" in content_line for content_line in example_content):
            example_content.append("\nplt.show()")

        file.seek(0)
        file.writelines(example_content)
        file.truncate()

"""
Check Python scripts for syntax errors
"""
print('\nChecking Python scripts for syntax errors\n')
for example_python_script in example_scripts:

# Test the example
result = subprocess.run(['python', '-m', 'py_compile', example_python_script],
stdout=subprocess.DEVNULL,
stderr=subprocess.STDOUT)
if __name__ == "__main__":
    # Script entry point: convert the selected notebooks to .py files, then
    # optionally clean them, check them for syntax errors and run them.

    args = parse_cli_arguments()

    if args.all:
        notebooks_to_clean = glob.glob("**/*.ipynb", recursive=True)
    elif args.notebook_path is not None:
        notebooks_to_clean = [args.notebook_path]
    else:
        # The positional path is optional for the parser, so it can be None
        # when --all is not given; stop here instead of failing on None below.
        sys.exit("Error: provide a notebook path or use the -a/--all flag.")

    # Swap only the file suffix; a plain str.replace would also rewrite any
    # ".ipynb" that occurs elsewhere in the path.
    example_scripts = [
        str(Path(notebook).with_suffix(".py")) for notebook in notebooks_to_clean
    ]

    # Generate Python scripts from Jupyter notebooks
    for notebook in tqdm(notebooks_to_clean):
        generate_script(notebook, (not args.no_clean))

    if not args.no_clean:
        # Clean Python scripts
        for example_python_script in example_scripts:
            clean_script(example_python_script)

    if not args.no_check:
        # Check Python scripts for syntax errors
        print("\nChecking Python scripts for syntax errors...\n")

        # Collect the scripts that compile into a new list. Removing entries
        # from example_scripts while looping over it would skip the script
        # that follows every failing one.
        scripts_without_errors = []

        for example_python_script in example_scripts:

            # Compile with the interpreter that runs this script, so the check
            # uses the active environment rather than whatever "python" is on
            # the PATH.
            result = subprocess.run(
                [sys.executable, "-m", "py_compile", example_python_script],
                stdout=subprocess.DEVNULL,
                stderr=subprocess.STDOUT,
            )

            if result.returncode != 0:
                print(f"Unsuccessful: syntax error in example: {example_python_script}")
            else:
                scripts_without_errors.append(example_python_script)

        NO_SYNTAX_ERRORS = len(scripts_without_errors) == len(example_scripts)

        # Only scripts that compiled are run in the next step
        example_scripts = scripts_without_errors

        if NO_SYNTAX_ERRORS:
            print("All examples are free of syntax errors.")

        print("")

    if not args.no_run:
        # Test python scripts
        for example_python_script in example_scripts:

            print(f"Testing example: {example_python_script}")

            # Test the example
            subprocess.run([sys.executable, example_python_script])
Loading

0 comments on commit f306933

Please sign in to comment.