Merge pull request tudat-team#72 from larshinueber/feature/create-scripts

Convert jupyter notebooks to python files
luigigisolfi · Oct 20, 2024 · f306933 · f306933
2 parents efb80a2 + c824ad2
commit f306933
Show file tree

Hide file tree

Showing 34 changed files with 3,175 additions and 2,836 deletions.
diff --git a/README.md b/README.md
@@ -132,6 +132,28 @@ conda activate tudat-examples
 The following guidelines should be followed when creating a new example application.
 
 1. Any modification or addition to this set of examples should be made in a personal fork of the current repository. No changes are to be done directly on a local clone of this repo.
-2. The example should be written directly on a Jupyter notebook (`.ipynb` file). Then, the following command can be run from the CLI to create a `.py` file with the same code as the notebook file: `jupyter nbconvert --to python mynotebook.ipynb`. Make sure to change `mynotebook` to the name of the notebook file.
-3. The markdown blocks are not optimally converted. Thus, once the `.py` file is created as described above, the script `create_scripts.py` is to be executed. This file reformats the markdown blocks in the `.py` files into a more readable look. Sometimes this cleanup is not perfect, so manually check the `.py` file to make sure everything is fine and correct anything that is not.
+2. The example should be written directly in a Jupyter notebook (`.ipynb` file).
+3. Convert the finished `.ipynb` example to a `.py` file with the `create_scripts.py` CLI utility:
+    1. Activate the virtual environment:
+
+        ```bash
+        conda activate tudat-examples
+        ```
+
+    2. Use the `create_scripts.py` CLI application to convert your notebook:
+
+        ```bash
+        python create_scripts.py path/to/your/notebook.ipynb
+        ```
+
+        By default, this converts the `.ipynb` notebook to a `.py` file, cleans it, checks for syntax errors and runs it.
+
+    3. Use the `-h` flag to see the available options of the CLI utility. A common set of options is:
+
+        ```bash
+        python create_scripts.py -a --no-run
+        ```
+
+        This converts all `.ipynb` files to `.py` files, cleans them and checks them for syntax errors, but does not run them.
+
 4. At this point, the example is complete. You are ready to create a pull request from your personal fork to the current repository, and the admins will take it from there.
diff --git a/create_scripts.py b/create_scripts.py
@@ -5,160 +5,170 @@
 a copy of the license with this file. If not, please or visit:
 http://tudat.tudelft.nl/LICENSE.
 """
+
 # PLEASE NOTE:
 # This script is NOT a tudatpy example.
-# It is a script to clean the .py files that are generated from Jupyter notebooks, using the following option: File > Download as > Python (.py)
+# It is a script to create and clean .py files from Jupyter notebooks.
+# Run `python create_scripts.py -h` for more information.
 # Running it will automatically edit all the .py example files (please check the changes made before pushing them to the repository).
 
 # Standard library imports
-import re
+import argparse
 import glob
+import re
 import subprocess
+import sys
+from pathlib import Path
 
 # Other imports
 from tqdm import tqdm
 
+
+class ErrorCatchingArgumentParser(argparse.ArgumentParser):
+    """
+    Argument parser that prints the error message and the help text,
+    then exits, when an error occurs while parsing the arguments.
+    """
+
+    def error(self, message):
+        print(f"Error occurred while parsing arguments: {message}\n")
+        self.print_help()
+        exit(2)
+
+
+def parse_cli_arguments() -> dict:
+
+    parser = ErrorCatchingArgumentParser(
+        description="Create and clean .py files from Jupyter notebooks.",
+        exit_on_error=False,
+    )
+
+    # either provide a notebook path or use the --all flag
+    group = parser.add_mutually_exclusive_group()
+    group.add_argument(
+        "notebook_path", nargs="?", help="Path to the notebook to convert to a script"
+    )
+    group.add_argument(
+        "-a",
+        "--all",
+        action="store_true",
+        help="Create scripts for all notebooks. Ignores the path argument",
+    )
+
+    parser.add_argument(
+        "--no-clean",
+        action="store_true",
+        help="Do not clean the scripts after conversion",
+    )
+    parser.add_argument(
+        "--no-check",
+        action="store_true",
+        help="Do not check the scripts for syntax errors",
+    )
+    parser.add_argument(
+        "--no-run", action="store_true", help="Do not run the scripts after conversion"
+    )
+
+    args = parser.parse_args()
+
+    return args
+
+
 # Utilities
-def request_confirmation(message):
-    message = f'{message} [y/N]'
-    width = max(60, len(message)+20)
-    return input(
-        f'{"="*width}\n{message:^{width}}\n{"="*width}\n'
-    ).strip().lower() == 'y'
+def generate_script(notebook, clean_script: bool = True):
+    """
+    Transform the given notebook into a Python script using
+    the jupyter nbconvert command line utility.
+    Use custom clean_py template in conversion if clean_script is True.
+    See https://nbconvert.readthedocs.io/en/latest/customizing.html for nbconvert template documentation.
+    """
 
-"""
-Use the find command line utility to find the paths of all
-notebooks in this repository and store them in a list
-"""
-example_notebooks = glob.glob('**/*.ipynb', recursive=True)
-example_scripts = [notebook.replace('.ipynb', '.py') for notebook in example_notebooks]
-all_python_files = glob.glob('**/*.py', recursive=True)
+    # convert to python instead of generic script
+    # see https://stackoverflow.com/questions/48568388/nbconvert-suddenly-producing-txt-instead-of-py
+    command_args = ["jupyter", "nbconvert", notebook, "--to", "python"]
 
-"""
-Transform each notebook into a python script using
-the jupyter nbconvert command line utility
-"""
+    if clean_script:
+        command_args += ["--template", "templates/clean_py"]
 
-def generate_script(notebook):
-    subprocess.run(['jupyter', 'nbconvert', '--to', 'script', notebook])
-
-if request_confirmation('Regenerate Python scripts from Jupyter notebooks?'):
-    # Generate the python scripts
-    for notebook in tqdm(example_notebooks): generate_script(notebook)
-    # Assert that all the notebooks were converted to python scripts
-    assert all([script in all_python_files for script in example_scripts]), \
-        f'Unsuccessful: not all notebooks were converted to python scripts. Failed conversions:\n' + \
-         '\n'.join([script for script in example_scripts if script not in all_python_files])
-else:
-    # If there are missing scripts
-    if not all([script in all_python_files for script in example_scripts]):
-        # Generate the missing python scripts
-        for script in [script for script in example_scripts if script not in all_python_files]:
-            generate_script(script)
+    subprocess.run(command_args)
 
-"""
-Clean up the python scripts
-"""
 
-for example_python_script in example_scripts:
-
-    print(f'Cleaning example:              {example_python_script}')
-
-    with open(example_python_script, "r+") as file:
+def clean_script(script):
+    print(f"Cleaning example:              {script}")
+
+    with open(script, "r+") as file:
 
         # Read example
         example_content = file.readlines()
 
-        # Remove file type and encoding
-        if "!/usr/bin/env python" in example_content[0]:
-            example_content = example_content[3:]
-
-        # State
-        checking_comment_end = False
-        skip_next = False
-
-        # Indentation
-        indentation = ''
-
-        # Go trough each line in the example
-        for i, line in enumerate(example_content):
-
-            if skip_next:
-                skip_next = False
-                continue
-
-            # --> Remove the "In[x]" notebook inputs    
-            if "In[" in line:
-                # Also remove the two lines after
-                [example_content.pop(i) for _ in range(3)]
-
-            # --> End of MD cell
-            elif checking_comment_end:
-                # --> End of cell: if the line is empty, the markdown cell is finished
-                if line == "\n":
-                    # Add """ to close the string comment, then an empty line
-                    example_content[i] = "\"\"\"\n"
-                    example_content.insert(i+1, "\n")
-                    checking_comment_end = False
-                # --> Second title: detect if we have a second title in the same markdown cell, and mark the separation
-                elif "##" in line:
-                    example_content[i] = line.replace("# ", "", 1)
-                    example_content.insert(i, "\"\"\"\n\n")
-                    example_content.insert(i+2, "\"\"\"\n")
-                    skip_next = True
-                # If we are still in the markdown cell, remove the simple # that indicates a comment line
-                else:
-                    example_content[i] = line.replace("# ", "", 1)
-
-            # --> Start of MD cell: if the line starts with # #, we are in a markdown cell that starts with a title
-            elif "# #" in line:
-                # Replace the first line to keep the title with a comment #
-                example_content[i] = line.replace("# ", "", 1)
-                example_content.insert(i+1, "\"\"\"\n")
-                if example_content[i+2] == "\n":
-                    example_content.pop(i+2)
-                checking_comment_end = True # Start looking for the end of the cell
-
-            # --> Remove the lines that made the plots interactive
-            elif "# make plots interactive" in line:
-                [example_content.pop(i) for _ in range(2)]
-
-            # We're in a code cell, so we record the indentation level
-            else:
-
-                # Retrieve the last non-empty line
-                last_nonempty_line = next(line for line in example_content[i-1::-1] if re.match(r'^( {4}){0,2}\S', line))
-
-                # Keep track of current indentation
-                indentation = ' ' * (len(last_nonempty_line) - len(last_nonempty_line.lstrip()))
+        if "plt.show()" not in example_content:
+            example_content.append("\nplt.show()")
 
         file.seek(0)
         file.writelines(example_content)
         file.truncate()
 
-"""
-Check Python scripts for syntax errors
-"""
-print('\nChecking Python scripts for syntax errors\n')
-for example_python_script in example_scripts:
 
-    # Test the example
-    result = subprocess.run(['python', '-m', 'py_compile', example_python_script],
-                            stdout=subprocess.DEVNULL,
-                            stderr=subprocess.STDOUT)
+if __name__ == "__main__":
 
-    if result.returncode != 0: 
-        print(f'Unsuccessful: syntax error in example: {example_python_script}')
-        example_scripts.remove(example_python_script)
-print('')
+    args = parse_cli_arguments()
 
-"""
-Test python scripts
-"""
-if request_confirmation('Test generated Python scripts?'):
-    for example_python_script in example_scripts:
+    if args.all:
+        notebooks_to_clean = glob.glob("**/*.ipynb", recursive=True)
+    else:
+        notebooks_to_clean = [args.notebook_path]
+
+    example_scripts = [
+        notebook.replace(".ipynb", ".py") for notebook in notebooks_to_clean
+    ]
+
+    """
+    Generate Python scripts from Jupyter notebooks
+    """
+    for notebook in tqdm(notebooks_to_clean):
+        generate_script(notebook, (not args.no_clean))
+
+    if not args.no_clean:
+        """
+        Clean Python scripts
+        """
+        for example_python_script in example_scripts:
+            clean_script(example_python_script)
+
+    if not args.no_check:
+        """
+        Check Python scripts for syntax errors
+        """
+        NO_SYNTAX_ERRORS = True
+
+        print("\nChecking Python scripts for syntax errors...\n")
+
+        for example_python_script in example_scripts:
+
+            # Test the example
+            result = subprocess.run(
+                ["python", "-m", "py_compile", example_python_script],
+                stdout=subprocess.DEVNULL,
+                stderr=subprocess.STDOUT,
+            )
+
+            if result.returncode != 0:
+                print(f"Unsuccessful: syntax error in example: {example_python_script}")
+                example_scripts.remove(example_python_script)
+                NO_SYNTAX_ERRORS = False
+
+        if NO_SYNTAX_ERRORS:
+            print("All examples are free of syntax errors.")
+
+        print("")
+
+    if not args.no_run:
+        """
+        Test python scripts
+        """
+        for example_python_script in example_scripts:
 
-        print(f'Testing example:               {example_python_script}')
+            print(f"Testing example:               {example_python_script}")
 
-        # Test the example
-        subprocess.run(['python', example_python_script])
+            # Test the example
+            subprocess.run(["python", example_python_script])