From 45e8fdec2a12b828b329bbe5daf13b6509a88187 Mon Sep 17 00:00:00 2001
From: Jody Garnett <jody.garnett@gmail.com>
Date: Tue, 19 Sep 2023 09:00:44 -0700
Subject: [PATCH] work on index headings

---
 docs/translate/cli.py       | 50 +++++++++++++++++-------
 docs/translate/translate.py | 77 +++++++++++++++++++++++++++++++++++++
 2 files changed, 113 insertions(+), 14 deletions(-)

diff --git a/docs/translate/cli.py b/docs/translate/cli.py
index 221a46e6974..faa858f9d33 100644
--- a/docs/translate/cli.py
+++ b/docs/translate/cli.py
@@ -16,6 +16,7 @@
 import translate.translate
 from translate import __app_name__, __version__
 from .translate import collect_paths
+from .translate import index_rst
 from .translate import load_anchors
 from .translate import fix_anchors
 from .translate import convert_rst
@@ -53,7 +54,41 @@ def french(
     print(md_fr,"\n")
 
 @app.command()
-def anchor(
+def index(
+        base_path: Annotated[str, typer.Argument(help="base path for references")],
+        rst_path: Annotated[List[str], typer.Argument(help="path to rst file(s)")],
+        anchor_txt: Optional[str] = typer.Option(
+           "anchors.txt",
+           "--anchor",
+           help="anchors.txt file recording reference locations",
+        ),
+    ):
+    """
+    Scan rst files for doc and ref targets and print the resulting index.
+
+    The index is only printed for now; anchor_txt is accepted but nothing
+    is written to it yet.
+    """
+    if not os.path.exists(base_path):
+        raise FileNotFoundError(errno.ENOENT, "The base_path does not exist at location:", base_path)
+
+    collected = collect_paths(rst_path, 'rst')
+
+    # every index_rst() result is already newline terminated (or empty)
+    entries = [index_rst(base_path, file) for file in collected]
+    print(''.join(entries))
+
+#     anchor_path = base_path+'/'+anchor_txt
+#     anchor_dir = os.path.dirname(anchor_path)
+#     if not os.path.exists(anchor_dir):
+#        print("RST index directory:",anchor_dir)
+#        os.makedirs(anchor_dir)
+#
+#     with open(anchor_path,'w') as anchor_file:
+#         anchor_file.write(index)
+
+@app.command()
+def fix_references(
         anchor_txt: str, md_path: Annotated[List[str], typer.Argument(help="path to md file(s)")]
     ):
     """
@@ -67,19 +102,6 @@ def anchor(
       print(md_file,"fixed",count)
     print()
 
-@app.command()
-def collect(
-        rst_path: Annotated[List[str], typer.Argument(help="path to rst file(s)")]
-    ):
-    """
-    List all rst files for conversion.
-    """
-    collected = collect_paths(rst_path,'rst')
-
-    for file in collected:
-       print(file)
-    print()
-
 @app.command()
 def rst(
         rst_path: Annotated[List[str], typer.Argument(help="path to rst file(s)")],
diff --git a/docs/translate/translate.py b/docs/translate/translate.py
index a555682539c..6c207b13c28 100644
--- a/docs/translate/translate.py
+++ b/docs/translate/translate.py
@@ -82,6 +82,83 @@ def collect_paths(paths: list[str], extension: str) -> list[str]:
 
     return files
 
+def index_rst(base_path: str, rst_file: str) -> str:
+    """
+    Scan rst_file for its first heading and return its doc index entries.
+
+    Two lines are returned, keyed by the location of rst_file relative to
+    base_path (written with a leading path separator):
+
+        <ref>.path=<ref>#<heading>
+        <ref>.text=<heading>
+
+    An empty string is returned when the file contains no heading.
+
+    A heading is a non-empty line underlined by a run of '#', '*' or '='
+    that is at least as long as the heading text. Headings decorated with
+    an overline are reported once, when their underline is reached.
+
+    Only the first heading (the document title) is indexed for now; ref
+    targets inside the document are not collected yet.
+
+    Raises FileNotFoundError if base_path does not exist, or if rst_file
+    is not located beneath base_path.
+    """
+    if not os.path.exists(base_path):
+        raise FileNotFoundError(errno.ENOENT, f"RST base_path does not exist at location: {base_path}")
+
+    # relpath normalises both paths, so a trailing separator or a leading
+    # './' on base_path does not cause files inside it to be rejected the
+    # way a plain string comparison of the two paths would.
+    inside = os.path.relpath(rst_file, base_path)
+    if inside == os.pardir or inside.startswith(os.pardir + os.sep):
+        raise FileNotFoundError(errno.ENOENT, f"RST base_path '{base_path}' does not contain rst_file: '{rst_file}'")
+
+    # keep the leading separator used by the index entries
+    relative_path = os.sep + inside
+    ref = relative_path
+
+    with open(rst_file, 'r') as file:
+        lines = file.read().splitlines()
+
+    # Section adornments accepted beneath a heading:
+    #   '#' with overline, for parts
+    #   '*' with overline, for chapters
+    #   '=' for sections
+    underlines = ('#', '*', '=')
+    # adornments that may also appear above the heading text
+    overlines = ('#', '*')
+
+    # Start at the second line so a title on the very first line is seen
+    # together with its underline; every candidate underline needs the
+    # line before it.
+    for i in range(1, len(lines)):
+        line = lines[i]
+
+        # an adornment is a single punctuation character repeated, of any
+        # length
+        if not line or line[0] not in underlines:
+            continue
+        if line != line[0] * len(line):
+            continue
+
+        if line[0] in overlines and i < len(lines) - 2 and lines[i + 2] == line:
+            # overline: wait for the matching underline two lines down
+            continue
+
+        heading = lines[i - 1]
+        if not heading or len(heading) > len(line):
+            # underline shorter than the text above it, not a heading
+            continue
+
+        # heading identified, the heading text doubles as the anchor
+        anchor = heading
+        return (
+            ref + '.path=' + relative_path + '#' + anchor + "\n"
+            + ref + '.text=' + heading + "\n"
+        )
+    return ''
+
 # administrator-guide/managing-metadata-standards/configure-validation.md
 def fix_anchors(anchors: dict[str,str], md_file: str) -> int:
     """