WRITEME: Link to snippets anywhere (awsdocs#5392)

Search for snippets across the entire codebase instead of only in a single subfolder.
cpyle0819 · Sep 20, 2023 · a3b38cc · a3b38cc
1 parent 6056d24
commit a3b38cc
Show file tree

Hide file tree

Showing 4 changed files with 92 additions and 28 deletions.
diff --git a/.doc_gen/readmes/config.py b/.doc_gen/readmes/config.py
@@ -13,6 +13,8 @@
     '&aws_sec_sdk_use-federation-warning;': '',
     '&ASH;': 'Security Hub',
     '&DAX;': 'DynamoDB Accelerator',
+    '&EC2long;': 'Amazon Elastic Compute Cloud',
+    '&ELBlong;': 'Elastic Load Balancing',
     '&GLUDCLong;': 'AWS Glue Data Catalog',
     '&GLUDC;': 'Data Catalog',
     '&IAM-user;': 'IAM user',

diff --git a/.doc_gen/readmes/render.py b/.doc_gen/readmes/render.py
@@ -100,6 +100,8 @@ def _transform_actions(self, pre_actions):
 
     def _transform_scenarios(self):
         pre_scenarios = self.scanner.scenarios()
+        _, cross_scenarios = self.scanner.crosses()
+        pre_scenarios.update(cross_scenarios)
         post_scenarios = []
         for pre_id, pre in pre_scenarios.items():
             scenario = {
@@ -116,7 +118,7 @@ def _transform_scenarios(self):
         return sorted(post_scenarios, key=itemgetter('title_abbrev'))
 
     def _transform_crosses(self):
-        pre_crosses = self.scanner.crosses()
+        pre_crosses, _ = self.scanner.crosses()
         post_crosses = []
         for _, pre in pre_crosses.items():
             github = None

diff --git a/.doc_gen/readmes/scanner.py b/.doc_gen/readmes/scanner.py
@@ -7,6 +7,8 @@
 import re
 import yaml
 
+from snippets import Snippet, scan_for_snippets
+
 logger = logging.getLogger(__name__)
 
 
@@ -19,6 +21,7 @@ def __init__(self, meta_folder):
         self.svc_meta = None
         self.example_meta = None
         self.cross_meta = None
+        self.snippets = None
 
     def _load_meta(self, file_name, field):
         if field is not None:
@@ -98,46 +101,45 @@ def scenarios(self):
     def crosses(self):
+        """Split the matching cross-service examples into crosses and scenarios.
+
+        Only examples whose 'languages' contain self.lang_name and whose
+        'services' contain self.svc_name are considered. An example whose
+        'category' equals config.categories['scenarios'] is placed in the
+        scenarios dict; every other matching example is placed in the
+        crosses dict.
+
+        :return: A (crosses, scenarios) tuple of dicts keyed by example name.
+        """
         self._load_cross()
         crosses = {}
+        scenarios = {}
         for example_name, example in self.cross_meta.items():
             if self.lang_name in example['languages'] and self.svc_name in example['services']:
-                crosses[example_name] = example
-        return crosses
+                if example.get('category', '') == config.categories['scenarios']:
+                    scenarios[example_name] = example
+                else:
+                    crosses[example_name] = example
+        return crosses, scenarios
 
     def snippet(self, example, sdk_ver, readme_folder, api_name):
+        """Resolve the link target for an example's snippet.
+
+        On first use, the tree under '.' is scanned once with
+        scan_for_snippets and the result is cached in self.snippets.
+
+        For the version entry whose 'sdk_version' equals sdk_ver:
+        * If the first excerpt has 'snippet_tags', the last tag containing
+          api_name is chosen, falling back to the first tag (or None).
+        * Otherwise, if the first excerpt has 'snippet_files', the first file
+          listed is used directly as the result.
+        * If the version has 'block_content' instead of 'excerpts', the
+          'github' value is used directly as the result.
+
+        When only a tag was found, it is looked up in the cached snippets and
+        the result is the snippet's path relative to readme_folder (with
+        forward slashes), plus a '#L<line>' anchor when api_name is not empty.
+
+        :param example: Example metadata dict with a 'languages' entry.
+        :param sdk_ver: SDK version to match against each 'sdk_version'.
+        :param readme_folder: Folder that returned paths are made relative to.
+        :param api_name: API name used to pick a tag; '' omits the line anchor.
+        :return: The resolved path or link, or None when nothing was found.
+        """
+        if self.snippets is None:
+            self.snippets = scan_for_snippets('.')
+
         github = None
         tag = None
         tag_path = None
         for ex_ver in example['languages'][self.lang_name]['versions']:
             if ex_ver['sdk_version'] == sdk_ver:
                 github = ex_ver.get('github')
                 if github is not None:
-                    excerpt = ex_ver['excerpts'][0]
-                    tags = excerpt.get('snippet_tags', [])
                     if 'excerpts' in ex_ver:
-                        for t in tags:
-                            if api_name in t:
-                                tag = t
-                        if tag is None:
-                            tag = next(iter(tags), None)
+                        excerpt = ex_ver['excerpts'][0]
+                        if 'snippet_tags' in excerpt:
+                            tags = excerpt.get('snippet_tags', [])
+                            for t in tags:
+                                if api_name in t:
+                                    tag = t
+                            if tag is None:
+                                tag = next(iter(tags), None)
+                        elif 'snippet_files' in excerpt:
+                            snippet_files = excerpt['snippet_files']
+                            # TODO: Find the best (or all?) snippet files, not the first.
+                            tag_path = snippet_files[0]
                     elif 'block_content' in ex_ver:
                         tag_path = github
         if github is not None and tag_path is None:
-            snippet_files = excerpt.get("snippet_files", None)
-            if snippet_files is not None:
-                # TODO: Find the best (or all?) snippet files, not the first.
-                tag_path = snippet_files[0]
-            for root, dirs, files in os.walk(github):
-                for f in files:
-                    try:
-                        with open(os.path.join(root, f), 'r') as search_file:
-                            for index, line in enumerate(search_file.readlines()):
-                                if re.findall(rf'\b{tag}\b', line):
-                                    tag_path = os.path.relpath(search_file.name, readme_folder).replace('\\', '/')
-                                    if api_name != '':
-                                        tag_path += f'#L{index+1}'
-                                    break
-                    except UnicodeDecodeError:
-                        logger.debug("Skipping %s due to unicode decode error.", f)
-                if tag_path is not None:
-                    break
+            snippet = self.snippets.get(tag, None)
+            if snippet is not None:
+                tag_path = os.path.relpath(snippet.path, readme_folder).replace('\\', '/')
+                if api_name != '':
+                    tag_path += f'#L{snippet.line}'
         return tag_path
diff --git a/.doc_gen/readmes/snippets.py b/.doc_gen/readmes/snippets.py
@@ -0,0 +1,58 @@
+# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+import logging
+import os
+
+logger = logging.getLogger(__name__)
+
+# Marker text that identifies a line declaring a snippet tag; the tag itself
+# is the text between this marker and the next "]".
+SNIPPET_START = "snippet-start:["
+# Directory names that are pruned from the walk and never scanned.
+IGNORE_FOLDERS = {
+    '.pytest_cache',
+    '__pycache__',
+    'cdk.out',
+    'node_modules',
+    'dist',
+    'target',
+    'venv',
+    '.venv',
+    'bin',
+    'obj',
+    '.doc_gen',
+    '.git-hooks',
+    '.github',
+    '.git',
+}
+# File extensions (compared in lowercase) whose files are skipped without
+# being opened.
+EXT_IGNORES = {".png", ".zip", ".jpg", ".wav", ".ico", ".mp3", ".pdf", ".jar", ".swp", ".keystore"}
+
+
+class Snippet:
+    """A snippet tag together with the file and line where it was declared.
+
+    :param tag: The snippet tag text.
+    :param path: Path of the file that contains the tag.
+    :param line: Line number of the tag; scan_for_snippets supplies it 1-based.
+    """
+
+    def __init__(self, tag, path, line):
+        self.tag = tag
+        self.path = path
+        self.line = line
+
+    def __repr__(self):
+        return (
+            f"{type(self).__name__}"
+            f"(tag={self.tag!r}, path={self.path!r}, line={self.line!r})"
+        )
+
+    @staticmethod
+    def tag_from_line(token, line) -> str:
+        """Extract the tag that follows the first occurrence of token in line.
+
+        The tag is the text between token and the next "]", with surrounding
+        whitespace removed. When the closing "]" is missing, the rest of the
+        line is used instead of silently dropping its last character.
+
+        :param token: The marker that precedes the tag, such as the snippet
+                      start marker.
+        :param line: The line of text to search.
+        :return: The tag, or an empty string when token is not in line.
+        """
+        # partition never yields a -1 index, unlike str.find, so a missing
+        # token or bracket cannot produce a mis-sliced tag.
+        _, found, rest = line.partition(token)
+        if not found:
+            return ""
+        return rest.partition("]")[0].strip()
+
+
+def scan_for_snippets(root):
+    """Walk the tree under root and index every snippet start tag found.
+
+    Folders named in IGNORE_FOLDERS are not descended into, and files whose
+    lowercased extension is in EXT_IGNORES are not opened. Files that cannot
+    be decoded as UTF-8 or cannot be read are skipped. When the same tag is
+    declared more than once, the last occurrence encountered wins.
+
+    :param root: The folder to start scanning from.
+    :return: A dict that maps each tag to a Snippet holding the tag, the path
+             of the file that declares it, and its 1-based line number.
+    """
+    snippets = {}
+    # Distinct loop names keep the walk from rebinding the `root` argument.
+    for folder, dirs, files in os.walk(root):
+        # Prune in place so os.walk does not descend into ignored folders.
+        dirs[:] = [d for d in dirs if d not in IGNORE_FOLDERS]
+        for f in files:
+            ext = os.path.splitext(f)[1].lower()
+            if ext in EXT_IGNORES:
+                continue
+            path = os.path.join(folder, f)
+            try:
+                # An explicit encoding keeps results independent of the
+                # platform's locale default.
+                with open(path, 'r', encoding='utf-8') as search_file:
+                    # Read the whole file up front so that a decode error
+                    # skips the file entirely instead of recording only the
+                    # tags that precede the undecodable bytes.
+                    lines = search_file.readlines()
+            except UnicodeDecodeError:
+                logger.debug("Skipping %s due to unicode decode error.", f)
+                continue
+            except OSError as error:
+                # A broken symlink or unreadable file should not abort the
+                # scan of the rest of the tree.
+                logger.warning("Skipping %s: %s", path, error)
+                continue
+            for line_number, line in enumerate(lines, start=1):
+                if SNIPPET_START in line:
+                    tag = Snippet.tag_from_line(SNIPPET_START, line)
+                    snippets[tag] = Snippet(tag, path, line_number)
+    return snippets