Skip to content

Commit

Permalink
Add validation to explicitly check for bare AWS strings in excerpt descriptions.
Browse files Browse the repository at this point in the history
  • Loading branch information
DavidSouther committed Nov 29, 2024
1 parent 7425df6 commit 37f307b
Show file tree
Hide file tree
Showing 4 changed files with 82 additions and 19 deletions.
6 changes: 3 additions & 3 deletions aws_doc_sdk_examples_tools/doc_gen_cli_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@

from .doc_gen import DocGen, MetadataError, Example
from .doc_gen_cli import main
from .metadata import DocFilenames, Sdk, Language, SDKPageVersion, Version
from .sdks import SdkVersion
from .metadata import DocFilenames, Language, SDKPageVersion, Version
from .sdks import Sdk, SdkVersion


@pytest.fixture
Expand All @@ -24,7 +24,7 @@ def mock_example():
},
sdk_pages={
"cpp": {
1: SDKPageVersion(actions_scenarios={"medical-imaging": f"link"})
1: SDKPageVersion(actions_scenarios={"medical-imaging": "link"})
}
},
),
Expand Down
27 changes: 13 additions & 14 deletions aws_doc_sdk_examples_tools/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

from collections import defaultdict
from dataclasses import dataclass, field
from typing import Any, Dict, Literal, List, Optional, Set, Union, Iterable
from typing import Dict, Literal, List, Optional, Set, Iterable
from os.path import splitext
from pathlib import Path

Expand All @@ -18,9 +18,7 @@
ExampleMergeMismatchedLanguage,
ExampleMergeConflict,
)
from .project_validator import ValidationConfig
from .services import Service
from .sdks import Sdk
from .project_validator import verify_no_invalid_bare_aws


@dataclass
Expand All @@ -44,8 +42,9 @@ class Excerpt:
# all: This content was entirely written by GenAI, and has not been reviewed by a human.
genai: Literal["none", "some", "most", "all"] = "none"

def validate(self, errors: MetadataErrors):
pass
def validate(self, errors: MetadataErrors, path: str):
if self.description:
verify_no_invalid_bare_aws(self.description, path, errors)


@dataclass
Expand All @@ -63,7 +62,7 @@ class Version:
# Link to additional topic places.
more_info: List[Url] = field(default_factory=list)

def validate(self, errors: MetadataErrors, root: Path):
def validate(self, errors: MetadataErrors, root: Path, path: str):
github = self.github
if github is not None:
_, ext = splitext(github)
Expand All @@ -82,8 +81,8 @@ def validate(self, errors: MetadataErrors, root: Path):
)
)

for excerpt in self.excerpts:
excerpt.validate(errors)
for i, excerpt in enumerate(self.excerpts):
excerpt.validate(errors, f"{path}[{i}]")


@dataclass
Expand Down Expand Up @@ -117,10 +116,10 @@ def merge(self, other: "Language", errors: MetadataErrors):
# within the language. If a tributary or writer feels they need to
# modify an excerpt, they should go modify the excerpt directly.

def validate(self, errors: MetadataErrors, root: Path):
def validate(self, errors: MetadataErrors, root: Path, path: str):
errs = MetadataErrors()
for version in self.versions:
version.validate(errs, root)
for i, version in enumerate(self.versions):
version.validate(errs, root, f"{path}[{i}]")
for error in errs:
if isinstance(error, MetadataParseError):
error.language = self.name
Expand Down Expand Up @@ -189,8 +188,8 @@ def merge(self, other: Example, errors: MetadataErrors):

def validate(self, errors: MetadataErrors, root: Path):
errs = MetadataErrors()
for language in self.languages.values():
language.validate(errs, root)
for name, language in self.languages.items():
language.validate(errs, root, f"{self.file}: {self.id}.languages.{name}")
for error in errs:
error.file = self.file
error.id = self.id
Expand Down
49 changes: 48 additions & 1 deletion aws_doc_sdk_examples_tools/metadata_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
Excerpt,
)
from .doc_gen import DocGen, parse_examples, check_id_format
from .project_validator import ValidationConfig
from .project_validator import ValidationConfig, InvalidBareAWS
from .sdks import Sdk
from .services import Service, ServiceExpanded

Expand Down Expand Up @@ -977,5 +977,52 @@ def test_no_duplicate_title_abbrev():
assert expected == [*errors]


def test_excerpt_with_bare_aws():
    """Validation reports InvalidBareAWS for an excerpt description with "AWS <word>"."""
    errors = MetadataErrors()

    # Build the fixture inside-out: one excerpt, in one version, of one language.
    bare_excerpt = Excerpt(
        description="Bare AWS Here",
        snippet_files=[],
        snippet_tags=[],
    )
    java = Language(
        name="java",
        property="java",
        versions=[Version(sdk_version=1, excerpts=[bare_excerpt])],
    )
    example = Example(
        id="a",
        file=Path("a"),
        title_abbrev="abbr",
        category="cat",
        languages={"java": java},
        services={"svc": set()},
    )

    doc_gen = DocGen(
        Path(__file__).parent / "test_excerpt_with_bare_aws",
        errors=errors,
        examples={"a": example},
    )
    doc_gen.validate()

    # The path is "<file>: <id>.languages.<name>" followed by the version
    # index and then the excerpt index.
    expected = [
        InvalidBareAWS(
            file=Path("a"),
            id="a",
            content="Bare AWS Here",
            path="a: a.languages.java[0][0]",
        )
    ]

    assert expected == list(errors)


# When this file is executed directly, run its tests through pytest with
# extra-verbose output.
if __name__ == "__main__":
    pytest.main([__file__, "-vv"])
19 changes: 18 additions & 1 deletion aws_doc_sdk_examples_tools/project_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
import logging
from dataclasses import dataclass, field
from pathlib import Path
from typing import List, Set
from typing import Dict, List, Set

from .file_utils import get_files
from .metadata_errors import (
Expand Down Expand Up @@ -206,3 +206,20 @@ def verify_no_secret_keys(
keys -= validation.allow_list
for word in keys:
errors.append(PossibleSecretKey(file=file_location, word=word))


@dataclass
class InvalidBareAWS(MetadataError):
    """Error appended by verify_no_invalid_bare_aws when text matches BARE_AWS_REGEX."""

    # The complete text that was scanned (not just the matching portion).
    content: str = ""
    # Caller-supplied label describing where that text came from.
    path: str = ""

    def message(self):
        """Return a one-line description naming the path and the flagged content."""
        return f"Possible bare AWS in {self.path} ({self.content})"


# Pattern for a "bare" AWS mention: the whole word AWS, then whitespace, then
# a run of ASCII letters/digits that ends on a word boundary.
BARE_AWS_REGEX = r"\bAWS\s+[A-Za-z0-9]+\b"


def verify_no_invalid_bare_aws(content: str, path: str, errors: MetadataErrors):
    """Append an InvalidBareAWS error when content matches BARE_AWS_REGEX.

    Args:
        content: Text to scan.
        path: Location label stored on the reported error.
        errors: Collection the error is appended to; it is left untouched
            when nothing matches.
    """
    # Only the presence of a match matters, so stop at the first hit instead
    # of collecting every occurrence with findall.
    if re.search(BARE_AWS_REGEX, content) is not None:
        errors.append(InvalidBareAWS(content=content, path=path))

0 comments on commit 37f307b

Please sign in to comment.