From 98b7c1da99100e0ca9951881eaff999e67ca5771 Mon Sep 17 00:00:00 2001 From: Tomas Peterka Date: Mon, 5 Feb 2024 15:12:06 +0100 Subject: [PATCH] feat: allow user to specify format using --format=detail|commit|markdown feat: support git submodules as default observed dependencies --- .pre-commit-config.yaml | 2 +- README.md | 63 ++++++++++++++++++++++------------ gira/__main__.py | 2 +- gira/cache.py | 31 +++++++---------- gira/config.py | 72 ++++++++++++++++++++++++++------------- gira/core.py | 20 ++++------- gira/gira.py | 38 +++++++++++++++++---- gira/repo.py | 75 ++++++++++++++++++++++++++++++++++++++--- 8 files changed, 214 insertions(+), 89 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index d24952e..d136d10 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -12,7 +12,7 @@ repos: types: [python] - id: ruff-format - name: ruff-format + name: ruff format entry: ruff format --diff language: system types: [python] diff --git a/README.md b/README.md index 0d21822..5c64755 100644 --- a/README.md +++ b/README.md @@ -1,50 +1,71 @@ # Gira -Gira checks for changes in projects dependencies and prints out all JIRA tags -found in commit messages between old and new version. +Gira gathers JIRA tickets from commit messages of your updated dependencies when you +change them in one of: `pyproject.toml`, `poetry.lock`, `west.yaml`, and `pubspec.yaml`. +It is especially useful in the "prepare-commit-msg" stage of [pre-commit](https://pre-commit.com). -Dependency changes are taken from current staged/unstaged or previous commit diff -of available lock files (poetry, pyproject, package-lock...). +_Disclaimer: works best if your dependencies follow [semantic release](https://semantic-release.gitbook.io/semantic-release/) (have tags in `v{X.Y.*}` format)._ -Commit messages are taken from projects that follow semantic release (have tags in -`v{X.Y.*}` format). -JIRA tickets are parsed based on a regular expression `[A-Z]+-\d+`. 
+## Usage -The unified output is as following +```bash +gira [-r revision] [--format="commit|detail|markdown"] +``` + +Revision can be a tag, a branch, or a commit. Gira will check dependency files for changes between +the current version and the revision version. + +Use the `commit` format when generating a commit message (only ticket names, e.g. JIRA-1234), +`detail` for a human-readable "detailed" print, or `markdown` for the same output formatted as Markdown. ```bash -$ gira +$ gira [--format=commit] internal-dependency1 => : JIRA-123, JIRA-567 other-followed-lib => : JIRA-876, JIRA-543 ``` ## Configuration -Gira is configured either by pyproject.toml or standalone .gira file. Gira -needs to know the names of followed dependencies and their git urls. +Gira is configured either by pyproject.toml or standalone .gira.yaml or actually any other +YAML file that you specify with `-c` and has "gira.observe" and optionally "gira.jira" keys. + +### Observed Dependencies -Example config: +Observed dependencies are in the form of NAME=git-url where NAME must be the same as specified +in your dependency file (e.g. pyproject.toml or a YAML). ```toml -[tool.gira.dependencies] +[tool.gira.observe] internal-lib1 = "github.com/company/internal-lib1" other-dependency = "bitbucket.com/company/other-dependency" ``` -## Optional configuration: JIRA +### Submodules -```toml -[tool.gira.jira] -url = "jira.yourcompany.com" -token = "token" +Submodules are automatically added to the observed dependencies. You can turn off support +for submodules by setting `gira.submodules=false` in your config file. + + +### JIRA (optional) + +Example of a YAML configuration file section for JIRA (for pyproject.toml use `tool.gira.jira`). +Token and email can be passed via environment variables `JIRA_TOKEN` or `GIRA_JIRA_TOKEN` and +`JIRA_EMAIL` or `GIRA_JIRA_EMAIL`. 
+ +```yaml +jira: + url: jira.yourcompany.com + token: token + email: your@email.com ``` -If you provide valid JIRA connection infromation the output will change to +Setting JIRA connection information allows for "detailed" and "markdown" formatting of the output +as follows: ```bash $ gira internal-dependency1 => : - JIRA-123: details about the issue (summary) - JIRA-567: details about the issue (summary) + JIRA-123: details about the issue (url) + JIRA-567: details about the issue (url) ``` diff --git a/gira/__main__.py b/gira/__main__.py index ae05856..d34b8bf 100644 --- a/gira/__main__.py +++ b/gira/__main__.py @@ -26,7 +26,7 @@ def main(config: str, ref: str, verbose: bool, format: str, args: list[str]) -> logging.getLogger("urllib3").setLevel(logging.WARNING) conf = config_parser.from_file(Path(config) if config else None) - if not conf.dependencies: + if not conf.observe and not conf.submodules: logger.error("No observed dependencies found in gira configuration file") return 1 diff --git a/gira/cache.py b/gira/cache.py index 5fbb741..f58c6c6 100644 --- a/gira/cache.py +++ b/gira/cache.py @@ -3,15 +3,14 @@ import subprocess from pathlib import Path -import pygit2 +import pygit2 # type: ignore -from . import logger +from . 
import logger, repo CACHE_DIR = Path(".gira_cache") -MESSAGE_LIMIT = 250 -def messages(project: str, url: str, a: str, b: str) -> list[str]: +def cache(project: str, url: str) -> pygit2.Repository: """Return commit messages between two revisions a and b""" repo_dir = CACHE_DIR / (project + ".git") if not CACHE_DIR.exists(): @@ -29,19 +28,13 @@ def messages(project: str, url: str, a: str, b: str) -> list[str]: else: logger.debug("Fetching from origin") subprocess.run(["git", "fetch", "origin"], cwd=repo_dir, check=True, capture_output=True) + return repo.Repo(repo_dir, ref="HEAD", bare=True) + + +def messages(project: str, url: str, a: str, b: str) -> list[str]: + """Return commit messages between two revisions a and b for cached git repository - logger.debug(f"Getting commit messages from {a} to {b} (in reverse chronological order)") - repository = pygit2.Repository(repo_dir, pygit2.GIT_REPOSITORY_OPEN_BARE) - ending_tag = repository.revparse_single(a) - starting_tag = repository.revparse_single(b) - - commits = repository.walk(ending_tag.oid) - messages = [] - for i, commit in enumerate(commits): - messages.append(commit.message.strip()) - if commit.oid.hex == starting_tag.oid.hex: - break - if i >= MESSAGE_LIMIT: - logger.warning(f"Reached limit {MESSAGE_LIMIT} commits for {project}") - break - return messages + @deprecated use cache() and repo.messages() instead. + """ + repo = cache(project, url) + return repo.messages(a, b) diff --git a/gira/config.py b/gira/config.py index 701873e..2349f28 100644 --- a/gira/config.py +++ b/gira/config.py @@ -10,47 +10,51 @@ import yaml from . 
import logger -from .core import Config from .jira import Jira DEFAULT_CONFIG_PATHS = ( ".gira.yaml", "pyproject.toml", + "west.yml", + "west.yaml", ) +class Config: + jira: Jira + observe: dict[str, str] # name -> url + submodules: bool + + def __init__(self, jira, observe, submodules=True): + self.jira = jira + self.observe = observe + self.submodules = submodules + + def from_file(path: Optional[Path]) -> Config: - """Load observed dependencies from configuration file""" + """Load configuration file""" if path and path.exists(): return _parse_file(path) for path_str in DEFAULT_CONFIG_PATHS: if Path(path_str).exists(): - try: - return _parse_file(Path(path_str)) - except RuntimeError as e: - logger.debug(str(e)) + logger.debug(f"Loading configuration from {path_str}") + return _parse_file(Path(path_str)) - raise RuntimeError( - f"Cannot find valid configuration file in the default paths {DEFAULT_CONFIG_PATHS}" - ) - - -def _section(d, path): - for key in path.split("."): - if key not in d: - return {} - d = d[key] - return d + raise FileNotFoundError("No configuration file found") def _parse_file(path: Path) -> Config: - logger.debug(f"Loading observed dependencies from {path}") if path.name == "pyproject.toml": return _pytoml(path) if path.name == ".gira.yaml": return _conf(path) - raise RuntimeError(f"Unknown configuration file format {path}") + if path.name.startswith("west"): + return _west(path) + if path.name.endswith(".yaml"): + return _generic_yaml(path) + logger.warning("Running with empty configuration") + return Config(jira=Jira(), observe={}) def _pytoml(path: Path) -> Config: @@ -59,16 +63,38 @@ def _pytoml(path: Path) -> Config: parsed = toml.load(f) return Config( jira=_section(parsed, "tool.gira.jira"), - dependencies=_section(parsed, "tool.gira.dependencies"), + observe=_section(parsed, "tool.gira.observe"), ) def _conf(path: Path) -> Config: """Parse watched dependencies by GIRA from .girarc""" parsed = yaml.load(path.read_text(), 
Loader=yaml.SafeLoader) - return Config(jira=Jira(**parsed.get("jira", {})), dependencies=parsed.get("dependencies", {})) + return Config(jira=Jira(**_section(parsed, "jira")), observe=_section(parsed, "observe")) + + +def _generic_yaml(path: Path) -> Config: + """Parse watched dependencies by GIRA from .girarc""" + parsed = yaml.load(path.read_text(), Loader=yaml.SafeLoader) + return Config( + jira=Jira(**_section(parsed, "gira.jira")), observe=_section(parsed, "gira.observe") + ) def _west(path: Path) -> Config: - _ = yaml.load(path.read_text(), Loader=yaml.SafeLoader) - raise NotImplementedError("Not implemented yet") + parsed = yaml.load(path.read_text(), Loader=yaml.SafeLoader) + jira = _section(parsed, "manifest.gira.jira") + observe = _section(parsed, "manifest.gira.observe") + return Config( + jira=Jira(**jira), + observe=observe, + ) + + +def _section(d: Optional[dict], path: str) -> dict: + """Return dot-separated path from dictionary""" + for key in path.split("."): + if not d or key not in d: + return {} + d = d[key] + return d or {} diff --git a/gira/core.py b/gira/core.py index bd58659..65670c8 100644 --- a/gira/core.py +++ b/gira/core.py @@ -1,4 +1,4 @@ -from .jira import Jira +from typing import Optional class Dependency: @@ -15,15 +15,6 @@ def __str__(self): return self.name -class Config: - jira: Jira - dependencies: dict[str, str] # name -> url - - def __init__(self, jira, dependencies): - self.jira = jira - self.dependencies = dependencies - - class Change: name: str version: str @@ -40,14 +31,15 @@ def __str__(self): class Upgrade: name: str - old_version: str - new_version: str + old_version: Optional[str] + new_version: Optional[str] + messages: Optional[list[str]] - def __init__(self, name, old_version=None, new_version=None): + def __init__(self, name, old_version=None, new_version=None, messages=None): self.name = name self.old_version = old_version self.new_version = new_version - self.tickets = {} + self.messages = messages def 
__str__(self): return f"{self.name} {self.old_version} => {self.new_version}:" diff --git a/gira/gira.py b/gira/gira.py index 4221b4f..fefa894 100644 --- a/gira/gira.py +++ b/gira/gira.py @@ -11,7 +11,7 @@ def gira(config: config.Config, stream: TextIO, format: str, ref: Optional[str]) # Diff current repository using firstly the revision if specified, then staged changes, # unstaged changes and finally try diff with last commit. We use diffs with 3 context lines that # are necessary for example for poetry.lock that has records spread over multiple lines - repository = repo.Repo(".", ref=ref) + repository = repo.Repo(Path("."), ref=ref) files: list[Path] = repository.changed_files() logger.debug(f"Changed files from {repository.ref}: {files}") @@ -19,8 +19,8 @@ def gira(config: config.Config, stream: TextIO, format: str, ref: Optional[str]) upgrades: list[core.Upgrade] = [] for file in filter(deps.parseable, files): logger.debug(f"Processing {file} for dependencies") - pre = deps.parse(file, repository.get_old_content(file), config.dependencies) - post = deps.parse(file, repository.get_current_content(file), config.dependencies) + pre = deps.parse(file, repository.get_old_content(file), config.observe) + post = deps.parse(file, repository.get_current_content(file), config.observe) for dep_name in pre.keys(): if dep_name in post and pre.get(dep_name) != post.get(dep_name): upgrades.append( @@ -29,12 +29,38 @@ def gira(config: config.Config, stream: TextIO, format: str, ref: Optional[str]) ) ) + # Added support for submodules - we cannot cache them because they are already "cached" in + # .git/modules directory. 
Hence we just get the messages from the submodule repository + if config.submodules and repository.has_submodules: + for file in files: + if file in repository.submodules: + name = repository.submodules[file] + module_path = Path(".git/modules/", name) + old_version, new_version = repository.submodule_change(file) + upgrades.append( + core.Upgrade( + name=name, + old_version=old_version, + new_version=new_version, + messages=repo.Repo(module_path, bare=True, ref="HEAD").messages( + old_version + ), + ) + ) + # extract JIRA tickets from commit messages between two tags that follow semantic release # modify upgrades by creating dict with keys but empty values (ready for summaries of tickets) for upgrade in upgrades: - url = config.dependencies[upgrade.name] # this might return an object with more information - messages = cache.messages(upgrade.name, url, upgrade.new_version, upgrade.old_version) - tickets = {ticket for m in messages for ticket in jira.extract_ticket_names(m)} + if upgrade.messages is None: + url = config.observe[upgrade.name] # this might return an object with more information + upgrade.messages = cache.cache(upgrade.name, url).messages( + upgrade.old_version, upgrade.new_version + ) + logger.debug( + f"Messages for {upgrade.name} between {upgrade.new_version} and" + f" {upgrade.old_version}: {upgrade.messages}" + ) + tickets = {ticket for m in upgrade.messages for ticket in jira.extract_ticket_names(m)} if len(tickets) == 0: logger.info( diff --git a/gira/repo.py b/gira/repo.py index 4bb2a06..5dcee65 100644 --- a/gira/repo.py +++ b/gira/repo.py @@ -1,20 +1,37 @@ from pathlib import Path from typing import Optional -import pygit2 +import pygit2 # type: ignore + +from . 
import logger class Repo: - path: str + path: Path ref: str bare: bool repo: pygit2.Repository + _submodules: Optional[dict[Path, str]] + MESSAGE_LIMIT = 250 - def __init__(self, path: str, ref: Optional[str] = "", bare: bool = False): + def __init__(self, path: Path, ref: Optional[str] = "", bare: bool = False): self.path = path - self.repo = pygit2.Repository(path, pygit2.GIT_REPOSITORY_OPEN_BARE if bare else 0) + self.repo = pygit2.Repository(str(path), pygit2.GIT_REPOSITORY_OPEN_BARE if bare else 0) self.bare = bare self.ref = self._check_ref(ref) + self._submodules = None + + @property + def has_submodules(self) -> bool: + """Check if repository has submodules""" + return len(self.repo.listall_submodules()) > 0 + + @property + def submodules(self) -> dict[Path, str]: + """Return dict of submodules paths and names""" + if not self._submodules: + self._submodules = {Path(s.path): s.name for s in self.repo.submodules} + return self._submodules def changed_files(self) -> list[Path]: """List changed filenames in the repository since `self.ref` revision""" @@ -29,6 +46,25 @@ def changed_files(self) -> list[Path]: files.add(diff.delta.new_file.path) return [Path(s) for s in files] + def submodule_change(self, submodule_path: Path) -> tuple[str, str]: + """Return list of lines added and removed in the diff""" + try: + diffs = self.repo.diff(self.ref, context_lines=0) + except KeyError: + raise RuntimeError(f"Revision {self.ref} does not exist") + + for diff in diffs: + if Path(diff.delta.new_file.path) == submodule_path: + old_version = "" + new_version = "" + for line in diff.hunks[0].lines: + if line.origin == "+": + new_version = line.content.strip().split(" ")[-1] + if line.origin == "-": + old_version = line.content.strip().split(" ")[-1] + return (old_version, new_version) + return ("", "") + def get_current_content(self, path: Path) -> str: """Get content of given filepath as it is on the disk right now""" if not path.exists(): @@ -61,3 +97,34 @@ def 
_check_ref(self, ref: Optional[str]): return "HEAD^" return "HEAD" + + def messages(self, a: str, b: Optional[str] = None): + """Get messages between two revisions a and b (in reverse chronological order) + + @throws KeyError in case of invalid references + """ + logger.debug(f"Getting messages between {a} and {b or self.ref} for {self.path.name}") + past_commit = self.repo.revparse_single(a) + current_commit = ( + self.repo.revparse_single(self.ref) if b is None else self.repo.revparse_single(b) + ) + + if not isinstance(past_commit, pygit2.Commit): + past_commit = past_commit.peel(pygit2.Commit) + if not isinstance(current_commit, pygit2.Commit): + current_commit = current_commit.peel(pygit2.Commit) + + if past_commit.commit_time > current_commit.commit_time: + logger.warning(f"Not getting commit messages for downgrade of {self.path.name}") + return [] + + commits = self.repo.walk(current_commit.oid) + messages = [] + for i, commit in enumerate(commits): + messages.append(commit.message.strip()) + if commit.oid.hex == past_commit.oid.hex: + break + if i >= Repo.MESSAGE_LIMIT: + logger.warning(f"Reached limit {Repo.MESSAGE_LIMIT} commits for {self.path.name}") + break + return messages