diff --git a/.github/workflows/v3-bisection.yml b/.github/workflows/v3-bisection.yml
index ab35ee6765..53a5c495b8 100644
--- a/.github/workflows/v3-bisection.yml
+++ b/.github/workflows/v3-bisection.yml
@@ -81,6 +81,15 @@ jobs:
             --torchbench-repo-path "${PWD}" --config "${BISECT_WORKDIR}/regression-${REGRESSION_DATE}.yaml" \
             --output "${BISECT_WORKDIR}/bisect-output-gh${GITHUB_RUN_ID}.json"
           cp -r "${BISECT_WORKDIR}" ../bisection-result
+      - name: Create the github issue
+        continue-on-error: true
+        if: env.TORCHBENCH_BISECTION_COMMIT_FOUND
+        uses: peter-evans/create-issue-from-file@v4
+        with:
+          title: V3 Performance Signal Detected by TorchBench Userbenchmark "torch-nightly" on ${{ env.TORCHBENCH_BISECTION_COMMIT_FOUND }}
+          content-filepath: ./benchmark/gh-issue.md
+          labels: |
+            torchbench-perf-report
       - name: Upload artifact
         if: always()
         uses: actions/upload-artifact@v3
diff --git a/.github/workflows/v3-nightly.yml b/.github/workflows/v3-nightly.yml
index f188a5b164..670013b69d 100644
--- a/.github/workflows/v3-nightly.yml
+++ b/.github/workflows/v3-nightly.yml
@@ -70,15 +70,6 @@ jobs:
           done
          rm -r ../benchmark-output || true
           cp -r ./.userbenchmark/torch-nightly ../benchmark-output
-      - name: Create the github issue
-        continue-on-error: true
-        if: env.TORCHBENCH_REGRESSION_DETECTED
-        uses: peter-evans/create-issue-from-file@v4
-        with:
-          title: V3 Performance Signal Detected by TorchBench Userbenchmark "torch-nightly" on ${{ env.TORCHBENCH_REGRESSION_DETECTED }}
-          content-filepath: ./benchmark/gh-issue.md
-          labels: |
-            torchbench-perf-report
       - name: Copy artifact and upload to scribe and Amazon S3
         run: |
           . "${SETUP_SCRIPT}"
@@ -89,7 +80,7 @@ jobs:
           python ./scripts/userbenchmark/upload_scribe.py --userbenchmark_json "${LATEST_RESULT}" --userbenchmark_platform "${PLATFORM_NAME}"
           # Upload the result json to Amazon S3
           python ./scripts/userbenchmark/upload_s3.py --upload-file "${LATEST_RESULT}" --userbenchmark_platform "${PLATFORM_NAME}"
-      - name: Copy regression results to Amazon S3
+      - name: Copy regression results to Amazon S3 and kick off bisection
         if: env.TORCHBENCH_REGRESSION_DETECTED
         run: |
           . "${SETUP_SCRIPT}"
"${SETUP_SCRIPT}" @@ -97,6 +88,14 @@ jobs: LATEST_REGRESSION_RESULT=$(find ../benchmark-output/ -name "regression-*.yaml" | sort -r | head -1) # Upload the regression json to Amazon S3 python ./scripts/userbenchmark/upload_s3.py --upload-file "${LATEST_REGRESSION_RESULT}" --userbenchmark_platform "${PLATFORM_NAME}" + # Get the workflow ID from + # https://api.github.com/repos/pytorch/benchmark/actions/workflows + # And dispatch the bisection workflow + curl -u xuzhao9:${{ secrets.TORCHBENCH_ACCESS_TOKEN }} \ + -X POST \ + -H "Accept: application/vnd.github.v3+json" \ + https://api.github.com/repos/pytorch/benchmark/actions/workflows/57994037/dispatches \ + -d '{"ref": "main", "inputs": {"regression_date": "${{ env.TORCHBENCH_REGRESSION_DETECTED }}" } }' - name: Upload result to GH Actions Artifact uses: actions/upload-artifact@v3 with: diff --git a/bisection.py b/bisection.py index 884bdeed19..0758347eb5 100644 --- a/bisection.py +++ b/bisection.py @@ -63,7 +63,7 @@ TorchRepo, ) from utils.cuda_utils import DEFAULT_CUDA_VERSION, prepare_cuda_env - + from utils.github import process_bisection_into_gh_issue IS_FBCODE = False except (ImportError, ModuleNotFoundError): # Meta-Internal imports @@ -173,21 +173,26 @@ def __str__(self): class BisectionTargetRepo: repo: TorchRepo + # Start and end git hash start: str end: str + # Start and end version + start_version: str + end_version: str non_target_repos: List[TorchRepo] # generated in prep() bisection_env: os._Environ commits: List[Commit] # Map from commit SHA to its index in commits commit_dict: Dict[str, int] - - def __init__( - self, repo: TorchRepo, start: str, end: str, non_target_repos: List[TorchRepo] - ): + def __init__(self, repo: TorchRepo, start: str, end: str, + start_version: str, end_version: str, + non_target_repos: List[TorchRepo]): self.repo = repo self.start = start self.end = end + self.start_version = start_version + self.end_version = end_version self.non_target_repos = non_target_repos self.commits = [] self.commit_dict = dict() @@ -488,7 +493,9 @@ def output(self): json_obj = dict() json_obj["target_repo"] = self.target_repo.repo.name json_obj["start"] = self.target_repo.start + json_obj["start_version"] = self.target_repo.start_version json_obj["end"] = self.target_repo.end + json_obj["end_version"] = self.target_repo.end_version json_obj["result"] = [] for res in self.result: r = dict() @@ -501,7 +508,7 @@ def output(self): json_obj["result"].append(r) with open(self.output_json, "w") as outfile: json.dump(json_obj, outfile, indent=2) - print(f"Bisection successful. Result saved to {self.output_json}:") + print(f"Bisection successful. 
         print(json_obj)
 
 
@@ -544,6 +551,11 @@ def main() -> None:
         default="torchbench",
         help="Repositories to skip update.",
     )
+    parser.add_argument(
+        "--gh-issue-path",
+        default="gh-issue.md",
+        help="Output path to write the issue body"
+    )
     # by default, debug mode is disabled
     parser.add_argument(
         "--debug",
@@ -610,6 +622,8 @@ def main() -> None:
         target_repo=target_repo,
         start=start_hash,
         end=end_hash,
+        start_version=bisect_config.control_env.get("pytorch_version", "N/A"),
+        end_version=bisect_config.treatment_env.get("pytorch_version", "N/A"),
         bisect_config=bisect_config,
         output_json=args.output,
         debug=args.debug,
@@ -625,7 +639,9 @@ def main() -> None:
     )
     bisection.run()
     bisection.output()
-
+    # Format the output into a github issue if the bisector finds the root cause commit
+    if bisection.result:
+        process_bisection_into_gh_issue(bisection.output_json, args.gh_issue_path)
 
 if __name__ == "__main__":
     main()  # pragma: no cover
diff --git a/regression_detector.py b/regression_detector.py
index bb28f39145..f2b58ca366 100644
--- a/regression_detector.py
+++ b/regression_detector.py
@@ -6,11 +6,12 @@
 import importlib
 from dataclasses import asdict
 import os
+import re
 import yaml
 from pathlib import Path
 import time
 from datetime import datetime
-from typing import Any, List, Dict, Optional
+from typing import Any, List, Dict, Tuple, Optional
 from userbenchmark.utils import PLATFORMS, USERBENCHMARK_OUTPUT_PREFIX, REPO_PATH, \
     TorchBenchABTestResult, get_date_from_metrics, \
     get_ub_name, get_latest_files_in_s3_from_last_n_days, get_date_from_metrics_s3_key
@@ -19,17 +20,19 @@
 GITHUB_ISSUE_TEMPLATE = """
 TorchBench CI has detected a performance signal or runtime regression.
 
-Base PyTorch commit: {start}
+Control PyTorch commit: {control_commit}
+Control PyTorch version: {control_version}
 
-Affected PyTorch commit: {end}
+Treatment PyTorch commit: {treatment_commit}
+Treatment PyTorch version: {treatment_version}
 
 Affected Tests:
 {test_details}
 
-Tests that were no longer run on affected commit:
+Tests that were no longer run on treatment commit:
 {control_only_tests}
 
-Tests that were newly added on affected commit:
+Tests that were newly added on treatment commit:
 {treatment_only_tests}
 
 Runtime regressions found?
@@ -103,6 +106,15 @@ def process_regressions_into_yaml(regression_result: TorchBenchABTestResult, out
 
 
 def process_regressions_into_gh_issue(regression_result: TorchBenchABTestResult, owner: str, output_path: str, errors_path: str) -> None:
+    def _parse_date_from_pytorch_version(pytorch_version: str) -> Optional[str]:
+        # example pytorch nightly version: "2.2.0.dev20231116+cu118"
+        # return a date string like "2023-11-16"
+        ver_regex = r"dev([0-9]+)"
+        s = re.search(ver_regex, pytorch_version)
+        if not s or not s.groups():
+            return None
+        return datetime.strftime(datetime.strptime(s.groups()[0], "%Y%m%d"), "%Y-%m-%d")
+
     regressions_dict = asdict(regression_result)
     troubled_tests = ""
     for test, stats in regressions_dict["details"].items():
@@ -122,7 +134,9 @@ def process_regressions_into_gh_issue(regression_result: TorchBenchABTestResult,
         treatment_only_tests += f"- {test}: {stat}\n"
 
     control_commit = regressions_dict["control_env"]["pytorch_git_version"]
+    control_version = regressions_dict["control_env"]["pytorch_version"]
     treatment_commit = regressions_dict["treatment_env"]["pytorch_git_version"]
+    treatment_version = regressions_dict["treatment_env"]["pytorch_version"]
 
     runtime_regressions_msg = "No runtime errors were found in the " + \
                               "new benchmarks run--you are all good there!"
@@ -138,7 +152,11 @@ def process_regressions_into_gh_issue(regression_result: TorchBenchABTestResult,
 
     if "GITHUB_ENV" in os.environ:
         fname = os.environ["GITHUB_ENV"]
-        content = f"TORCHBENCH_REGRESSION_DETECTED='{treatment_commit}'\n"
+        treatment_date = _parse_date_from_pytorch_version(treatment_version)
+        # If the date can't be parsed from the pytorch version, use today
+        if not treatment_date:
+            treatment_date = datetime.today().strftime("%Y-%m-%d")
+        content = f"TORCHBENCH_REGRESSION_DETECTED='{treatment_date}'\n"
         with open(fname, 'a') as fo:
             fo.write(content)
 
@@ -149,8 +167,10 @@ def process_regressions_into_gh_issue(regression_result: TorchBenchABTestResult,
         github_run_url = f"https://github.com/pytorch/benchmark/actions/runs/{github_run_id}"
 
     issue_config: Dict[str, str] = {
-        "start": control_commit,
-        "end": treatment_commit,
+        "control_commit": control_commit,
+        "treatment_commit": treatment_commit,
+        "control_version": control_version,
+        "treatment_version": treatment_version,
         "test_details": troubled_tests,
         "control_only_tests": control_only_tests,
         "treatment_only_tests": treatment_only_tests,
@@ -174,7 +194,7 @@ def get_best_start_date(latest_metrics_jsons: List[str], end_date: datetime) ->
     return None
 
 
-def get_metrics_by_date(latest_metrics_jsons: List[str], pick_date: datetime):
+def get_metrics_by_date(latest_metrics_jsons: List[str], pick_date: datetime) -> Tuple[Any, str]:
     pick_metrics_json_key: Optional[str] = None
     for metrics_json_key in latest_metrics_jsons:
         metric_datetime = get_date_from_metrics_s3_key(metrics_json_key)
diff --git a/utils/github.py b/utils/github.py
new file mode 100644
index 0000000000..47c5374033
--- /dev/null
+++ b/utils/github.py
@@ -0,0 +1,60 @@
+import json
+import os
+
+from typing import Dict
+
+GITHUB_ISSUE_TEMPLATE = """
+TorchBench CI has detected a performance signal or runtime regression, and bisected its result.
+
+Control PyTorch commit: {control_commit}
+Control PyTorch version: {control_version}
+
+Treatment PyTorch commit: {treatment_commit}
+Treatment PyTorch version: {treatment_version}
+
+Bisection result:
+
+```
+{result}
+```
+
+cc {owner}
+"""
+
+DEFAULT_GH_ISSUE_OWNER = "@xuzhao9"
+
+def process_bisection_into_gh_issue(bisection_output_json: str, output_path: str) -> None:
+    with open(bisection_output_json, "r") as fp:
+        bisection = json.load(fp)
+
+    result = json.dumps(bisection, indent=4)
+    control_commit = bisection["start"]
+    control_version = bisection["start_version"]
+    treatment_commit = bisection["end"]
+    treatment_version = bisection["end_version"]
+
+    if "GITHUB_ENV" in os.environ:
+        fname = os.environ["GITHUB_ENV"]
+        content = f"TORCHBENCH_BISECTION_COMMIT_FOUND='{treatment_commit}'\n"
+        with open(fname, 'a') as fo:
+            fo.write(content)
+
+    github_run_id = os.environ.get("GITHUB_RUN_ID", None)
+    github_run_url = "No URL found, please look for the failing action in " + \
+                     "https://github.com/pytorch/benchmark/actions"
+    if github_run_id is not None:
+        github_run_url = f"https://github.com/pytorch/benchmark/actions/runs/{github_run_id}"
+
+    issue_config: Dict[str, str] = {
+        "control_commit": control_commit,
+        "treatment_commit": treatment_commit,
+        "control_version": control_version,
+        "treatment_version": treatment_version,
+        "result": result,
+        "github_run_url": github_run_url,
+        "owner": DEFAULT_GH_ISSUE_OWNER
+    }
+
+    issue_body = GITHUB_ISSUE_TEMPLATE.format(**issue_config)
+    with open(output_path, "w") as f:
+        f.write(issue_body)
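
For reference, a minimal standalone sketch of the nightly-version date parsing that the patch adds to regression_detector.py. The top-level function name used here is illustrative (the patch defines it as the nested helper _parse_date_from_pytorch_version), and the sample version strings are only examples of the documented "2.2.0.dev20231116+cu118" format:

```python
import re
from datetime import datetime
from typing import Optional


def parse_date_from_pytorch_version(pytorch_version: str) -> Optional[str]:
    # Extract the YYYYMMDD date embedded in a nightly version string,
    # e.g. "2.2.0.dev20231116+cu118" -> "2023-11-16".
    match = re.search(r"dev([0-9]+)", pytorch_version)
    if not match:
        # Release builds carry no dev date; the caller falls back to today's date.
        return None
    return datetime.strptime(match.group(1), "%Y%m%d").strftime("%Y-%m-%d")


if __name__ == "__main__":
    print(parse_date_from_pytorch_version("2.2.0.dev20231116+cu118"))  # 2023-11-16
    print(parse_date_from_pytorch_version("2.1.2"))                    # None
```

The extracted date is what the nightly workflow stores in TORCHBENCH_REGRESSION_DETECTED and later passes to the bisection workflow as the regression_date input.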