From 18701364386eacb7080944195c6ffca27c5836cf Mon Sep 17 00:00:00 2001 From: gwnlng <108258271+gwnlng@users.noreply.github.com> Date: Thu, 3 Nov 2022 11:33:33 +0800 Subject: [PATCH] fix: files-threshold (#121) * fix: limit import to 1,000 manifests per repository --- README.md | 5 ++++ app/tests/test_snyk_scm_refresh.py | 42 ++++++++++++++++++++++++++++-- app/utils/snyk_helper.py | 11 ++++++++ common.py | 5 ++++ 4 files changed, 61 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 1bcdd15..e87d064 100644 --- a/README.md +++ b/README.md @@ -129,6 +129,7 @@ Use the `--dry-run` option to verify the execution plan for the first run | _updated-project-branches.csv | projects with updated default branch | | _update-project-branches-errors.csv | projects that had an error attempting to update default branch | | _repos-skipped-on-error.csv | repos skipped due to import error | +| _manifests-skipped-on-limit.csv | manifest projects skipped due to import limit | ### Handling of large repositories The primary method used by this tool to retrieve the GIT tree from each repository for the basis of comparison is via the Github API. @@ -152,3 +153,7 @@ This will only query the git tree via API and look for a truncated response, and To find all the repos based on a Snyk org, use the `--org-id` parameter in conjunction with `--audit-large-repos` Optionally you can also supply a repo name to check a single repo by also supplying the `--repo-name` filter. + +### Manifest import limit +At most 1,000 manifest projects are imported per repository in a single execution. Manifests beyond that limit are skipped and logged to the `_manifests-skipped-on-limit.csv` file. +Re-run `snyk_scm_refresh` at the next scheduled execution to import any skipped projects. 
diff --git a/app/tests/test_snyk_scm_refresh.py b/app/tests/test_snyk_scm_refresh.py index 0341c99..56ef1f3 100644 --- a/app/tests/test_snyk_scm_refresh.py +++ b/app/tests/test_snyk_scm_refresh.py @@ -1,6 +1,10 @@ """test suite for snyk_scm_refresh.py""" import os import pytest +import random +import string +import snyk +from snyk.models import Organization from snyk.models import Project import common from app.snyk_repo import SnykRepo @@ -12,7 +16,8 @@ ) from app.utils.snyk_helper import ( get_snyk_projects_for_repo, - get_snyk_repos_from_snyk_projects + get_snyk_repos_from_snyk_projects, + import_manifests ) class MockResponse: @@ -190,7 +195,7 @@ def organization(self): org = Organization( name="My Other Org", id="a04d9cbd-ae6e-44af-b573-0556b0ad4bd2" ) - org.client = SnykClient("token") + org.client = snyk.SnykClient("token") return org def base_url(self): @@ -269,3 +274,36 @@ def test_passes_manifest_filter(): assert passes_manifest_filter(path_fail_2) == False assert passes_manifest_filter(path_pass_2) == True assert passes_manifest_filter(path_fail_3) == False + + +def test_import_manifest_exceeds_limit(mocker): + """ + Pytest snyk_helper.import_manifest exceeding limit of manifest projects + """ + # refer to ie-playground org + org_id = "39ddc762-b1b9-41ce-ab42-defbe4575bd6" + repo_full_name = "snyk-playground/java-goof" + integration_id = "5881e5b0-308f-4a1b-9bcb-38e3491872e0" + files = [] + + # follow snyk_repo.add_new_manifests appending manifest path + for x in range(common.MAX_IMPORT_MANIFEST_PROJECTS + 1): + files.append(dict({"path": ''.join(random.choices(string.ascii_lowercase, k=5)) + ".tf"})) + + mocker.patch.dict(os.environ, {'GITHUB_TOKEN': '1234'}) + org = Organization( + name="My Other Org", id=org_id, slug="myotherorg", url=f"https://snyk.io/api/v1/org/{org_id}" + ) + org.client = snyk.SnykClient("token") + mocker.patch("snyk.managers.OrganizationManager.get", return_value=org) + mocker.patch("snyk.models.Organization.client", 
return_value=org.client) + + # run assertion mock client will post request and hit SnykHTTPError + with pytest.raises(snyk.errors.SnykHTTPError): + import_manifests(org_id, repo_full_name, integration_id, files) + + # assert csv contains header and a skipped manifest file path + common.MANIFESTS_SKIPPED_ON_LIMIT_FILE.close() + with open("snyk-scm-refresh_manifests-skipped-on-limit.csv", 'r') as fp: + num_lines = len(fp.readlines()) + assert num_lines == 2 diff --git a/app/utils/snyk_helper.py b/app/utils/snyk_helper.py index 1faab66..255a2e7 100644 --- a/app/utils/snyk_helper.py +++ b/app/utils/snyk_helper.py @@ -183,6 +183,17 @@ def import_manifests(org_id, repo_full_name, integration_id, files=[]) -> Import path = f"org/{org.id}/integrations/{integration_id}/import" if len(files) > 0: + # verify against set limit per repo + if len(files) > common.MAX_IMPORT_MANIFEST_PROJECTS: + # log skipped manifests exceeding limit to csv file + skipped_files = files[-(len(files) - common.MAX_IMPORT_MANIFEST_PROJECTS):] + print(f"Importing up to limit of {common.MAX_IMPORT_MANIFEST_PROJECTS}/{len(files)}") + print(f"See skipped manifests in {common.MANIFESTS_SKIPPED_ON_LIMIT_FILE.name}") + for mf in skipped_files: + common.MANIFESTS_SKIPPED_ON_LIMIT_FILE.write(f"{mf['path']}\n") + # import manifests within limit + files = files[:common.MAX_IMPORT_MANIFEST_PROJECTS] + payload = { "target": {"owner": repo_full_name[0], "name": repo_full_name[1], "branch": ""}, "files": files diff --git a/common.py b/common.py index 834e093..1ced141 100644 --- a/common.py +++ b/common.py @@ -53,6 +53,10 @@ "%s_repos-skipped-on-error.csv" % LOG_PREFIX, "w" ) REPOS_SKIPPED_ON_ERROR_FILE.write("org,repo,status\n") +MANIFESTS_SKIPPED_ON_LIMIT_FILE = open( + "%s_manifests-skipped-on-limit.csv" % LOG_PREFIX, "w" +) +MANIFESTS_SKIPPED_ON_LIMIT_FILE.write("skipped_manifest_file_path\n") UPDATED_PROJECT_BRANCHES_FILE = open( "%s_updated-project-branches.csv" % LOG_PREFIX, "w" ) @@ -180,3 +184,4 @@ def 
toggle_to_bool(toggle_value) -> bool: PROJECT_TYPE_ENABLED_IAC = toggle_to_bool(ARGS.iac) # disabled snyk code due to unsupported underlying api changes PROJECT_TYPE_ENABLED_CODE = False +MAX_IMPORT_MANIFEST_PROJECTS = 1000