From 44711eafc66c9fae1b06f5c7fc2946abe97c8059 Mon Sep 17 00:00:00 2001 From: Ayan Sinha Mahapatra Date: Fri, 28 Jun 2024 16:44:00 +0530 Subject: [PATCH] Address feedback and refactor code Signed-off-by: Ayan Sinha Mahapatra --- .../0062_dependency_resolver_update.py | 39 ++++++++- scanpipe/models.py | 19 +++- scanpipe/pipes/__init__.py | 6 +- scanpipe/pipes/scancode.py | 86 +++++++++++++------ scanpipe/tests/test_models.py | 13 +++ scanpipe/tests/test_pipelines.py | 17 ++-- 6 files changed, 137 insertions(+), 43 deletions(-) diff --git a/scanpipe/migrations/0062_dependency_resolver_update.py b/scanpipe/migrations/0062_dependency_resolver_update.py index 8bafb6db7..733b83c12 100644 --- a/scanpipe/migrations/0062_dependency_resolver_update.py +++ b/scanpipe/migrations/0062_dependency_resolver_update.py @@ -13,17 +13,50 @@ class Migration(migrations.Migration): migrations.AddField( model_name="discovereddependency", name="is_direct", - field=models.BooleanField(default=False), + field=models.BooleanField( + default=False, + help_text="True if this is a direct, first-level dependency relationship for a package.", + ), ), migrations.AddField( model_name="discoveredpackage", name="is_private", - field=models.BooleanField(default=False), + field=models.BooleanField( + default=False, + help_text="True if this is a private package, either not meant to be published on a repository, and/or a local package without a name and version used primarily to track dependencies and other information.", + ), ), migrations.AddField( model_name="discoveredpackage", name="is_virtual", - field=models.BooleanField(default=False), + field=models.BooleanField( + default=False, + help_text="True if this package is created only from a manifest or lockfile, and not from its actual packaged code. The files of this package are not present in the codebase.", + ), + ), + migrations.AlterField( + model_name="discovereddependency", + name="is_optional", + field=models.BooleanField( + default=False, + help_text="True if this dependency is an optional dependency", + ), + ), + migrations.AlterField( + model_name="discovereddependency", + name="is_resolved", + field=models.BooleanField( + default=False, + help_text="True if this dependency version requirement has been pinned and this dependency points to an exact version.", + ), + ), + migrations.AlterField( + model_name="discovereddependency", + name="is_runtime", + field=models.BooleanField( + default=False, + help_text="True if this dependency is a runtime dependency.", + ), ), migrations.AddIndex( model_name="discovereddependency", diff --git a/scanpipe/models.py b/scanpipe/models.py index 6e641fc02..7f67f5e23 100644 --- a/scanpipe/models.py +++ b/scanpipe/models.py @@ -3074,8 +3074,23 @@ class AbstractPackage(models.Model): blank=True, help_text=_("A notice text for this package."), ) - is_private = models.BooleanField(default=False) - is_virtual = models.BooleanField(default=False) + is_private = models.BooleanField( + default=False, + help_text=_( + "True if this is a private package, either not meant to be " + "published on a repository, and/or a local package without a " + "name and version used primarily to track dependencies and " + "other information." + ), + ) + is_virtual = models.BooleanField( + default=False, + help_text=_( + "True if this package is created only from a manifest or lockfile, " + "and not from its actual packaged code. The files of this package " + "are not present in the codebase." + ), + ) datasource_ids = models.JSONField( default=list, blank=True, diff --git a/scanpipe/pipes/__init__.py b/scanpipe/pipes/__init__.py index c5b0a015b..16c232dad 100644 --- a/scanpipe/pipes/__init__.py +++ b/scanpipe/pipes/__init__.py @@ -287,12 +287,12 @@ def update_or_create_dependency( dependencies.append(dependency) for dependency in dependencies: - # This dependency relationship is for a new package - if ( + is_for_new_package = ( for_package and dependency.for_package and dependency.for_package != for_package - ): + ) + if is_for_new_package: DiscoveredDependency.populate_dependency_uuid(dependency_data) dependency = DiscoveredDependency.create_from_data( project, diff --git a/scanpipe/pipes/scancode.py b/scanpipe/pipes/scancode.py index 7a079d4c7..e72a9c471 100644 --- a/scanpipe/pipes/scancode.py +++ b/scanpipe/pipes/scancode.py @@ -489,38 +489,73 @@ def process_package_data(project, static_resolve=False): for resource in project.codebaseresources.has_package_data(): logger.info(f" Processing: {resource.path}") for package_mapping in resource.package_data: - pd = packagedcode_models.PackageData.from_dict(mapping=package_mapping) - if not pd.can_assemble: - continue + create_packages_and_dependencies_from_mapping( + project=project, + resource=resource, + package_mapping=package_mapping, + find_package=False, + ) - logger.info(f" Package data: {pd.purl}") + if static_resolve: + resolve_dependencies(project) - package_data = pd.to_dict() - dependencies = package_data.pop("dependencies") - package = None - if pd.purl: - package = pipes.update_or_create_package( - project=project, - package_data=package_data, - codebase_resources=[resource], - ) +def create_packages_and_dependencies_from_mapping( + project, + resource, + package_mapping, + find_package=False, +): + """ + Create or update packages and dependencies from a `package_mapping`, + for a respective `resource` and `project`. - for dep in dependencies: - pipes.update_or_create_dependency( - project=project, - dependency_data=dep, - for_package=package, - datafile_resource=resource, - datasource_id=pd.datasource_id, - ) + If `find_package` is True, find the package with the respective purl data, + instead of trying to create it. + """ + pd = packagedcode_models.PackageData.from_dict(mapping=package_mapping) + if not pd.can_assemble: + return - if static_resolve: - resolve_dependencies(project) + logger.info(f" Package data: {pd.purl}") + package_data = pd.to_dict() + dependencies = package_data.pop("dependencies") -def resolve_dependencies(project): + package = None + if pd.purl: + if find_package: + purl_data = DiscoveredPackage.extract_purl_data(package_mapping) + packages = DiscoveredPackage.objects.filter( + project=project, + **purl_data, + ) + for package in packages: + if resource.location in package.datafile_paths: + break + else: + package = pipes.update_or_create_package( + project=project, + package_data=package_data, + codebase_resources=[resource], + ) + + update_packages_and_dependencies( + project=project, + dependencies=dependencies, + package=package, + resource=resource, + datasource_id=pd.datasource_id, + process_resolved=False, + ) + + +def resolve_dependencies(project): + """ + Match and merge resolved dependencies to create a dependency graph of + direct dependency relations between resolved packages. + """ logger.info(f"Project {project} resolve_dependencies:") for resource in project.codebaseresources.has_package_data(): for package_mapping in resource.package_data: @@ -555,6 +590,7 @@ def update_packages_and_dependencies( package, resource, datasource_id, + process_resolved=True, ): """ Create DiscoveredPackage and DiscoveredDependency objects from @@ -564,7 +600,7 @@ def update_packages_and_dependencies( for dep in dependencies: resolved_package = dep.get("resolved_package") or {} resolved_to_package = None - if resolved_package: + if process_resolved and resolved_package: resolved_to_package = pipes.update_or_create_package( project=project, package_data=resolved_package, diff --git a/scanpipe/tests/test_models.py b/scanpipe/tests/test_models.py index d7c89bd44..dc1f2c935 100644 --- a/scanpipe/tests/test_models.py +++ b/scanpipe/tests/test_models.py @@ -49,6 +49,7 @@ from django.utils import timezone from packagedcode.models import PackageData +from packageurl import PackageURL from requests.exceptions import RequestException from rq.job import JobStatus @@ -65,6 +66,7 @@ from scanpipe.models import UUIDTaggedItem from scanpipe.models import convert_glob_to_django_regex from scanpipe.models import get_project_work_directory +from scanpipe.models import normalize_package_url_data from scanpipe.pipes.fetch import Download from scanpipe.pipes.input import copy_input from scanpipe.tests import dependency_data1 @@ -730,6 +732,17 @@ def test_scanpipe_project_get_ignored_dependency_scopes_index(self): expected = {"npm": ["devDependencies"], "pypi": ["tests", "build"]} self.assertEqual(expected, self.project1.get_ignored_dependency_scopes_index()) + def test_scanpipe_normalize_package_url_data(self): + purl = PackageURL.from_string("pkg:npm/athena-express@6.0.4") + purl_data = normalize_package_url_data(purl_mapping=purl.to_dict()) + self.assertEqual(purl_data.get("namespace"), "") + + purl_data = normalize_package_url_data( + purl_mapping=purl.to_dict(), + ignore_nulls=True, + ) + self.assertEqual(purl_data.get("namespace"), None) + def test_scanpipe_project_get_ignored_vulnerabilities_set(self): self.project1.settings = { "ignored_vulnerabilities": [ diff --git a/scanpipe/tests/test_pipelines.py b/scanpipe/tests/test_pipelines.py index 850b106aa..010a0ec76 100644 --- a/scanpipe/tests/test_pipelines.py +++ b/scanpipe/tests/test_pipelines.py @@ -557,6 +557,12 @@ def _normalize_package_uids(self, data): Return the `data`, where any `package_uid` value has been normalized with `purl_with_fake_uuid()` """ + fields_with_package_uids = [ + "package_uid", + "dependency_uid", + "for_package_uid", + "resolved_to_package_uid", + ] if isinstance(data, list): return [self._normalize_package_uids(entry) for entry in data] @@ -568,16 +574,7 @@ def _normalize_package_uids(self, data): for key, value in data.items(): if isinstance(value, (list, dict)): value = self._normalize_package_uids(value) - if ( - key - in ( - "package_uid", - "dependency_uid", - "for_package_uid", - "resolved_to_package_uid", - ) - and value - ): + if key in fields_with_package_uids and value: value = purl_with_fake_uuid(value) if key == "for_packages" and value: value = sorted(