Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Resolve dependencies from lockfiles #1244

Merged
merged 14 commits into from
Jul 1, 2024
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions scanpipe/api/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -384,6 +384,8 @@ class Meta:
"source_packages",
"extra_data",
"package_uid",
"is_private",
"is_virtual",
"datasource_ids",
"datafile_paths",
"file_references",
Expand All @@ -409,6 +411,7 @@ class Meta:
"is_runtime",
"is_optional",
"is_resolved",
"is_direct",
"dependency_uid",
"for_package_uid",
"resolved_to_package_uid",
Expand Down
8 changes: 8 additions & 0 deletions scanpipe/filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -688,6 +688,8 @@ class PackageFilterSet(FilterSetUtilsMixin, django_filters.FilterSet):
declared_license_expression = django_filters.filters.CharFilter(
widget=HasValueDropdownWidget
)
is_private = StrictBooleanFilter()
is_virtual = StrictBooleanFilter()

class Meta:
model = DiscoveredPackage
Expand Down Expand Up @@ -721,6 +723,8 @@ class Meta:
"is_vulnerable",
"compliance_alert",
"tag",
"is_private",
"is_virtual",
]


Expand All @@ -731,6 +735,7 @@ class DependencyFilterSet(FilterSetUtilsMixin, django_filters.FilterSet):
"is_runtime",
"is_optional",
"is_resolved",
"is_direct",
"datasource_id",
"is_vulnerable",
]
Expand All @@ -751,6 +756,7 @@ class DependencyFilterSet(FilterSetUtilsMixin, django_filters.FilterSet):
"is_runtime",
"is_optional",
"is_resolved",
"is_direct",
"for_package",
"resolved_to_package",
"datafile_resource",
Expand All @@ -765,6 +771,7 @@ class DependencyFilterSet(FilterSetUtilsMixin, django_filters.FilterSet):
is_runtime = StrictBooleanFilter()
is_optional = StrictBooleanFilter()
is_resolved = StrictBooleanFilter()
is_direct = StrictBooleanFilter()
AyanSinhaMahapatra marked this conversation as resolved.
Show resolved Hide resolved
is_vulnerable = IsVulnerable(field_name="affected_by_vulnerabilities")

class Meta:
Expand All @@ -783,6 +790,7 @@ class Meta:
"is_runtime",
"is_optional",
"is_resolved",
"is_direct",
"datasource_id",
"is_vulnerable",
]
Expand Down
46 changes: 46 additions & 0 deletions scanpipe/migrations/0062_dependency_resolver_update.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Generated by Django 5.0.6 on 2024-06-04 20:48

from django.db import migrations, models


# Schema migration for the dependency resolver update.
# Adds three boolean flags, all defaulting to False, and one single-column
# index per new flag:
#   - discovereddependency.is_direct
#   - discoveredpackage.is_private
#   - discoveredpackage.is_virtual
class Migration(migrations.Migration):

# This migration is ordered after migration 0061 of the "scanpipe" app.
dependencies = [
("scanpipe", "0061_codebaseresource_is_legal_and_more"),
]

operations = [
# New columns; default=False is the value declared for each added field.
migrations.AddField(
model_name="discovereddependency",
name="is_direct",
field=models.BooleanField(default=False),
),
migrations.AddField(
model_name="discoveredpackage",
name="is_private",
field=models.BooleanField(default=False),
),
migrations.AddField(
model_name="discoveredpackage",
name="is_virtual",
field=models.BooleanField(default=False),
),
# One index per new flag, each with an explicit index name.
migrations.AddIndex(
model_name="discovereddependency",
index=models.Index(
fields=["is_direct"], name="scanpipe_di_is_dire_6dc594_idx"
),
),
migrations.AddIndex(
model_name="discoveredpackage",
index=models.Index(
fields=["is_private"], name="scanpipe_di_is_priv_9ffd1a_idx"
),
),
migrations.AddIndex(
model_name="discoveredpackage",
index=models.Index(
fields=["is_virtual"], name="scanpipe_di_is_virt_c5c176_idx"
),
),
]
95 changes: 83 additions & 12 deletions scanpipe/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -1031,6 +1031,19 @@ def walk_codebase_path(self):
"""Return files and directories path of the codebase/ directory recursively."""
return self.codebase_path.rglob("*")

def get_resource(self, path):
    """
    Return the codebase resource present for a given path,
    or None if the resource with that path does not exist.
    This path is relative to the scan location.
    This is same as the Codebase.get_resource() function.
    """
    # We don't want to raise an exception if there is no resource
    # as this function is also called from the SCTK side, so the
    # ``get_or_none`` result is returned as-is.
    return self.codebaseresources.get_or_none(path=path)

@cached_property
def can_change_inputs(self):
"""
Expand Down Expand Up @@ -3061,6 +3074,8 @@ class AbstractPackage(models.Model):
blank=True,
help_text=_("A notice text for this package."),
)
is_private = models.BooleanField(default=False)
is_virtual = models.BooleanField(default=False)
datasource_ids = models.JSONField(
default=list,
blank=True,
Expand Down Expand Up @@ -3163,6 +3178,8 @@ class Meta:
models.Index(fields=["sha512"]),
models.Index(fields=["compliance_alert"]),
models.Index(fields=["tag"]),
models.Index(fields=["is_private"]),
models.Index(fields=["is_virtual"]),
]
constraints = [
models.UniqueConstraint(
Expand Down Expand Up @@ -3190,15 +3207,7 @@ def purl(self):

@classmethod
def extract_purl_data(cls, package_data):
purl_data = {}

for field_name in PURL_FIELDS:
value = package_data.get(field_name)
if field_name == "qualifiers":
value = normalize_qualifiers(value, encode=True)
purl_data[field_name] = value or ""

return purl_data
return normalize_package_url_data(package_data)
AyanSinhaMahapatra marked this conversation as resolved.
Show resolved Hide resolved

@classmethod
def create_from_data(cls, project, package_data):
Expand Down Expand Up @@ -3530,9 +3539,28 @@ class DiscoveredDependency(
"The identifier for the datafile handler used to obtain this dependency."
),
)
is_runtime = models.BooleanField(default=False)
is_optional = models.BooleanField(default=False)
is_resolved = models.BooleanField(default=False)
is_runtime = models.BooleanField(
default=False,
help_text=_("True if this dependency is a runtime dependency."),
)
is_optional = models.BooleanField(
default=False,
help_text=_("True if this dependency is an optional dependency"),
)
is_resolved = models.BooleanField(
default=False,
help_text=_(
"True if this dependency version requirement has been pinned "
"and this dependency points to an exact version."
),
)
is_direct = models.BooleanField(
default=False,
help_text=_(
"True if this is a direct, first-level dependency relationship "
"for a package."
),
)

objects = DiscoveredDependencyQuerySet.as_manager()

Expand All @@ -3553,6 +3581,7 @@ class Meta:
models.Index(fields=["is_runtime"]),
models.Index(fields=["is_optional"]),
models.Index(fields=["is_resolved"]),
models.Index(fields=["is_direct"]),
]
constraints = [
models.UniqueConstraint(
Expand Down Expand Up @@ -3599,6 +3628,7 @@ def create_from_data(
project,
dependency_data,
for_package=None,
resolved_to_package=None,
datafile_resource=None,
datasource_id=None,
strip_datafile_path_root=False,
Expand Down Expand Up @@ -3638,6 +3668,13 @@ def create_from_data(
package_uid=for_package_uid
)

if not resolved_to_package:
resolved_to_uid = dependency_data.get("resolved_to_uid")
if resolved_to_uid:
resolved_to_package = project.discoveredpackages.get(
package_uid=resolved_to_uid
)
tdruez marked this conversation as resolved.
Show resolved Hide resolved

if not datafile_resource:
datafile_path = dependency_data.get("datafile_path")
if datafile_path:
Expand All @@ -3663,10 +3700,25 @@ def create_from_data(
return cls.objects.create(
project=project,
for_package=for_package,
resolved_to_package=resolved_to_package,
datafile_resource=datafile_resource,
**cleaned_data,
)

@classmethod
def extract_purl_data(cls, dependency_data, ignore_nulls=False):
    """
    Parse the "purl" entry of ``dependency_data`` into its components and
    return them normalized by ``normalize_package_url_data``.

    ``ignore_nulls`` is forwarded unchanged to ``normalize_package_url_data``.
    """
    purl_string = dependency_data.get("purl")
    package_url = PackageURL.from_string(purl=purl_string)
    purl_mapping = package_url.to_dict()
    return normalize_package_url_data(purl_mapping, ignore_nulls)

@classmethod
def populate_dependency_uuid(cls, dependency_data):
    """
    Set ``dependency_data["dependency_uid"]`` in place to the purl string of
    the dependency extended with a freshly generated "uuid" qualifier.

    The purl is read from the "purl" entry of ``dependency_data``.
    Nothing is returned.
    """
    package_url = PackageURL.from_string(purl=dependency_data.get("purl"))
    # NOTE(review): relies on ``qualifiers`` being a mutable mapping on the
    # parsed PackageURL -- confirm against the packageurl library.
    package_url.qualifiers["uuid"] = str(uuid.uuid4())
    dependency_uid = package_url.to_string()
    dependency_data["dependency_uid"] = dependency_uid

@property
def spdx_id(self):
return f"SPDXRef-scancodeio-{self._meta.model_name}-{self.dependency_uid}"
Expand Down Expand Up @@ -3694,6 +3746,25 @@ def as_spdx(self):
)


def normalize_package_url_data(purl_mapping, ignore_nulls=False):
    """
    Normalize a mapping of purl data so database queries with
    purl data can be executed.

    Every field listed in ``PURL_FIELDS`` is read from ``purl_mapping``;
    the "qualifiers" value is passed through ``normalize_qualifiers`` with
    ``encode=True``.

    When ``ignore_nulls`` is False (default), every purl field is present in
    the result and empty values are replaced by an empty string.
    When ``ignore_nulls`` is True, fields with an empty value are left out.
    """
    normalized_purl_mapping = {}
    for field_name in PURL_FIELDS:
        value = purl_mapping.get(field_name)
        if field_name == "qualifiers":
            value = normalize_qualifiers(value, encode=True)

        if value:
            normalized_purl_mapping[field_name] = value
        elif not ignore_nulls:
            # Empty values are stored as "" unless the caller asked to skip them.
            normalized_purl_mapping[field_name] = ""

    return normalized_purl_mapping


class WebhookSubscription(UUIDPKModel, ProjectRelatedModel):
target_url = models.URLField(_("Target URL"), max_length=1024)
created_date = models.DateTimeField(auto_now_add=True, editable=False)
Expand Down
20 changes: 12 additions & 8 deletions scanpipe/pipelines/inspect_packages.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
# ScanCode.io is a free software code scanning tool from nexB Inc. and others.
# Visit https://github.com/nexB/scancode.io for support and download.

from scanpipe.pipelines import group
from scanpipe.pipelines.scan_codebase import ScanCodebase
from scanpipe.pipes import scancode

Expand Down Expand Up @@ -49,23 +50,26 @@ def steps(cls):
cls.flag_empty_files,
cls.flag_ignored_resources,
cls.scan_for_application_packages,
cls.create_packages_and_dependencies,
cls.resolve_dependencies,
)

def scan_for_application_packages(self):
"""
Scan resources for package information to add DiscoveredPackage
and DiscoveredDependency objects from detected package data.
"""
# `assemble` is set to False because here in this pipeline we
# only detect package_data in resources and create
# Package/Dependency instances directly instead of assembling
# the packages and assigning files to them
scancode.scan_for_application_packages(
project=self.project,
assemble=False,
assemble=True,
package_only=True,
progress_logger=self.log,
)

def create_packages_and_dependencies(self):
scancode.process_package_data(self.project)
# Pipeline step tagged with the "Static Resolver" group; it delegates to
# scancode.resolve_dependencies for this pipeline's project.
@group("Static Resolver")
def resolve_dependencies(self):
"""
Create packages and dependency relationships from
lockfiles or manifests containing pre-resolved
dependencies.
"""
scancode.resolve_dependencies(project=self.project)
19 changes: 19 additions & 0 deletions scanpipe/pipelines/resolve_dependencies.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,10 @@
# ScanCode.io is a free software code scanning tool from nexB Inc. and others.
# Visit https://github.com/nexB/scancode.io for support and download.

from scanpipe.pipelines import group
from scanpipe.pipelines.scan_codebase import ScanCodebase
from scanpipe.pipes import resolve
from scanpipe.pipes import scancode


class ResolveDependencies(ScanCodebase):
Expand All @@ -45,6 +47,8 @@ def steps(cls):
cls.collect_and_create_codebase_resources,
cls.flag_ignored_resources,
cls.get_manifest_inputs,
cls.scan_for_application_packages,
cls.create_packages_and_dependencies,
cls.get_packages_from_manifest,
cls.create_resolved_packages,
)
Expand All @@ -53,6 +57,20 @@ def get_manifest_inputs(self):
"""Locate package manifest files with a supported package resolver."""
self.manifest_resources = resolve.get_manifest_resources(self.project)

# Pipeline step tagged with the "Static Resolver" group.
# Calls scancode.scan_for_application_packages for this project with
# assemble=False, passing self.manifest_resources (set by get_manifest_inputs)
# as resource_qs and self.log as the progress logger.
# NOTE(review): presumably resource_qs limits the scan to the manifest
# resources only -- confirm against scanpipe.pipes.scancode.
@group("Static Resolver")
def scan_for_application_packages(self):
scancode.scan_for_application_packages(
self.project,
assemble=False,
resource_qs=self.manifest_resources,
progress_logger=self.log,
)

# Pipeline step tagged with the "Static Resolver" group; it calls
# scancode.process_package_data for this project with static_resolve=True.
# NOTE(review): the effect of static_resolve is defined in
# scanpipe.pipes.scancode and is not visible here -- confirm before relying on it.
@group("Static Resolver")
def create_packages_and_dependencies(self):
scancode.process_package_data(self.project, static_resolve=True)

@group("Dynamic Resolver")
def get_packages_from_manifest(self):
"""
Resolve package data from lockfiles/requirement files with package
Expand All @@ -65,6 +83,7 @@ def get_packages_from_manifest(self):
model="get_packages_from_manifest",
)

@group("Dynamic Resolver")
def create_resolved_packages(self):
"""Create the resolved packages and their dependencies in the database."""
resolve.create_packages_and_dependencies(
Expand Down
Loading