Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Resolve dependencies from lockfiles #1244

Merged
merged 14 commits into from
Jul 1, 2024
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions scanpipe/api/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -384,6 +384,8 @@ class Meta:
"source_packages",
"extra_data",
"package_uid",
"is_private",
"is_virtual",
"datasource_ids",
"datafile_paths",
"file_references",
Expand All @@ -409,6 +411,7 @@ class Meta:
"is_runtime",
"is_optional",
"is_resolved",
"is_direct",
"dependency_uid",
"for_package_uid",
"resolved_to_package_uid",
Expand Down
8 changes: 8 additions & 0 deletions scanpipe/filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -688,6 +688,8 @@ class PackageFilterSet(FilterSetUtilsMixin, django_filters.FilterSet):
declared_license_expression = django_filters.filters.CharFilter(
widget=HasValueDropdownWidget
)
is_private = StrictBooleanFilter()
is_virtual = StrictBooleanFilter()

class Meta:
model = DiscoveredPackage
Expand Down Expand Up @@ -721,6 +723,8 @@ class Meta:
"is_vulnerable",
"compliance_alert",
"tag",
"is_private",
"is_virtual",
]


Expand All @@ -731,6 +735,7 @@ class DependencyFilterSet(FilterSetUtilsMixin, django_filters.FilterSet):
"is_runtime",
"is_optional",
"is_resolved",
"is_direct",
"datasource_id",
"is_vulnerable",
]
Expand All @@ -751,6 +756,7 @@ class DependencyFilterSet(FilterSetUtilsMixin, django_filters.FilterSet):
"is_runtime",
"is_optional",
"is_resolved",
"is_direct",
"for_package",
"resolved_to_package",
"datafile_resource",
Expand All @@ -765,6 +771,7 @@ class DependencyFilterSet(FilterSetUtilsMixin, django_filters.FilterSet):
is_runtime = StrictBooleanFilter()
is_optional = StrictBooleanFilter()
is_resolved = StrictBooleanFilter()
is_direct = StrictBooleanFilter()
AyanSinhaMahapatra marked this conversation as resolved.
Show resolved Hide resolved
is_vulnerable = IsVulnerable(field_name="affected_by_vulnerabilities")

class Meta:
Expand All @@ -783,6 +790,7 @@ class Meta:
"is_runtime",
"is_optional",
"is_resolved",
"is_direct",
"datasource_id",
"is_vulnerable",
]
Expand Down
79 changes: 79 additions & 0 deletions scanpipe/migrations/0062_dependency_resolver_update.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
# Generated by Django 5.0.6 on 2024-06-04 20:48

from django.db import migrations, models


class Migration(migrations.Migration):
    """
    Add the ``is_direct`` flag to dependencies and the ``is_private`` and
    ``is_virtual`` flags to packages, attach help texts to the existing
    dependency boolean flags, and index the three new fields.
    """

    # Must be applied after the previous scanpipe migration.
    dependencies = [
        ("scanpipe", "0061_codebaseresource_is_legal_and_more"),
    ]

    operations = [
        # New boolean flags, all defaulting to False.
        migrations.AddField(
            model_name="discovereddependency",
            name="is_direct",
            field=models.BooleanField(
                default=False,
                help_text="True if this is a direct, first-level dependency relationship for a package.",
            ),
        ),
        migrations.AddField(
            model_name="discoveredpackage",
            name="is_private",
            field=models.BooleanField(
                default=False,
                help_text="True if this is a private package, either not meant to be published on a repository, and/or a local package without a name and version used primarily to track dependencies and other information.",
            ),
        ),
        migrations.AddField(
            model_name="discoveredpackage",
            name="is_virtual",
            field=models.BooleanField(
                default=False,
                help_text="True if this package is created only from a manifest or lockfile, and not from its actual packaged code. The files of this package are not present in the codebase.",
            ),
        ),
        # Existing dependency flags: the field definitions now carry a help_text.
        migrations.AlterField(
            model_name="discovereddependency",
            name="is_optional",
            field=models.BooleanField(
                default=False,
                help_text="True if this dependency is an optional dependency",
            ),
        ),
        migrations.AlterField(
            model_name="discovereddependency",
            name="is_resolved",
            field=models.BooleanField(
                default=False,
                help_text="True if this dependency version requirement has been pinned and this dependency points to an exact version.",
            ),
        ),
        migrations.AlterField(
            model_name="discovereddependency",
            name="is_runtime",
            field=models.BooleanField(
                default=False,
                help_text="True if this dependency is a runtime dependency.",
            ),
        ),
        # One single-column index per newly added flag.
        migrations.AddIndex(
            model_name="discovereddependency",
            index=models.Index(
                fields=["is_direct"], name="scanpipe_di_is_dire_6dc594_idx"
            ),
        ),
        migrations.AddIndex(
            model_name="discoveredpackage",
            index=models.Index(
                fields=["is_private"], name="scanpipe_di_is_priv_9ffd1a_idx"
            ),
        ),
        migrations.AddIndex(
            model_name="discoveredpackage",
            index=models.Index(
                fields=["is_virtual"], name="scanpipe_di_is_virt_c5c176_idx"
            ),
        ),
    ]
115 changes: 103 additions & 12 deletions scanpipe/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@
from licensedcode.cache import get_licensing
from matchcode_toolkit.fingerprinting import IGNORED_DIRECTORY_FINGERPRINTS
from packagedcode.models import build_package_uid
from packagedcode.utils import get_base_purl
from packageurl import PackageURL
from packageurl import normalize_qualifiers
from packageurl.contrib.django.models import PackageURLMixin
Expand Down Expand Up @@ -1031,6 +1032,19 @@ def walk_codebase_path(self):
"""Return files and directories path of the codebase/ directory recursively."""
return self.codebase_path.rglob("*")

def get_resource(self, path):
    """
    Return the codebase resource of this project stored at ``path``,
    or None if no resource with that path exists.

    The ``path`` is relative to the scan location.
    This mirrors the Codebase.get_resource() function.
    """
    # A missing resource must not raise: this function is also called
    # from the SCTK side, which expects None in that case.
    found = self.codebaseresources.get_or_none(path=path)
    return found if found else None

@cached_property
def can_change_inputs(self):
"""
Expand Down Expand Up @@ -3061,6 +3075,23 @@ class AbstractPackage(models.Model):
blank=True,
help_text=_("A notice text for this package."),
)
is_private = models.BooleanField(
default=False,
help_text=_(
"True if this is a private package, either not meant to be "
"published on a repository, and/or a local package without a "
"name and version used primarily to track dependencies and "
"other information."
),
)
is_virtual = models.BooleanField(
default=False,
help_text=_(
"True if this package is created only from a manifest or lockfile, "
"and not from its actual packaged code. The files of this package "
"are not present in the codebase."
),
)
datasource_ids = models.JSONField(
default=list,
blank=True,
Expand Down Expand Up @@ -3163,6 +3194,8 @@ class Meta:
models.Index(fields=["sha512"]),
models.Index(fields=["compliance_alert"]),
models.Index(fields=["tag"]),
models.Index(fields=["is_private"]),
models.Index(fields=["is_virtual"]),
]
constraints = [
models.UniqueConstraint(
Expand Down Expand Up @@ -3190,15 +3223,7 @@ def purl(self):

@classmethod
def extract_purl_data(cls, package_data):
    """
    Return a mapping of the purl fields found in ``package_data``,
    normalized so it can be used in database queries.

    Every purl field is present in the returned mapping; empty or
    missing values are stored as an empty string and the qualifiers are
    encoded. The normalization is delegated to the module-level
    ``normalize_package_url_data`` helper so packages and dependencies
    share a single implementation.
    """
    return normalize_package_url_data(package_data)
AyanSinhaMahapatra marked this conversation as resolved.
Show resolved Hide resolved

@classmethod
def create_from_data(cls, project, package_data):
Expand Down Expand Up @@ -3530,9 +3555,28 @@ class DiscoveredDependency(
"The identifier for the datafile handler used to obtain this dependency."
),
)
is_runtime = models.BooleanField(default=False)
is_optional = models.BooleanField(default=False)
is_resolved = models.BooleanField(default=False)
is_runtime = models.BooleanField(
default=False,
help_text=_("True if this dependency is a runtime dependency."),
)
is_optional = models.BooleanField(
default=False,
help_text=_("True if this dependency is an optional dependency"),
)
is_resolved = models.BooleanField(
default=False,
help_text=_(
"True if this dependency version requirement has been pinned "
"and this dependency points to an exact version."
),
)
is_direct = models.BooleanField(
default=False,
help_text=_(
"True if this is a direct, first-level dependency relationship "
"for a package."
),
)

objects = DiscoveredDependencyQuerySet.as_manager()

Expand All @@ -3553,6 +3597,7 @@ class Meta:
models.Index(fields=["is_runtime"]),
models.Index(fields=["is_optional"]),
models.Index(fields=["is_resolved"]),
models.Index(fields=["is_direct"]),
]
constraints = [
models.UniqueConstraint(
Expand All @@ -3574,6 +3619,10 @@ def get_absolute_url(self):
def purl(self):
return self.package_url

@property
def base_purl(self):
    """
    Return the result of ``get_base_purl`` applied to the ``package_url``
    of this dependency.
    """
    full_purl = self.package_url
    return get_base_purl(full_purl)

@property
def package_type(self):
return self.type
Expand All @@ -3599,6 +3648,7 @@ def create_from_data(
project,
dependency_data,
for_package=None,
resolved_to_package=None,
datafile_resource=None,
datasource_id=None,
strip_datafile_path_root=False,
Expand Down Expand Up @@ -3638,6 +3688,13 @@ def create_from_data(
package_uid=for_package_uid
)

if not resolved_to_package:
resolved_to_uid = dependency_data.get("resolved_to_uid")
if resolved_to_uid:
resolved_to_package = project.discoveredpackages.get(
package_uid=resolved_to_uid
)
tdruez marked this conversation as resolved.
Show resolved Hide resolved

if not datafile_resource:
datafile_path = dependency_data.get("datafile_path")
if datafile_path:
Expand All @@ -3663,10 +3720,25 @@ def create_from_data(
return cls.objects.create(
project=project,
for_package=for_package,
resolved_to_package=resolved_to_package,
datafile_resource=datafile_resource,
**cleaned_data,
)

@classmethod
def extract_purl_data(cls, dependency_data, ignore_nulls=False):
    """
    Return a normalized mapping of purl fields built from the "purl"
    string of ``dependency_data``.

    The purl string is parsed into a PackageURL, converted to a mapping
    and passed to ``normalize_package_url_data`` together with
    ``ignore_nulls``.
    """
    purl_string = dependency_data.get("purl")
    parsed_purl = PackageURL.from_string(purl=purl_string)
    return normalize_package_url_data(parsed_purl.to_dict(), ignore_nulls)

@classmethod
def populate_dependency_uuid(cls, dependency_data):
    """
    Set ``dependency_data["dependency_uid"]`` in place to the "purl" of
    ``dependency_data`` extended with a freshly generated ``uuid``
    qualifier.
    """
    package_url = PackageURL.from_string(purl=dependency_data.get("purl"))
    unique_id = str(uuid.uuid4())
    package_url.qualifiers["uuid"] = unique_id
    dependency_data["dependency_uid"] = package_url.to_string()

@property
def spdx_id(self):
return f"SPDXRef-scancodeio-{self._meta.model_name}-{self.dependency_uid}"
Expand Down Expand Up @@ -3694,6 +3766,25 @@ def as_spdx(self):
)


def normalize_package_url_data(purl_mapping, ignore_nulls=False):
    """
    Return a copy of the purl fields of ``purl_mapping`` normalized for
    use in database queries.

    The qualifiers are encoded with ``normalize_qualifiers``. By default
    every field of PURL_FIELDS is included and empty values are replaced
    by an empty string. With ``ignore_nulls`` set, fields holding an empty
    value are left out of the returned mapping instead.
    """
    normalized = {}
    for name in PURL_FIELDS:
        field_value = purl_mapping.get(name)
        if name == "qualifiers":
            field_value = normalize_qualifiers(field_value, encode=True)

        # Empty values are only dropped when the caller asked for it.
        if ignore_nulls and not field_value:
            continue

        normalized[name] = field_value or ""

    return normalized


class WebhookSubscription(UUIDPKModel, ProjectRelatedModel):
target_url = models.URLField(_("Target URL"), max_length=1024)
created_date = models.DateTimeField(auto_now_add=True, editable=False)
Expand Down
20 changes: 12 additions & 8 deletions scanpipe/pipelines/inspect_packages.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
# ScanCode.io is a free software code scanning tool from nexB Inc. and others.
# Visit https://github.com/nexB/scancode.io for support and download.

from scanpipe.pipelines import group
from scanpipe.pipelines.scan_codebase import ScanCodebase
from scanpipe.pipes import scancode

Expand Down Expand Up @@ -49,23 +50,26 @@ def steps(cls):
cls.flag_empty_files,
cls.flag_ignored_resources,
cls.scan_for_application_packages,
cls.create_packages_and_dependencies,
cls.resolve_dependencies,
)

def scan_for_application_packages(self):
    """
    Scan resources for package information to add DiscoveredPackage
    and DiscoveredDependency objects from detected package data.
    """
    # `assemble` must be passed exactly once: a repeated keyword argument
    # is a SyntaxError.
    # NOTE(review): `assemble=True` presumably lets the scan assemble
    # packages itself -- confirm against scanpipe.pipes.scancode.
    scancode.scan_for_application_packages(
        project=self.project,
        assemble=True,
        package_only=True,
        progress_logger=self.log,
    )

def create_packages_and_dependencies(self):
    """Run ``scancode.process_package_data`` on the project of this pipeline."""
    current_project = self.project
    scancode.process_package_data(current_project)
@group("Static Resolver")
def resolve_dependencies(self):
    """
    Build the packages and the dependency relationships described by
    lockfiles or manifests that contain pre-resolved dependencies.
    """
    current_project = self.project
    scancode.resolve_dependencies(project=current_project)
Loading