Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Resolve dependencies from lockfiles #1244

Merged
merged 14 commits into from
Jul 1, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions scanpipe/api/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -380,6 +380,8 @@ class Meta:
"source_packages",
"extra_data",
"package_uid",
"is_private",
"is_virtual",
"datasource_ids",
"datafile_paths",
"file_references",
Expand All @@ -405,6 +407,7 @@ class Meta:
"is_runtime",
"is_optional",
"is_resolved",
"is_direct",
"dependency_uid",
"for_package_uid",
"resolved_to_package_uid",
Expand Down
4 changes: 4 additions & 0 deletions scanpipe/filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -727,6 +727,7 @@ class DependencyFilterSet(FilterSetUtilsMixin, django_filters.FilterSet):
"is_runtime",
"is_optional",
"is_resolved",
"is_direct",
"datasource_id",
"is_vulnerable",
]
Expand All @@ -747,6 +748,7 @@ class DependencyFilterSet(FilterSetUtilsMixin, django_filters.FilterSet):
"is_runtime",
"is_optional",
"is_resolved",
"is_direct",
"for_package",
"resolved_to_package",
"datafile_resource",
Expand All @@ -761,6 +763,7 @@ class DependencyFilterSet(FilterSetUtilsMixin, django_filters.FilterSet):
is_runtime = StrictBooleanFilter()
is_optional = StrictBooleanFilter()
is_resolved = StrictBooleanFilter()
is_direct = StrictBooleanFilter()
AyanSinhaMahapatra marked this conversation as resolved.
Show resolved Hide resolved
is_vulnerable = IsVulnerable(field_name="affected_by_vulnerabilities")

class Meta:
Expand All @@ -779,6 +782,7 @@ class Meta:
"is_runtime",
"is_optional",
"is_resolved",
"is_direct",
"datasource_id",
"is_vulnerable",
]
Expand Down
34 changes: 34 additions & 0 deletions scanpipe/migrations/0061_dependency_resolver_update.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Generated by Django 5.0.6 on 2024-06-04 20:48

from django.db import migrations, models


class Migration(migrations.Migration):
    """
    Schema update for the dependency resolver.

    Adds the ``is_direct`` boolean (plus an index on it) to the
    ``discovereddependency`` model, and the ``is_private`` and ``is_virtual``
    booleans to the ``discoveredpackage`` model. Every new column defaults
    to ``False``.
    """

    dependencies = [("scanpipe", "0060_discovereddependency_renames")]

    operations = [
        # New flag on dependencies.
        migrations.AddField(
            model_name="discovereddependency",
            name="is_direct",
            field=models.BooleanField(default=False),
        ),
        # New flags on packages.
        migrations.AddField(
            model_name="discoveredpackage",
            name="is_private",
            field=models.BooleanField(default=False),
        ),
        migrations.AddField(
            model_name="discoveredpackage",
            name="is_virtual",
            field=models.BooleanField(default=False),
        ),
        # Index on the new dependency flag.
        migrations.AddIndex(
            model_name="discovereddependency",
            index=models.Index(
                fields=["is_direct"],
                name="scanpipe_di_is_dire_6dc594_idx",
            ),
        ),
    ]
51 changes: 51 additions & 0 deletions scanpipe/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -1012,6 +1012,19 @@ def walk_codebase_path(self):
"""Return files and directories path of the codebase/ directory recursively."""
return self.codebase_path.rglob("*")

def get_resource(self, path):
    """
    Return the codebase resource present for a given path,
    or None if the resource with that path does not exist.
    This path is relative to the scan location.
    This is the same as the Codebase.get_resource() function.
    """
    # We don't want to raise an exception if there is no resource
    # as this function is also called from the SCTK side, so the
    # "resource or None" result of `get_or_none` is returned as-is.
    return self.codebaseresources.get_or_none(path=path)

@cached_property
def can_change_inputs(self):
"""
Expand Down Expand Up @@ -2982,6 +2995,8 @@ class AbstractPackage(models.Model):
blank=True,
help_text=_("A notice text for this package."),
)
is_private = models.BooleanField(default=False)
is_virtual = models.BooleanField(default=False)
datasource_ids = models.JSONField(
default=list,
blank=True,
Expand Down Expand Up @@ -3454,6 +3469,7 @@ class DiscoveredDependency(
is_runtime = models.BooleanField(default=False)
is_optional = models.BooleanField(default=False)
is_resolved = models.BooleanField(default=False)
is_direct = models.BooleanField(default=False)
AyanSinhaMahapatra marked this conversation as resolved.
Show resolved Hide resolved

objects = DiscoveredDependencyQuerySet.as_manager()

Expand All @@ -3474,6 +3490,7 @@ class Meta:
models.Index(fields=["is_runtime"]),
models.Index(fields=["is_optional"]),
models.Index(fields=["is_resolved"]),
models.Index(fields=["is_direct"]),
]
constraints = [
models.UniqueConstraint(
Expand Down Expand Up @@ -3520,6 +3537,7 @@ def create_from_data(
project,
dependency_data,
for_package=None,
resolved_to_package=None,
datafile_resource=None,
datasource_id=None,
strip_datafile_path_root=False,
Expand Down Expand Up @@ -3559,6 +3577,13 @@ def create_from_data(
package_uid=for_package_uid
)

if not resolved_to_package:
resolved_to_uid = dependency_data.get("resolved_to_uid")
if resolved_to_uid:
resolved_to_package = project.discoveredpackages.get(
package_uid=resolved_to_uid
)
tdruez marked this conversation as resolved.
Show resolved Hide resolved

if not datafile_resource:
datafile_path = dependency_data.get("datafile_path")
if datafile_path:
Expand All @@ -3584,10 +3609,36 @@ def create_from_data(
return cls.objects.create(
project=project,
for_package=for_package,
resolved_to_package=resolved_to_package,
datafile_resource=datafile_resource,
**cleaned_data,
)

@classmethod
def extract_purl_data(cls, dependency_data, ignore_nulls=False):
    """
    Return a mapping of the ``PURL_FIELDS`` values parsed from the "purl"
    entry of the ``dependency_data`` mapping.

    The "qualifiers" value is passed through ``normalize_qualifiers`` with
    ``encode=True`` before being stored.

    When ``ignore_nulls`` is False (default), every field of ``PURL_FIELDS``
    is present in the result and empty values are stored as ``""``.
    When ``ignore_nulls`` is True, fields with an empty value are left out.
    """
    purl_mapping = PackageURL.from_string(
        purl=dependency_data.get("purl"),
    ).to_dict()
    purl_data = {}

    for field_name in PURL_FIELDS:
        value = purl_mapping.get(field_name)
        if field_name == "qualifiers":
            value = normalize_qualifiers(value, encode=True)

        if value:
            purl_data[field_name] = value
        elif not ignore_nulls:
            # Keep the key with an empty string rather than a null value.
            purl_data[field_name] = ""

    return purl_data

@classmethod
def populate_dependency_uuid(cls, dependency_data):
    """
    Set ``dependency_data["dependency_uid"]`` in place.

    The value is the "purl" entry of ``dependency_data`` re-serialized with
    an additional ``uuid`` qualifier holding a newly generated uuid4 string.
    Nothing is returned; the provided mapping is mutated.
    """
    package_url = PackageURL.from_string(purl=dependency_data.get("purl"))
    unique_suffix = str(uuid.uuid4())
    package_url.qualifiers["uuid"] = unique_suffix
    dependency_data["dependency_uid"] = package_url.to_string()

@property
def spdx_id(self):
    """Return the SPDX reference id built from the model name and dependency_uid."""
    model_name = self._meta.model_name
    return f"SPDXRef-scancodeio-{model_name}-{self.dependency_uid}"
Expand Down
20 changes: 12 additions & 8 deletions scanpipe/pipelines/inspect_packages.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
# ScanCode.io is a free software code scanning tool from nexB Inc. and others.
# Visit https://github.com/nexB/scancode.io for support and download.

from scanpipe.pipelines import group
from scanpipe.pipelines.scan_codebase import ScanCodebase
from scanpipe.pipes import scancode

Expand Down Expand Up @@ -49,23 +50,26 @@ def steps(cls):
cls.flag_empty_files,
cls.flag_ignored_resources,
cls.scan_for_application_packages,
cls.create_packages_and_dependencies,
cls.resolve_dependencies,
)

def scan_for_application_packages(self):
    """
    Scan resources for package information to add DiscoveredPackage
    and DiscoveredDependency objects from detected package data.
    """
    # The scan runs with `assemble=True` and `package_only=True`; the
    # former note explaining `assemble=False` no longer applies here.
    scancode.scan_for_application_packages(
        project=self.project,
        assemble=True,
        package_only=True,
        progress_logger=self.log,
    )

def create_packages_and_dependencies(self):
scancode.process_package_data(self.project)
@group("Static Resolver")
def resolve_dependencies(self):
    """
    Build packages and the dependency relationships between them from
    lockfiles or manifests that already contain pre-resolved dependencies.
    """
    current_project = self.project
    scancode.resolve_dependencies(project=current_project)
19 changes: 19 additions & 0 deletions scanpipe/pipelines/resolve_dependencies.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,10 @@
# ScanCode.io is a free software code scanning tool from nexB Inc. and others.
# Visit https://github.com/nexB/scancode.io for support and download.

from scanpipe.pipelines import group
from scanpipe.pipelines.scan_codebase import ScanCodebase
from scanpipe.pipes import resolve
from scanpipe.pipes import scancode


class ResolveDependencies(ScanCodebase):
Expand All @@ -45,6 +47,8 @@ def steps(cls):
cls.collect_and_create_codebase_resources,
cls.flag_ignored_resources,
cls.get_manifest_inputs,
cls.scan_for_application_packages,
cls.create_packages_and_dependencies,
cls.get_packages_from_manifest,
cls.create_resolved_packages,
)
Expand All @@ -53,6 +57,20 @@ def get_manifest_inputs(self):
"""Locate package manifest files with a supported package resolver."""
self.manifest_resources = resolve.get_manifest_resources(self.project)

@group("Static Resolver")
def scan_for_application_packages(self):
    """
    Scan the manifest resources stored on ``self.manifest_resources``
    for application package data, without assembling packages.
    """
    # Only the manifests located by `get_manifest_inputs` are scanned,
    # not the whole codebase.
    scancode.scan_for_application_packages(
        project=self.project,
        assemble=False,
        resource_qs=self.manifest_resources,
        progress_logger=self.log,
    )

@group("Static Resolver")
def create_packages_and_dependencies(self):
    """
    Process the package data of the project with ``static_resolve``
    enabled.
    """
    current_project = self.project
    scancode.process_package_data(current_project, static_resolve=True)

@group("Dynamic Resolver")
def get_packages_from_manifest(self):
"""
Resolve package data from lockfiles/requirement files with package
Expand All @@ -65,6 +83,7 @@ def get_packages_from_manifest(self):
model="get_packages_from_manifest",
)

@group("Dynamic Resolver")
def create_resolved_packages(self):
"""Create the resolved packages and their dependencies in the database."""
resolve.create_packages_and_dependencies(
Expand Down
32 changes: 29 additions & 3 deletions scanpipe/pipes/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,12 @@ def _clean_package_data(package_data):
return package_data


def update_or_create_package(project, package_data, codebase_resources=None):
def update_or_create_package(
project,
package_data,
codebase_resources=None,
is_virtual=False,
):
"""
Get, update or create a DiscoveredPackage then return it.
Use the `project` and `package_data` mapping to lookup and creates the
Expand All @@ -194,6 +199,9 @@ def update_or_create_package(project, package_data, codebase_resources=None):
package = DiscoveredPackage.create_from_data(project, package_data)

if package:
if is_virtual:
package.update(is_virtual=is_virtual)
AyanSinhaMahapatra marked this conversation as resolved.
Show resolved Hide resolved

if datasource_id and datasource_id not in package.datasource_ids:
datasource_ids = package.datasource_ids.copy()
datasource_ids.append(datasource_id)
Expand Down Expand Up @@ -239,6 +247,7 @@ def update_or_create_dependency(
project,
dependency_data,
for_package=None,
resolved_to_package=None,
datafile_resource=None,
datasource_id=None,
strip_datafile_path_root=False,
Expand All @@ -254,27 +263,44 @@ def update_or_create_dependency(
corresponding CodebaseResource for `datafile_path`. This is used in the case
where Dependency data is imported from a scancode-toolkit scan, where the
root path segments are not stripped for `datafile_path`.
If the dependency is resolved and a resolved package is created, we have the
corresponding package_uid at `resolved_to`.
"""
dependency = None
dependency_uid = dependency_data.get("dependency_uid")
extracted_requirement = dependency_data.get("extracted_requirement")

if ignore_dependency_scope(project, dependency_data):
return # Do not create the DiscoveredDependency record.

if not dependency_uid:
dependency_data["dependency_uid"] = uuid.uuid4()
purl_data = DiscoveredDependency.extract_purl_data(dependency_data)
dependency = DiscoveredDependency.objects.get_or_none(
project=project,
extracted_requirement=extracted_requirement,
**purl_data,
)
else:
dependency = project.discovereddependencies.get_or_none(
dependency = DiscoveredDependency.objects.get_or_none(
project=project,
dependency_uid=dependency_uid,
)

if dependency:
dependency.update_from_data(dependency_data)
if resolved_to_package and not dependency.resolved_to_package:
dependency.update(resolved_to_package=resolved_to_package)
else:
is_direct = dependency_data.get("is_direct")
if not is_direct:
pass

DiscoveredDependency.populate_dependency_uuid(dependency_data)
dependency = DiscoveredDependency.create_from_data(
project,
dependency_data,
for_package=for_package,
resolved_to_package=resolved_to_package,
datafile_resource=datafile_resource,
datasource_id=datasource_id,
strip_datafile_path_root=strip_datafile_path_root,
Expand Down
Loading