Skip to content

Commit

Permalink
Address feedback
Browse files Browse the repository at this point in the history
Signed-off-by: Ayan Sinha Mahapatra <[email protected]>
  • Loading branch information
AyanSinhaMahapatra committed Jun 28, 2024
1 parent fae73bf commit abaaf95
Show file tree
Hide file tree
Showing 6 changed files with 134 additions and 43 deletions.
39 changes: 36 additions & 3 deletions scanpipe/migrations/0062_dependency_resolver_update.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,17 +13,50 @@ class Migration(migrations.Migration):
migrations.AddField(
model_name="discovereddependency",
name="is_direct",
field=models.BooleanField(default=False),
field=models.BooleanField(
default=False,
help_text="True if this is a direct, first-level dependency relationship for a package.",
),
),
migrations.AddField(
model_name="discoveredpackage",
name="is_private",
field=models.BooleanField(default=False),
field=models.BooleanField(
default=False,
help_text="True if this is a private package, either not meant to be published on a repository, and/or a local package without a name and version used primarily to track dependencies and other information.",
),
),
migrations.AddField(
model_name="discoveredpackage",
name="is_virtual",
field=models.BooleanField(default=False),
field=models.BooleanField(
default=False,
help_text="True if this package is created only from a manifest or lockfile, and not from its actual packaged code. The files of this package are not present in the codebase.",
),
),
migrations.AlterField(
model_name="discovereddependency",
name="is_optional",
field=models.BooleanField(
default=False,
help_text="True if this dependency is an optional dependency",
),
),
migrations.AlterField(
model_name="discovereddependency",
name="is_resolved",
field=models.BooleanField(
default=False,
help_text="True if this dependency version requirement has been pinned and this dependency points to an exact version.",
),
),
migrations.AlterField(
model_name="discovereddependency",
name="is_runtime",
field=models.BooleanField(
default=False,
help_text="True if this dependency is a runtime dependency.",
),
),
migrations.AddIndex(
model_name="discovereddependency",
Expand Down
19 changes: 17 additions & 2 deletions scanpipe/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -3074,8 +3074,23 @@ class AbstractPackage(models.Model):
blank=True,
help_text=_("A notice text for this package."),
)
is_private = models.BooleanField(default=False)
is_virtual = models.BooleanField(default=False)
is_private = models.BooleanField(
default=False,
help_text=_(
"True if this is a private package, either not meant to be "
"published on a repository, and/or a local package without a "
"name and version used primarily to track dependencies and "
"other information."
),
)
is_virtual = models.BooleanField(
default=False,
help_text=_(
"True if this package is created only from a manifest or lockfile, "
"and not from its actual packaged code. The files of this package "
"are not present in the codebase."
),
)
datasource_ids = models.JSONField(
default=list,
blank=True,
Expand Down
6 changes: 3 additions & 3 deletions scanpipe/pipes/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -287,12 +287,12 @@ def update_or_create_dependency(
dependencies.append(dependency)

for dependency in dependencies:
# This dependency relationship is for a new package
if (
is_for_new_package = (
for_package
and dependency.for_package
and dependency.for_package != for_package
):
)
if is_for_new_package:
DiscoveredDependency.populate_dependency_uuid(dependency_data)
dependency = DiscoveredDependency.create_from_data(
project,
Expand Down
86 changes: 61 additions & 25 deletions scanpipe/pipes/scancode.py
Original file line number Diff line number Diff line change
Expand Up @@ -489,38 +489,73 @@ def process_package_data(project, static_resolve=False):
for resource in project.codebaseresources.has_package_data():
logger.info(f" Processing: {resource.path}")
for package_mapping in resource.package_data:
pd = packagedcode_models.PackageData.from_dict(mapping=package_mapping)
if not pd.can_assemble:
continue
create_packages_and_dependencies_from_mapping(
project=project,
resource=resource,
package_mapping=package_mapping,
find_package=False,
)

logger.info(f" Package data: {pd.purl}")
if static_resolve:
resolve_dependencies(project)

package_data = pd.to_dict()
dependencies = package_data.pop("dependencies")

package = None
if pd.purl:
package = pipes.update_or_create_package(
project=project,
package_data=package_data,
codebase_resources=[resource],
)
def create_packages_and_dependencies_from_mapping(
project,
resource,
package_mapping,
find_package=False,
):
"""
Create or update packages and dependencies from a `package_mapping`,
for a respective `resource` and `project`.
for dep in dependencies:
pipes.update_or_create_dependency(
project=project,
dependency_data=dep,
for_package=package,
datafile_resource=resource,
datasource_id=pd.datasource_id,
)
If `find_package` is True, find the package with the respective purl data,
instead of trying to create it.
"""
# Rebuild a PackageData object from the raw mapping; package data that
# reports `can_assemble` as falsy is skipped entirely.
pd = packagedcode_models.PackageData.from_dict(mapping=package_mapping)
if not pd.can_assemble:
return

# NOTE(review): the next two lines reference `static_resolve`, which is not a
# parameter of this function -- they look like lines of the previous
# `process_package_data` body carried over by the diff rendering; confirm
# against the repository before relying on them.
if static_resolve:
resolve_dependencies(project)
logger.info(f" Package data: {pd.purl}")

# The dependencies are detached from the package data so that they can be
# handed separately to `update_packages_and_dependencies` below.
package_data = pd.to_dict()
dependencies = package_data.pop("dependencies")

# NOTE(review): the `def resolve_dependencies(project):` line below appears to
# be the pre-change position of that function in the diff, not a nested
# definition -- confirm against the repository.
def resolve_dependencies(project):
# `package` stays None when the package data has no purl.
package = None
if pd.purl:
if find_package:
# Look up already-stored packages of this project by purl fields
# instead of creating a new one.
purl_data = DiscoveredPackage.extract_purl_data(package_mapping)
packages = DiscoveredPackage.objects.filter(
project=project,
**purl_data,
)

# Stop at the first candidate whose `datafile_paths` contains this
# resource's location.
# NOTE(review): when no candidate matches, the loop finishes without `break`
# and `package` is left bound to the last candidate iterated (it stays None
# only when the queryset is empty) -- confirm that this fallback is intended.
for package in packages:
if resource.location in package.datafile_paths:
break
else:
package = pipes.update_or_create_package(
project=project,
package_data=package_data,
codebase_resources=[resource],
)

# `process_resolved=False` is passed so that `resolved_package` entries of the
# dependencies are not turned into packages by this call.
update_packages_and_dependencies(
project=project,
dependencies=dependencies,
package=package,
resource=resource,
datasource_id=pd.datasource_id,
process_resolved=False,
)


def resolve_dependencies(project):
"""
Match and merge resolved dependencies to create a dependency graph of
direct dependency relations between resolved packages.
"""
logger.info(f"Project {project} resolve_dependencies:")
for resource in project.codebaseresources.has_package_data():
for package_mapping in resource.package_data:
Expand Down Expand Up @@ -555,6 +590,7 @@ def update_packages_and_dependencies(
package,
resource,
datasource_id,
process_resolved=True,
):
"""
Create DiscoveredPackage and DiscoveredDependency objects from
Expand All @@ -564,7 +600,7 @@ def update_packages_and_dependencies(
for dep in dependencies:
resolved_package = dep.get("resolved_package") or {}
resolved_to_package = None
if resolved_package:
if process_resolved and resolved_package:
resolved_to_package = pipes.update_or_create_package(
project=project,
package_data=resolved_package,
Expand Down
10 changes: 10 additions & 0 deletions scanpipe/tests/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@
from django.test.utils import CaptureQueriesContext
from django.utils import timezone

from packageurl import PackageURL
from packagedcode.models import PackageData
from requests.exceptions import RequestException
from rq.job import JobStatus
Expand All @@ -65,6 +66,7 @@
from scanpipe.models import UUIDTaggedItem
from scanpipe.models import convert_glob_to_django_regex
from scanpipe.models import get_project_work_directory
from scanpipe.models import normalize_package_url_data
from scanpipe.pipes.fetch import Download
from scanpipe.pipes.input import copy_input
from scanpipe.tests import dependency_data1
Expand Down Expand Up @@ -730,6 +732,14 @@ def test_scanpipe_project_get_ignored_dependency_scopes_index(self):
expected = {"npm": ["devDependencies"], "pypi": ["tests", "build"]}
self.assertEqual(expected, self.project1.get_ignored_dependency_scopes_index())

def test_scanpipe_normalize_package_url_data(self):
# Checks the `namespace` value returned by `normalize_package_url_data` for
# the same purl mapping, with and without `ignore_nulls=True`.
# NOTE(review): the "[email protected]" text inside the purl literal looks like
# an e-mail-obfuscation placeholder introduced by the page scrape; the
# original was presumably a "name@version" npm purl -- restore it from the
# repository.
purl = PackageURL.from_string("pkg:npm/[email protected]")
# Default call: the namespace is expected to come back as an empty string.
purl_data = normalize_package_url_data(purl_mapping=purl.to_dict())
self.assertEqual(purl_data.get("namespace"), "")

# With `ignore_nulls=True`, `.get("namespace")` is expected to return None.
purl_data = normalize_package_url_data(purl_mapping=purl.to_dict(), ignore_nulls=True)
self.assertEqual(purl_data.get("namespace"), None)

def test_scanpipe_project_get_ignored_vulnerabilities_set(self):
self.project1.settings = {
"ignored_vulnerabilities": [
Expand Down
17 changes: 7 additions & 10 deletions scanpipe/tests/test_pipelines.py
Original file line number Diff line number Diff line change
Expand Up @@ -557,6 +557,12 @@ def _normalize_package_uids(self, data):
Return the `data`, where any `package_uid` value has been normalized
with `purl_with_fake_uuid()`
"""
fields_with_package_uids = [
"package_uid",
"dependency_uid",
"for_package_uid",
"resolved_to_package_uid"
]
if isinstance(data, list):
return [self._normalize_package_uids(entry) for entry in data]

Expand All @@ -568,16 +574,7 @@ def _normalize_package_uids(self, data):
for key, value in data.items():
if isinstance(value, (list, dict)):
value = self._normalize_package_uids(value)
if (
key
in (
"package_uid",
"dependency_uid",
"for_package_uid",
"resolved_to_package_uid",
)
and value
):
if key in fields_with_package_uids and value:
value = purl_with_fake_uuid(value)
if key == "for_packages" and value:
value = sorted(
Expand Down

0 comments on commit abaaf95

Please sign in to comment.