Skip to content

Commit

Permalink
WIP on the fetch management command #138
Browse files Browse the repository at this point in the history
Signed-off-by: tdruez <[email protected]>
  • Loading branch information
tdruez committed Aug 6, 2024
1 parent 525d8d3 commit 5712a50
Show file tree
Hide file tree
Showing 3 changed files with 69 additions and 39 deletions.
103 changes: 66 additions & 37 deletions component_catalog/management/commands/fetchvulnerabilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,75 @@
from dje.management.commands import DataspacedCommand
from dje.utils import chunked

# TODO: Retry failures
# ERROR VulnerableCode [Exception] HTTPSConnectionPool(host='public.vulnerablecode.io',
# port=443): Read timed out. (read timeout=10)


def fetch_from_vulnerablecode(vulnerablecode, batch_size, timeout, logger):
    """
    Fetch the vulnerabilities affecting the Packages of `vulnerablecode.dataspace`
    from the VulnerableCode service and create the related `Vulnerability` entries.

    - `vulnerablecode`: service client providing the `dataspace` attribute and
      the `get_vulnerable_purls()` method.
    - `batch_size`: number of packages submitted per `get_vulnerable_purls()` call.
    - `timeout`: value forwarded as `timeout` to `get_vulnerable_purls()`.
    - `logger`: any object with a `write(message)` method, such as the `stdout`
      of a management command.

    Raise a `CommandError` when an entry returned by the service cannot be
    matched to any Package of the queue.
    """
    dataspace = vulnerablecode.dataspace
    package_qs = Package.objects.scope(dataspace).has_package_url()
    logger.write(f"{package_qs.count()} Packages in the queue.")

    # TODO: Add support for Component
    # The Components are only counted for now, they are not sent to the service.
    component_qs = Component.objects.scope(dataspace).exclude(cpe="")
    logger.write(f"{component_qs.count()} Components in the queue.")

    # TODO: Replace this by a create_or_update
    # Only purge the vulnerabilities of the Dataspace being refreshed.
    # An unscoped `Vulnerability.objects.all().delete()` would also remove the
    # data of every other Dataspace, which this run never re-creates.
    vulnerability_qs = Vulnerability.objects.scope(dataspace)
    vulnerability_qs.delete()

    for packages_batch in chunked(package_qs, chunk_size=batch_size):
        entries = vulnerablecode.get_vulnerable_purls(
            packages_batch, purl_only=False, timeout=timeout
        )
        for entry in entries:
            affected_by_vulnerabilities = entry.get("affected_by_vulnerabilities")
            if not affected_by_vulnerabilities:
                continue

            # Match the entry back to the local Package(s) using the Package URL
            # fields. Missing optional fields are looked up as empty strings.
            affected_packages = package_qs.filter(
                type=entry.get("type"),
                namespace=entry.get("namespace") or "",
                name=entry.get("name"),
                version=entry.get("version") or "",
                # qualifiers=entry.get("qualifiers") or {},
                subpath=entry.get("subpath") or "",
            )
            if not affected_packages:
                raise CommandError("Could not find package!")

            for vulnerability in affected_by_vulnerabilities:
                vulnerability_id = vulnerability["vulnerability_id"]
                if vulnerability_qs.filter(vulnerability_id=vulnerability_id).exists():
                    # NOTE(review): the `affected_packages` of this entry are not
                    # attached to the already existing vulnerability here.
                    continue  # -> TODO: Update from data in that case?
                Vulnerability.create_from_data(
                    dataspace=dataspace,
                    data=vulnerability,
                    affected_packages=affected_packages,
                )


class Command(DataspacedCommand):
help = "Fetch vulnerabilities for the provided Dataspace"

def add_arguments(self, parser):
    """
    Register the `--batch-size` and `--timeout` integer options, both
    defaulting to 10, on top of the arguments declared by the parent command.
    """
    super().add_arguments(parser)

    # (option flag, help text) pairs, registered in declaration order.
    integer_options = (
        (
            "--batch-size",
            "Specifies the number of objects per requests to the VulnerableCode service",
        ),
        (
            "--timeout",
            "Request timeout in seconds",
        ),
    )
    for option_flag, help_text in integer_options:
        parser.add_argument(option_flag, type=int, default=10, help=help_text)

def handle(self, *args, **options):
super().handle(*args, **options)
# TODO: chunk_size=10 -> make this configurable and figure out the best default

vulnerablecode = VulnerableCode(self.dataspace)

Expand All @@ -31,39 +93,6 @@ def handle(self, *args, **options):
if not vulnerablecode.is_configured():
raise CommandError("VulnerableCode is not configured.")

package_qs = Package.objects.scope(self.dataspace).has_package_url()
self.stdout.write(f"{package_qs.count()} Packages in the queue.")
# TODO:
component_qs = Component.objects.scope(self.dataspace).exclude(cpe="")
self.stdout.write(f"{component_qs.count()} Components in the queue.")

# TODO: Replace this by a create_or_update
Vulnerability.objects.all().delete()
vulnerability_qs = Vulnerability.objects.scope(self.dataspace)

for packages_batch in chunked(package_qs, chunk_size=10):
entries = vulnerablecode.get_vulnerable_purls(packages_batch, purl_only=False)
for entry in entries:
affected_by_vulnerabilities = entry.get("affected_by_vulnerabilities")
if not affected_by_vulnerabilities:
continue

affected_packages = package_qs.filter(
type=entry.get("type"),
namespace=entry.get("namespace") or "",
name=entry.get("name"),
version=entry.get("version") or "",
# qualifiers=entry.get("qualifiers") or {},
)
if not affected_packages:
raise CommandError("Could not find package!")

for vulnerability in affected_by_vulnerabilities:
vulnerability_id = vulnerability["vulnerability_id"]
if vulnerability_qs.filter(vulnerability_id=vulnerability_id).exists():
continue # -> TODO: Update from data in that case?
Vulnerability.create_from_data(
dataspace=self.dataspace,
data=vulnerability,
affected_packages=affected_packages,
)
batch_size = options["batch_size"]
timeout = options["timeout"]
fetch_from_vulnerablecode(vulnerablecode, batch_size, timeout, logger=self.stdout)
1 change: 1 addition & 0 deletions dejacode_toolkit/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ def __init__(self, dataspace):
if not dataspace:
raise ValueError("Dataspace must be provided.")

self.dataspace = dataspace
self.service_url = None
self.service_api_key = None
self.basic_auth_user = None
Expand Down
4 changes: 2 additions & 2 deletions dejacode_toolkit/vulnerablecode.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ def bulk_search_by_cpes(
logger.debug(f"VulnerableCode: url={url} cpes_count={len(cpes)}")
return self.request_post(url, json=data, timeout=timeout)

def get_vulnerable_purls(self, packages, purl_only=True):
def get_vulnerable_purls(self, packages, purl_only=True, timeout=10):
"""
Return a list of PURLs for which at least one `affected_by_vulnerabilities`
was found in the VulnerableCodeDB for the given list of `packages`.
Expand All @@ -112,7 +112,7 @@ def get_vulnerable_purls(self, packages, purl_only=True):
vulnerable_purls = self.bulk_search_by_purl(
plain_purls,
purl_only=purl_only,
timeout=10,
timeout=timeout,
)
return vulnerable_purls or []

Expand Down

0 comments on commit 5712a50

Please sign in to comment.