Skip to content

Commit

Permalink
Update cocoapods podfile.lock parser
Browse files Browse the repository at this point in the history
Get all the dependencies from a CocoaPods Podfile.lock lockfile
while preserving its dependency graph information: resolved
package versions, direct dependency relationships, dependency
requirements and other extra data.

Reference: aboutcode-org/scancode.io#1279
Signed-off-by: Ayan Sinha Mahapatra <[email protected]>
  • Loading branch information
AyanSinhaMahapatra committed Jun 26, 2024
1 parent e4f6267 commit 203dec1
Show file tree
Hide file tree
Showing 11 changed files with 5,132 additions and 125 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,10 @@ v33.0.0 (next next, roadmap)
from nuget lockfile `packages.lock.json`.
See https://github.com/nexB/scancode-toolkit/pull/3825

- Add support for parsing resolved packages and dependency relationships
from cocoapods lockfile `Podfile.lock`.
See https://github.com/nexB/scancode-toolkit/pull/3827

v32.2.0 - 2024-06-19
----------------------

Expand Down
285 changes: 255 additions & 30 deletions src/packagedcode/cocoapods.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@

from packagedcode import models
from packagedcode import spec
from packagedcode import utils
from packagedcode.utils import get_base_purl
from packagedcode.utils import build_description

"""
Handle cocoapods packages manifests for macOS and iOS
Expand Down Expand Up @@ -232,7 +233,7 @@ def parse(cls, location, package_only=False):
extracted_license_statement = podspec.get('license')
summary = podspec.get('summary')
description = podspec.get('description')
description = utils.build_description(
description = build_description(
summary=summary,
description=description,
)
Expand Down Expand Up @@ -292,6 +293,96 @@ class PodfileLockHandler(BasePodHandler):
default_primary_language = 'Objective-C'
description = 'Cocoapods Podfile.lock'
documentation_url = 'https://guides.cocoapods.org/using/the-podfile.html'
is_lockfile = True

@classmethod
def get_pods_dependency_with_resolved_package(
    cls,
    dependency_data,
    main_pod,
    dependencies_for_resolved=None,
):
    """
    Return a DependentPackage object with its resolved package and
    dependencies from the `main_pod` string, with additional data
    populated from the `dependency_data` mappings.

    `dependency_data` is an object exposing the `checksum_by_base_purl`,
    `direct_dependency_purls`, `spec_by_base_purl` and
    `external_sources_by_base_purl` lookups keyed by base purl.
    `dependencies_for_resolved` is an optional list of dependency mappings
    attached to the resolved package; it defaults to a new empty list.
    """
    # Use a None sentinel rather than a `[]` default: a mutable default is
    # created once and would be shared by every resolved package built
    # without explicit dependencies.
    if dependencies_for_resolved is None:
        dependencies_for_resolved = []

    purl, xreq = parse_dep_requirements(main_pod)
    base_purl = get_base_purl(purl.to_string())

    resolved_package_mapping = dict(
        datasource_id=cls.datasource_id,
        type=cls.default_package_type,
        primary_language=cls.default_primary_language,
        namespace=purl.namespace,
        name=purl.name,
        version=purl.version,
        dependencies=dependencies_for_resolved,
        is_virtual=True,
    )
    resolved_package = models.PackageData.from_data(resolved_package_mapping)

    # The remaining details live in separate lockfile sections and are
    # only set when an entry exists for this base purl.
    checksum = dependency_data.checksum_by_base_purl.get(base_purl)
    if checksum:
        resolved_package.sha1 = checksum

    spec_repo = dependency_data.spec_by_base_purl.get(base_purl)
    if spec_repo:
        resolved_package.extra_data["spec_repo"] = spec_repo

    external_source = dependency_data.external_sources_by_base_purl.get(base_purl)
    if external_source:
        resolved_package.extra_data["external_source"] = external_source

    is_direct = base_purl in dependency_data.direct_dependency_purls

    return models.DependentPackage(
        purl=purl.to_string(),
        # FIXME: why dev?
        scope='requires',
        extracted_requirement=xreq,
        is_runtime=False,
        is_optional=True,
        is_resolved=True,
        is_direct=is_direct,
        resolved_package=resolved_package,
    )

@classmethod
def get_dependencies_for_resolved_package(cls, dependency_data, dep_pods):
    """
    Return a list of DependentPackage mappings, one for each pod
    requirement string in `dep_pods`, for a cocoapods resolved package.

    When `dependency_data.versions_by_base_purl` has a version for a pod,
    that version is set on the purl and is also used as the extracted
    requirement if the pod string carries no requirement of its own.
    """
    resolved_deps = []

    for pod_string in dep_pods:
        pod_purl, requirement = parse_dep_requirements(pod_string)
        base_purl = get_base_purl(pod_purl.to_string())
        resolved_version = dependency_data.versions_by_base_purl.get(base_purl)

        if resolved_version:
            # Rebuild the purl with the version found for this pod.
            purl_fields = pod_purl.to_dict()
            purl_fields["version"] = resolved_version
            pod_purl = PackageURL(**purl_fields)

        # Fall back to the looked-up version when no requirement was given.
        requirement = requirement or resolved_version

        dependent = models.DependentPackage(
            purl=pod_purl.to_string(),
            # FIXME: why dev?
            scope='requires',
            extracted_requirement=requirement,
            is_runtime=False,
            is_optional=True,
            is_resolved=True,
            is_direct=True,
        )
        resolved_deps.append(dependent.to_dict())

    return resolved_deps

@classmethod
def parse(cls, location, package_only=False):
Expand All @@ -301,52 +392,145 @@ def parse(cls, location, package_only=False):
with open(location) as pfl:
data = saneyaml.load(pfl)

pods = data['PODS']
dependency_data = PodfileLockDataByPurl.collect_dependencies_data_by_purl(
data=data,
package_type=cls.default_package_type,
)

dependencies = []

pods = data.get('PODS') or []
for pod in pods:
# dependencies with mappings have direct dependencies
if isinstance(pod, dict):
for main_pod, _dep_pods in pod.items():

purl, xreq = parse_dep_requirements(main_pod)

dependencies.append(
models.DependentPackage(
purl=str(purl),
# FIXME: why dev?
scope='requires',
extracted_requirement=xreq,
is_runtime=False,
is_optional=True,
is_resolved=True,
)
for main_pod, dep_pods in pod.items():
dependencies_for_resolved = cls.get_dependencies_for_resolved_package(
dependency_data=dependency_data,
dep_pods=dep_pods,
)
dependency = cls.get_pods_dependency_with_resolved_package(
dependency_data=dependency_data,
main_pod=main_pod,
dependencies_for_resolved=dependencies_for_resolved,
)
dependencies.append(dependency)

# These packages have no direct dependencies
elif isinstance(pod, str):

purl, xreq = parse_dep_requirements(pod)

dependencies.append(
models.DependentPackage(
purl=str(purl),
# FIXME: why dev?
scope='requires',
extracted_requirement=xreq,
is_runtime=False,
is_optional=True,
is_resolved=True,
)
dependency = cls.get_pods_dependency_with_resolved_package(
dependency_data, pod,
)
dependencies.append(dependency)

podfile_checksum = data.get('PODFILE CHECKSUM')
cocoapods_version = data.get('COCOAPODS')
extra_data = {
'cocoapods': cocoapods_version,
'podfile_checksum': podfile_checksum,
}

package_data = dict(
datasource_id=cls.datasource_id,
type=cls.default_package_type,
primary_language=cls.default_primary_language,
dependencies=dependencies,
extra_data=extra_data,
)
yield models.PackageData.from_data(package_data, package_only)


class PodfileLockDataByPurl:
    """
    Podfile.lock lockfiles contain information about their cocoapods
    dependencies in multiple parallel lists keyed by package name.
    These are:
    - PODS : Dependency graph with resolved package versions, dependency
      relationships and dependency requirements
    - DEPENDENCIES : list of direct dependencies
    - SPEC REPOS : location of spec repo having the package metadata podspec
    - SPEC CHECKSUMS : sha1 checksums of the package
    - CHECKOUT OPTIONS : the version control system info for the package with exact commit
    - EXTERNAL SOURCES : External source for a package, locally, or in an external vcs repo
    Additionally the resolved package versions for dependencies are only
    present in the top-level, but not in the dependency relationships.
    This class parses this information and stores it in mappings by purl.
    """

    def __init__(self):
        # These containers are per-instance on purpose: as class attributes
        # they would be shared and mutated by every parsed lockfile, leaking
        # data from one Podfile.lock into the next.

        # base purl -> requirement string parsed from the PODS entry
        self.versions_by_base_purl = {}
        # base purls listed under DEPENDENCIES
        self.direct_dependency_purls = []
        # base purl -> spec repo name from SPEC REPOS
        self.spec_by_base_purl = {}
        # base purl -> checksum from SPEC CHECKSUMS
        self.checksum_by_base_purl = {}
        # base purl -> processed CHECKOUT OPTIONS / EXTERNAL SOURCES entry
        self.external_sources_by_base_purl = {}

    @classmethod
    def collect_dependencies_data_by_purl(cls, data, package_type):
        """
        Parse and populate cocoapods dependency information by purl,
        from the `data` mapping loaded from a Podfile.lock.
        `package_type` is the purl type used for sections keyed by a bare
        package name. Return a new instance of this class.
        """
        dep_data = cls()

        # collect versions of all dependencies
        pods = data.get('PODS') or []
        for pod in pods:
            if isinstance(pod, dict):
                # mapping entries are keyed by the main pod string
                main_pods = list(pod)
            elif isinstance(pod, str):
                main_pods = [pod]
            else:
                continue

            for main_pod in main_pods:
                purl, xreq = parse_dep_requirements(main_pod)
                base_purl = get_base_purl(purl.to_string())
                dep_data.versions_by_base_purl[base_purl] = xreq

        direct_dependencies = data.get('DEPENDENCIES') or []
        for direct_dep in direct_dependencies:
            purl, _xreq = parse_dep_requirements(direct_dep)
            base_purl = get_base_purl(purl.to_string())
            dep_data.direct_dependency_purls.append(base_purl)

        spec_repos = data.get('SPEC REPOS') or {}
        for spec_repo, packages in spec_repos.items():
            for package in packages:
                purl, _xreq = parse_dep_requirements(package)
                base_purl = get_base_purl(purl.to_string())
                dep_data.spec_by_base_purl[base_purl] = spec_repo

        checksums = data.get('SPEC CHECKSUMS') or {}
        for name, checksum in checksums.items():
            purl, _xreq = parse_dep_requirements(name)
            base_purl = get_base_purl(purl.to_string())
            dep_data.checksum_by_base_purl[base_purl] = checksum

        checkout_options = data.get('CHECKOUT OPTIONS') or {}
        for name, source in checkout_options.items():
            processed_source = process_external_source(source)
            base_purl = PackageURL(
                type=package_type,
                name=name,
            ).to_string()
            dep_data.external_sources_by_base_purl[base_purl] = processed_source

        external_sources = data.get('EXTERNAL SOURCES') or {}
        for name, source in external_sources.items():
            base_purl = PackageURL(
                type=package_type,
                name=name,
            ).to_string()

            # `CHECKOUT OPTIONS` is more verbose than `EXTERNAL SOURCES`,
            # so an entry already collected from it takes precedence.
            if base_purl in dep_data.external_sources_by_base_purl:
                continue
            processed_source = process_external_source(source)
            dep_data.external_sources_by_base_purl[base_purl] = processed_source

        return dep_data


class PodspecJsonHandler(models.DatafileHandler):
datasource_id = 'cocoapods_podspec_json'
path_patterns = ('*.podspec.json',)
Expand Down Expand Up @@ -566,3 +750,44 @@ def parse_dep_requirements(dep):
version=version,
)
return purl, requirement


def process_external_source(source_mapping):
    """
    Process the `source_mapping` of a dependency with an external source
    into a path or URL string and return it.
    Some examples:
        boost:
            :podspec: "../node_modules/react-native/third-party-podspecs/boost.podspec"
        Pulley:
            :branch: master
            :git: https://github.com/artsy/Pulley.git
        SnapKit:
            :branch: xcode102
            :git: "git@github.com:alanzeino/SnapKit.git"
        SwiftyJSON:
            :commit: af76cf3ef710b6ca5f8c05f3a31307d44a3c5828
            :git: https://github.com/SwiftyJSON/SwiftyJSON/
        tipsi-stripe:
            :path: "../node_modules/tipsi-stripe"

    A single-entry mapping returns its only value as a string. A `:git`
    URL paired with a `:commit`, `:branch` or `:tag` returns a
    `<repo url>/tree/<reference>` URL. Any other mapping is returned
    as its string representation.
    """

    # this could be either `:path`, `:podspec` or `:git`
    if len(source_mapping) == 1:
        return str(next(iter(source_mapping.values())))

    # this is a link to a git repository with one reference
    if len(source_mapping) == 2 and ':git' in source_mapping:
        repo_url = source_mapping[':git'].rstrip('/')

        # Only strip a trailing `.git`: replacing it anywhere would mangle
        # names such as `user.github.io`.
        if repo_url.endswith('.git'):
            repo_url = repo_url[:-len('.git')]

        # Convert scp-like `git@host:owner/repo` to `https://host/owner/repo`;
        # the `:` separator must become `/` to form a valid URL.
        if repo_url.startswith('git@'):
            host, _, repo_path = repo_url[len('git@'):].partition(':')
            repo_url = f"https://{host}/{repo_path}" if repo_path else f"https://{host}"

        repo_url = repo_url.rstrip('/')

        for reference_key in (':commit', ':branch', ':tag'):
            if reference_key in source_mapping:
                reference = source_mapping[reference_key]
                return f"{repo_url}/tree/{reference}"

    # In all other cases
    return str(source_mapping)
Loading

0 comments on commit 203dec1

Please sign in to comment.