diff --git a/src/opossum_lib/scancode/convert_scancode_to_opossum.py b/src/opossum_lib/scancode/convert_scancode_to_opossum.py index b39e945..7a1439b 100644 --- a/src/opossum_lib/scancode/convert_scancode_to_opossum.py +++ b/src/opossum_lib/scancode/convert_scancode_to_opossum.py @@ -21,10 +21,7 @@ def convert_scancode_to_opossum(filename: str) -> OpossumFileContent: scancode_data = load_scancode_json(filename) - filetree = scancode_to_file_tree(scancode_data) - resources = filetree.to_opossum_resources() - with open("debug.json", "w") as out: - out.write(resources[0].model_dump_json(indent=4, by_alias=True)) + resources = [scancode_to_file_tree(scancode_data)] scancode_header = extract_scancode_header(scancode_data, filename) metadata = opossum_model.Metadata( diff --git a/src/opossum_lib/scancode/helpers.py b/src/opossum_lib/scancode/helpers.py deleted file mode 100644 index 1ba1fd3..0000000 --- a/src/opossum_lib/scancode/helpers.py +++ /dev/null @@ -1,19 +0,0 @@ -# SPDX-FileCopyrightText: TNG Technology Consulting GmbH -# -# SPDX-License-Identifier: Apache-2.0 - - -import os.path - -from pydantic import BaseModel -from pydantic_core import SchemaValidator - - -def path_segments(path: str) -> list[str]: - path = os.path.normpath(path) - return path.split(os.sep) - - -def check_schema(model: BaseModel) -> None: - schema_validator = SchemaValidator(schema=model.__pydantic_core_schema__) - schema_validator.validate_python(model.__dict__) diff --git a/src/opossum_lib/scancode/resource_tree.py b/src/opossum_lib/scancode/resource_tree.py index 15e50a9..e48399e 100644 --- a/src/opossum_lib/scancode/resource_tree.py +++ b/src/opossum_lib/scancode/resource_tree.py @@ -5,59 +5,25 @@ from __future__ import annotations -from pydantic import BaseModel +from pathlib import Path import opossum_lib.opossum_model as opossum_model from opossum_lib.scancode.constants import SCANCODE_SOURCE_NAME -from opossum_lib.scancode.helpers import check_schema, path_segments from opossum_lib.scancode.model import File, FileType, ScanCodeData -class ScanCodeFileTree(BaseModel): - file: File - children: dict[str, ScanCodeFileTree] = {} - - def get_path(self, path: list[str]) -> ScanCodeFileTree: - if len(path) == 0: - return self - next_segment, *rest = path - if next_segment not in self.children: - self.children[next_segment] = ScanCodeFileTree.model_construct(None) # type: ignore - return self.children[next_segment].get_path(rest) - - def revalidate(self) -> None: - check_schema(self) - for child in self.children.values(): - child.revalidate() - - def to_opossum_resources( - self, - ) -> list[opossum_model.Resource]: - def process_node( - node: ScanCodeFileTree, - ) -> opossum_model.Resource: - return opossum_model.Resource( - path=node.file.path, - attributions=get_attribution_info(node.file), - type=convert_resource_type(node.file.type), - children={ - key: process_node(child) for (key, child) in node.children.items() - }, - ) - - return [process_node(self)] - - -def scancode_to_file_tree(scancode_data: ScanCodeData) -> ScanCodeFileTree: - temp_root = ScanCodeFileTree.model_construct(file=None) # type: ignore +def scancode_to_file_tree(scancode_data: ScanCodeData) -> opossum_model.Resource: + temp_root = opossum_model.Resource(path=Path("")) for file in scancode_data.files: - segments = path_segments(file.path) - temp_root.get_path(segments).file = file + resource = opossum_model.Resource( + path=Path(file.path), + attributions=get_attribution_info(file), + type=convert_resource_type(file.type), + ) + temp_root.add_resource(resource) assert len(temp_root.children) == 1 - root = list(temp_root.children.values())[0] - check_schema(root) - return root + return list(temp_root.children.values())[0] def get_attribution_info(file: File) -> list[opossum_model.OpossumPackage]: diff --git a/tests/test_scancode/model_helpers.py b/tests/test_scancode/model_helpers.py index d7b6162..c453b45 100644 --- a/tests/test_scancode/model_helpers.py +++ b/tests/test_scancode/model_helpers.py @@ -14,6 +14,16 @@ ) +def _create_reference_scancode_files() -> list[File]: + return [ + _create_file("A", FileType.DIRECTORY), + _create_file("A/B", FileType.DIRECTORY), + _create_file("A/file1", FileType.FILE), + _create_file("A/file2.txt", FileType.FILE), + _create_file("A/B/file3", FileType.FILE), + ] + + def _create_file( path: str, type: FileType, diff --git a/tests/test_scancode/test_resource_tree.py b/tests/test_scancode/test_resource_tree.py index 89a5955..70701c0 100644 --- a/tests/test_scancode/test_resource_tree.py +++ b/tests/test_scancode/test_resource_tree.py @@ -4,83 +4,20 @@ from copy import deepcopy -import pytest -from pydantic import ValidationError - from opossum_lib.opossum_model import OpossumPackage, SourceInfo from opossum_lib.scancode.constants import SCANCODE_SOURCE_NAME from opossum_lib.scancode.model import ( Copyright, - File, FileBasedLicenseDetection, FileType, Match, - ScanCodeData, ) from opossum_lib.scancode.resource_tree import ( - ScanCodeFileTree, get_attribution_info, - scancode_to_file_tree, ) from tests.test_scancode.model_helpers import _create_file -class TestRevalidate: - def test_successfully_revalidate_valid_file_tree(self) -> None: - dummy_file = _create_file("A", FileType.FILE) - valid_structure = ScanCodeFileTree( - file=dummy_file, - children={ - "A": ScanCodeFileTree(file=dummy_file), - "B": ScanCodeFileTree( - file=dummy_file, children={"C": ScanCodeFileTree(file=dummy_file)} - ), - }, - ) - valid_structure.revalidate() - - def test_fail_to_revalidate_file_tree_invalid_at_toplevel(self) -> None: - dummy_file = _create_file("A", FileType.FILE) - invalid_structure = ScanCodeFileTree.model_construct( - children={ - "A": ScanCodeFileTree(file=dummy_file), - "B": ScanCodeFileTree( - file=dummy_file, children={"C": ScanCodeFileTree(file=dummy_file)} - ), - }, - file=None, # type: ignore - ) - with pytest.raises(ValidationError): - invalid_structure.revalidate() - - def test_fail_to_revalidate_file_tree_invalid_only_at_lower_level(self) -> None: - dummy_file = _create_file("A", FileType.FILE) - invalid_structure = ScanCodeFileTree( - file=dummy_file, - children={ - "A": ScanCodeFileTree(file=dummy_file), - "B": ScanCodeFileTree( - file=dummy_file, - children={"C": ScanCodeFileTree.model_construct(None)}, # type: ignore - ), - }, - ) - with pytest.raises(ValidationError): - invalid_structure.revalidate() - - -def test_scancode_to_resource_tree_produces_expected_result() -> None: - files = _create_reference_scancode_files() - scancode_data = ScanCodeData( - headers=[], packages=[], dependencies=[], license_detections=[], files=files - ) - - tree = scancode_to_file_tree(scancode_data) - reference = _create_reference_node_structure() - - assert tree == reference - - def test_get_attribution_info_directory() -> None: folder = _create_file("A", FileType.DIRECTORY) assert get_attribution_info(folder) == [] @@ -159,29 +96,3 @@ def test_get_attribution_info_file_multiple() -> None: attribution_confidence=50, ) assert set(attributions) == {expected1, expected2} - - -def _create_reference_scancode_files() -> list[File]: - return [ - _create_file("A", FileType.DIRECTORY), - _create_file("A/B", FileType.DIRECTORY), - _create_file("A/file1", FileType.FILE), - _create_file("A/file2.txt", FileType.FILE), - _create_file("A/B/file3", FileType.FILE), - ] - - -def _create_reference_node_structure() -> ScanCodeFileTree: - folder, subfolder, file1, file2, file3 = _create_reference_scancode_files() - inner = ScanCodeFileTree( - file=subfolder, children={"file3": ScanCodeFileTree(file=file3)} - ) - reference = ScanCodeFileTree( - file=folder, - children={ - "B": inner, - "file1": ScanCodeFileTree(file=file1), - "file2.txt": ScanCodeFileTree(file=file2), - }, - ) - return reference