From 17a2dac7e645690c01c7452453f62b9942324eb3 Mon Sep 17 00:00:00 2001
From: Joey Vagedes <joeyvagedes@microsoft.com>
Date: Wed, 6 Sep 2023 12:06:59 -0700
Subject: [PATCH] database: multiple bugfixes (#393)

This PR contains the following database changes:

### [bug] Account for library instances that do not exist in the DSC

The instanced_inf_table generator did not account for the fact that
library class instances can choose to only support phases.

1. Updates the DSC parser's ScopedLibraryDict to store a list of
instances that match the scope, instead of the first instance it finds.

2. The instanced_inf_table generator now checks the list of instances (
    in order of appearance in the DSC) to find the first instance that
    matches the scope, but also supports the specific phase.


### [bug] Account for library instances that support only a subset of phases.

Due to platforms performing complex includes in their DSCs, There is a
scenario in which a library instance references a library class that
does not exist. This is not an error as the library class instance may
not be consumed by a component in the platform, so the missing
dependency is acceptable.

This commit adds the ability to handle this scenario, and will not raise
an error for a missing library instance.

### [enhancement] Include library class and library instance in "LIBRARIES_USED" column of the instanced_inf table

The LIBRARIES_USED column in the instanced_inf table only contained
the the edk2 relative path to the library instance. This made it
impossible to determine (with 100% accuracy) which library class the
particular library instance was representing.

This commit changes the "LIBRARIES_USED" column to contain a list of
(lib_class, lib_instance) tuples rather than a list of lib_instances.
Please review commit messages in each for a more detailed description.

### [enhancement] Include "LIBRARY_CLASS" column in instanced_inf table

Previously, the library class name for a library INF was not included
in the table. This commit adds that information to the table as a row
to make it easier to find the library class name for a library INF.

### [bug] Better handling of RuleOverride in instanced_fv_table

When parsing an FDF file, an INF can be prepending with a RuleOverride
directive. The previous functionality to ignore this directive and
retrive the actual INF value could not handle any spaces in the
directive (such as RuleOverride = ACPITABLE). This commit expands the
ability to handle spaces in the directive.
---
 .../queries/unused_component_query.py         |   2 +-
 .../database/tables/instanced_fv_table.py     |   6 +-
 .../database/tables/instanced_inf_table.py    | 102 ++++++++++++++----
 edk2toollib/uefi/edk2/parsers/dsc_parser.py   |  12 ++-
 edk2toollib/uefi/edk2/parsers/inf_parser.py   |   3 +-
 tests.unit/database/common.py                 |   3 +-
 tests.unit/database/test_component_query.py   |   2 +-
 .../database/test_instanced_fv_table.py       |   5 +-
 .../database/test_instanced_inf_table.py      |  82 ++++++++++++--
 9 files changed, 180 insertions(+), 37 deletions(-)

diff --git a/edk2toollib/database/queries/unused_component_query.py b/edk2toollib/database/queries/unused_component_query.py
index de9b1065..d39494cb 100644
--- a/edk2toollib/database/queries/unused_component_query.py
+++ b/edk2toollib/database/queries/unused_component_query.py
@@ -91,4 +91,4 @@ def _recurse_inf(self, inf, table, library_list):
                 library_list.append(inf)
 
             for inf in result["LIBRARIES_USED"]:
-                self._recurse_inf(inf, table, library_list)
+                self._recurse_inf(inf[1], table, library_list)
diff --git a/edk2toollib/database/tables/instanced_fv_table.py b/edk2toollib/database/tables/instanced_fv_table.py
index 735564e9..4f253489 100644
--- a/edk2toollib/database/tables/instanced_fv_table.py
+++ b/edk2toollib/database/tables/instanced_fv_table.py
@@ -7,6 +7,7 @@
 # SPDX-License-Identifier: BSD-2-Clause-Patent
 ##
 """A module to generate a table containing fv information."""
+import re
 from pathlib import Path
 
 from tinyrecord import transaction
@@ -27,6 +28,9 @@ class InstancedFvTable(TableGenerator):
     |------------------------------------------------------|
     ```
     """  # noqa: E501
+
+    RULEOVERRIDE = re.compile(r'RuleOverride\s*=.+\s+(.+\.inf)', re.IGNORECASE)
+
     def __init__(self, *args, **kwargs):
         """Initialize the query with the specific settings."""
         self.env = kwargs.pop("env")
@@ -54,7 +58,7 @@ def parse(self, db: Edk2DB) -> None:
             inf_list = []  # Some INF's start with RuleOverride. We only need the INF
             for inf in fdfp.FVs[fv]["Infs"]:
                 if inf.lower().startswith("ruleoverride"):
-                    inf = inf.split(" ", 1)[-1]
+                    inf = InstancedFvTable.RULEOVERRIDE.findall(inf)[0]
                 if Path(inf).is_absolute():
                     inf = str(Path(self.pathobj.GetEdk2RelativePathFromAbsolutePath(inf)))
                 inf_list.append(Path(inf).as_posix())
diff --git a/edk2toollib/database/tables/instanced_inf_table.py b/edk2toollib/database/tables/instanced_inf_table.py
index 27275a10..2d3a9eae 100644
--- a/edk2toollib/database/tables/instanced_inf_table.py
+++ b/edk2toollib/database/tables/instanced_inf_table.py
@@ -25,9 +25,9 @@ class InstancedInfTable(TableGenerator):
 
     ``` py
     table_name = "instanced_inf"
-    |----------------------------------------------------------------------------------------------------------------------------------|
-    | DSC | GUID | LIBRARY_CLASS | PATH | PHASES | SOURCES_USED | LIBRARIES_USED | PROTOCOLS_USED | GUIDS_USED | PPIS_USED | PCDS_USED |
-    |----------------------------------------------------------------------------------------------------------------------------------|
+    |----------------------------------------------------------------------------------------------------------------------------------------------|
+    | DSC | PATH | NAME | LIBRARY_CLASS | COMPONENT | MODULE_TYPE | ARCH | SOURCES_USED | LIBRARIES_USED | PROTOCOLS_USED | GUIDS_USED | PCDS_USED |
+    |----------------------------------------------------------------------------------------------------------------------------------------------|
     ```
     """  # noqa: E501
     SECTION_LIBRARY = "LibraryClasses"
@@ -43,6 +43,19 @@ def __init__(self, *args, **kwargs):
         self.arch = self.env["TARGET_ARCH"].split(" ")  # REQUIRED
         self.target = self.env["TARGET"]  # REQUIRED
 
+        # Prevent parsing the same INF multiple times
+        self._parsed_infs = {}
+
+    def inf(self, inf: str) -> InfP:
+        """Returns a parsed INF object."""
+        if inf in self._parsed_infs:
+            infp = self._parsed_infs[inf]
+        else:
+            infp = InfP().SetEdk2Path(self.pathobj)
+            infp.ParseFile(inf)
+            self._parsed_infs[inf] = infp
+        return infp
+
     def parse(self, db: Edk2DB) -> None:
         """Parse the workspace and update the database."""
         self.pathobj = db.pathobj
@@ -106,44 +119,60 @@ def _parse_inf_recursively(
 
         Will immediately return if the INF has already been visited.
         """
+        if inf is None:
+            return []
+
         logging.debug(f"  Parsing Library: [{inf}]")
         visited.append(inf)
-        library_instances = []
+        library_instance_list = []
+        library_class_list = []
+
 
         #
         # 0. Use the existing parser to parse the INF file. This parser parses an INF as an independent file
         #    and does not take into account the context of a DSC.
         #
-        infp = InfP().SetEdk2Path(self.pathobj)
-        infp.ParseFile(inf)
+        infp = self.inf(inf)
 
         #
         # 1. Convert all libraries to their actual instances for this component. This takes into account
         #    any overrides for this component
         #
         for lib in infp.get_libraries(self.arch):
-            lib = lib.split(" ")[0].lower()
-            library_instances.append(self._lib_to_instance(lib, scope, library_dict, override_dict))
+            lib = lib.split(" ")[0]
+            library_instance_list.append(self._lib_to_instance(lib.lower(), scope, library_dict, override_dict))
+            library_class_list.append(lib)
         # Append all NULL library instances
         for null_lib in override_dict["NULL"]:
-            library_instances.append(null_lib)
+            library_instance_list.append(null_lib)
+            library_class_list.append("NULL")
+
+        to_parse = list(filter(lambda lib: lib is not None, library_instance_list))
 
         # Time to visit in libraries that we have not visited yet.
         to_return = []
-        for library in filter(lambda lib: lib not in visited, library_instances):
+        for library in filter(lambda lib: lib not in visited, to_parse):
             to_return += self._parse_inf_recursively(library, component,
                                                      library_dict, override_dict, scope, visited)
 
+        # Transform path to edk2 relative form (POSIX)
+        def to_posix(path):
+            if path is None:
+                return None
+            return Path(path).as_posix()
+        library_instance_list = list(map(to_posix, library_instance_list))
+
         # Return Paths as posix paths, which is Edk2 standard.
         to_return.append({
             "DSC": Path(self.dsc).name,
             "PATH": Path(inf).as_posix(),
             "NAME": infp.Dict["BASE_NAME"],
+            "LIBRARY_CLASS": infp.LibraryClass,
             "COMPONENT": Path(component).as_posix(),
             "MODULE_TYPE": infp.Dict["MODULE_TYPE"],
             "ARCH": scope.split(".")[0].upper(),
             "SOURCES_USED": list(map(lambda p: Path(p).as_posix(), infp.Sources)),
-            "LIBRARIES_USED": list(map(lambda p: Path(p).as_posix(), library_instances)),
+            "LIBRARIES_USED": list(zip(library_class_list, library_instance_list)),
             "PROTOCOLS_USED": [],  # TODO
             "GUIDS_USED": [],  # TODO
             "PPIS_USED": [],  # TODO
@@ -158,39 +187,68 @@ def _lib_to_instance(self, library_class_name, scope, library_dict, override_dic
         """
         arch, module = tuple(scope.split("."))
 
+        # NOTE: it is recognized that the below code could be reduced to have less repetitiveness,
+        # but I personally believe that the below makes it more clear the order in which we search
+        # for matches, and that the order is quite important.
+
         # https://tianocore-docs.github.io/edk2-DscSpecification/release-1.28/2_dsc_overview/27_[libraryclasses]_section_processing.html#27-libraryclasses-section-processing
 
         # 1. If a Library class instance (INF) is specified in the Edk2 II [Components] section (an override),
-        #    then it will be used
+        #    and the library supports the module, then it will be used.
         if library_class_name in override_dict:
             return override_dict[library_class_name]
 
         # 2/3. If the Library Class instance (INF) is defined in the [LibraryClasses.$(ARCH).$(MODULE_TYPE)] section,
-        #    then it will be used.
+        #      and the library supports the module, then it will be used.
         lookup = f'{arch}.{module}.{library_class_name}'
         if lookup in library_dict:
-            return library_dict[lookup]
+            library_instance = self._reduce_lib_instances(module, library_dict[lookup])
+            if library_instance is not None:
+                return library_instance
 
         # 4. If the Library Class instance (INF) is defined in the [LibraryClasses.common.$(MODULE_TYPE)] section,
-        #   then it will be used.
+        #    and the library supports the module, then it will be used.
         lookup = f'common.{module}.{library_class_name}'
         if lookup in library_dict:
-            return library_dict[lookup]
+            library_instance = self._reduce_lib_instances(module, library_dict[lookup])
+            if library_instance is not None:
+                return library_instance
 
         # 5. If the Library Class instance (INF) is defined in the [LibraryClasses.$(ARCH)] section,
-        #    then it will be used.
+        #    and the library supports the module, then it will be used.
         lookup = f'{arch}.{library_class_name}'
         if lookup in library_dict:
-            return library_dict[lookup]
+            library_instance = self._reduce_lib_instances(module, library_dict[lookup])
+            if library_instance is not None:
+                return library_instance
 
         # 6. If the Library Class Instance (INF) is defined in the [LibraryClasses] section,
-        #    then it will be used.
+        #    and the library supports the module, then it will be used.
         lookup = f'common.{library_class_name}'
         if lookup in library_dict:
-            return library_dict[lookup]
+            library_instance = self._reduce_lib_instances(module, library_dict[lookup])
+            if library_instance is not None:
+                return library_instance
 
         logging.debug(f'scoped library contents: {library_dict}')
         logging.debug(f'override dictionary: {override_dict}')
         e = f'Cannot find library class [{library_class_name}] for scope [{scope}] when evaluating {self.dsc}'
-        logging.error(e)
-        raise RuntimeError(e)
+        logging.warn(e)
+        return None
+
+    def _reduce_lib_instances(self, module: str, library_instance_list: list[str]) -> str:
+        """For a DSC, multiple library instances for the same library class can exist.
+
+        This is either due to a mistake by the developer, or because the library class
+        instance only supports certain modules. That is to say a library class instance
+        defining `MyLib| PEIM` and one defining `MyLib| PEI_CORE` both being defined in
+        the same LibraryClasses section is acceptable.
+
+        Due to this, we need to filter to the first library class instance that supports
+        the module type.
+        """
+        for library_instance in library_instance_list:
+            infp = self.inf(library_instance)
+            if module.lower() in [phase.lower() for phase in infp.SupportedPhases]:
+                return library_instance
+        return None
diff --git a/edk2toollib/uefi/edk2/parsers/dsc_parser.py b/edk2toollib/uefi/edk2/parsers/dsc_parser.py
index 31094d1e..463bf888 100644
--- a/edk2toollib/uefi/edk2/parsers/dsc_parser.py
+++ b/edk2toollib/uefi/edk2/parsers/dsc_parser.py
@@ -24,8 +24,15 @@ class DscParser(HashFileParser):
         OtherMods (list): list of other Mods that are not IA32, X64 specific
         Libs (list): list of Libs (the line in the file)
         LibsEnhanced (list): Better parsed (in a dict) list of Libs
+        ScopedLibraryDict (dict): key (library class) value (list of instances)
         LibraryClassToInstanceDict (dict): Key (Library class) Value (Instance)
         Pcds (list): List of Pcds
+
+    !!! note
+        ScopedLibraryDict can have multiple library instances for the same scope because the INF
+        can also filter the module types it supports. For example, two library instances could be
+        in the scope of "common", but one filters to only PEI (MyLib| PEI_CORE) and the other
+        filters to PEIM (MyLib| PEIM).
     """
     SECTION_LIBRARY = "libraryclasses"
     SECTION_COMPONENT = "components"
@@ -359,7 +366,10 @@ def _parse_libraries(self):
             for scope in current_scope:
                 key = f"{scope.strip()}.{lib.strip()}".lower()
                 value = instance.strip()
-                self.ScopedLibraryDict[key] = value
+                if key in self.ScopedLibraryDict and value not in self.ScopedLibraryDict[key]:
+                    self.ScopedLibraryDict[key].insert(0, value)
+                else:
+                    self.ScopedLibraryDict[key] = [value]
 
         return
 
diff --git a/edk2toollib/uefi/edk2/parsers/inf_parser.py b/edk2toollib/uefi/edk2/parsers/inf_parser.py
index 353e5118..c213e70a 100644
--- a/edk2toollib/uefi/edk2/parsers/inf_parser.py
+++ b/edk2toollib/uefi/edk2/parsers/inf_parser.py
@@ -12,7 +12,8 @@
 from edk2toollib.uefi.edk2.parsers.base_parser import HashFileParser
 
 AllPhases = ["SEC", "PEIM", "PEI_CORE", "DXE_DRIVER", "DXE_CORE", "DXE_RUNTIME_DRIVER", "UEFI_DRIVER",
-             "SMM_CORE", "DXE_SMM_DRIVER", "UEFI_APPLICATION"]
+             "SMM_CORE", "DXE_SMM_DRIVER", "UEFI_APPLICATION", "MM_STANDALONE", "MM_CORE_STANDALONE",
+             "HOST_APPLICATION",]
 
 
 class InfParser(HashFileParser):
diff --git a/tests.unit/database/common.py b/tests.unit/database/common.py
index 202a4959..7b32c57a 100644
--- a/tests.unit/database/common.py
+++ b/tests.unit/database/common.py
@@ -156,12 +156,13 @@ def __init__(self, ws: Path):
     def create_library(self, name: str, lib_cls: str, **kwargs):
         """Creates a Library INF in the empty tree."""
         path = self.library_folder / f'{name}.inf'
-        kwargs["defines"] = {
+        default = {
             "FILE_GUID": str(uuid.uuid4()),
             "MODULE_TYPE": "BASE",
             "BASE_NAME": name,
             "LIBRARY_CLASS": lib_cls,
         }
+        kwargs["defines"] = {**default, **kwargs.get("defines", {})}
         create_inf_file(path, **kwargs)
         self.library_list.append(str(path))
         return str(path.relative_to(self.ws))
diff --git a/tests.unit/database/test_component_query.py b/tests.unit/database/test_component_query.py
index 44bcfa45..d761fcc9 100644
--- a/tests.unit/database/test_component_query.py
+++ b/tests.unit/database/test_component_query.py
@@ -71,7 +71,7 @@ def test_simple_component(empty_tree: Tree):
     result = db.search(ComponentQuery(component = "TestDriver1"))
     assert len(result) == 1
 
-    assert sorted(result[0]['LIBRARIES_USED']) == sorted(['TestPkg/Library/TestLib2.inf', 'TestPkg/Library/TestLib3.inf'])
+    assert sorted(result[0]['LIBRARIES_USED']) == sorted([('TestCls','TestPkg/Library/TestLib2.inf'), ('NULL','TestPkg/Library/TestLib3.inf')])
 
     result = db.search(ComponentQuery(component = "NonExistantDriver"))
     assert len(result) == 0
diff --git a/tests.unit/database/test_instanced_fv_table.py b/tests.unit/database/test_instanced_fv_table.py
index 05ba7d9d..fbb13506 100644
--- a/tests.unit/database/test_instanced_fv_table.py
+++ b/tests.unit/database/test_instanced_fv_table.py
@@ -29,6 +29,7 @@ def test_valid_fdf(empty_tree: Tree):  # noqa: F811
     comp2 = empty_tree.create_component("TestDriver2", "DXE_DRIVER")
     comp3 = empty_tree.create_component("TestDriver3", "DXE_DRIVER")
     comp4 = str(Path('TestPkg','Extra Drivers','TestDriver4.inf'))
+    comp5 = empty_tree.create_component("TestDriver5", "DXE_DRIVER")
 
     dsc = empty_tree.create_dsc()
 
@@ -39,7 +40,8 @@ def test_valid_fdf(empty_tree: Tree):  # noqa: F811
             f"INF  {comp1}", # PP relative
             f'INF  {str(empty_tree.ws / comp2)}', # Absolute
             f'INF  RuleOverride=RESET_VECTOR {comp3}', # RuleOverride
-            f'INF  {comp4}' # Space in path
+            f'INF  {comp4}', # Space in path
+            f'INF  ruleoverride = RESET_VECTOR {comp5}', # RuleOverride lowercase & spaces
         ]
     )
 
@@ -62,5 +64,6 @@ def test_valid_fdf(empty_tree: Tree):  # noqa: F811
                 Path(comp1).as_posix(),
                 Path(comp2).as_posix(),
                 Path(comp3).as_posix(),
+                Path(comp5).as_posix(),
                 Path(comp4).as_posix(),
                 ])
diff --git a/tests.unit/database/test_instanced_inf_table.py b/tests.unit/database/test_instanced_inf_table.py
index aafe60ad..4469fd31 100644
--- a/tests.unit/database/test_instanced_inf_table.py
+++ b/tests.unit/database/test_instanced_inf_table.py
@@ -11,8 +11,8 @@
 from pathlib import Path
 
 import pytest
-from common import Tree, empty_tree  # noqa: F401
-from edk2toollib.database import Edk2DB
+from common import Tree, create_inf_file, empty_tree  # noqa: F401
+from edk2toollib.database import Edk2DB, Query
 from edk2toollib.database.tables import InstancedInfTable
 from edk2toollib.uefi.edk2.path_utilities import Edk2Path
 
@@ -128,8 +128,8 @@ def test_library_override(empty_tree: Tree):
     # Ensure the Test Driver is using TestLib2 from the override and the NULL library was added
     for row in db.table("instanced_inf").all():
         if (row["NAME"] == Path(comp1).stem
-            and Path(lib2).as_posix() in row["LIBRARIES_USED"]
-            and Path(lib3).as_posix() in row["LIBRARIES_USED"]):
+            and ("TestCls", Path(lib2).as_posix()) in row["LIBRARIES_USED"]
+            and ("NULL", Path(lib3).as_posix()) in row["LIBRARIES_USED"]):
             break
     else:
         assert False
@@ -172,7 +172,7 @@ def test_scoped_libraries1(empty_tree: Tree):
     # For each driver, verify that the the driver number (1, 2, 3) uses the corresponding lib number (1, 2, 3)
     for row in db.table("instanced_inf").all():
         if "COMPONENT" not in row:  # Only care about looking at drivers, which do not have a component
-            assert row["NAME"].replace("Driver", "Lib") in row["LIBRARIES_USED"][0]
+            assert row["NAME"].replace("Driver", "Lib") in row["LIBRARIES_USED"][0][1]
 
 def test_scoped_libraries2(empty_tree: Tree):
     """Ensure that the correct libraries in regards to scoping.
@@ -207,9 +207,9 @@ def test_scoped_libraries2(empty_tree: Tree):
 
     for row in db.table("instanced_inf").all():
         if "COMPONENT" not in row:
-            assert row["NAME"].replace("Driver", "Lib") in row["LIBRARIES_USED"][0]
+            assert row["NAME"].replace("Driver", "Lib") in row["LIBRARIES_USED"][0][1]
 
-def test_missing_library(empty_tree: Tree):
+def test_missing_library(empty_tree: Tree, caplog):
     """Test when a library is missing."""
     edk2path = Edk2Path(str(empty_tree.ws), [])
     db = Edk2DB(Edk2DB.MEM_RW, pathobj=edk2path)
@@ -227,5 +227,71 @@ def test_missing_library(empty_tree: Tree):
         "TARGET_ARCH": "IA32 X64",
         "TARGET": "DEBUG",
     })
-    with pytest.raises(RuntimeError):
+
+    with caplog.at_level(logging.WARNING):
         inf_table.parse(db)
+
+        assert len(caplog.records) == 1
+        assert 'testcls' in caplog.records[0].message
+
+def test_skip_library_with_unsupported_module_type(empty_tree: Tree):
+    """Library class INFs can specify what module types they support.
+
+    In situations where a library class is in the [LibraryClasses] section, it may not necessarily
+    support all module types as the LIBRARY_CLASS section of the INF may limit its supported
+    module types. This test ensures that a library instance is ignored if the library instance
+    itself states it does not support a module type.
+
+    i.e. LIBRARY_CLASS = TestCls| PEIM only supports PEIM's, even if it is in the [LibraryClasses] section.
+    """
+    edk2path = Edk2Path(str(empty_tree.ws), [])
+    db = Edk2DB(Edk2DB.MEM_RW, pathobj=edk2path)
+
+    testlib1 = empty_tree.create_library("TestLib1", "TestCls",
+        defines = {
+            "LIBRARY_CLASS": "TestCls| PEIM"
+        }
+    )
+    testlib2 = empty_tree.create_library("TestLib2", "TestCls")
+    comp1 = empty_tree.create_component("TestDriver1", "DXE_DRIVER", libraryclasses = ["TestCls"])
+
+    # Generate the DSC with testlib1 first
+    dsc1 = empty_tree.create_dsc(
+        libraryclasses = [
+            f'TestCls|{testlib1}',
+            f'TestCls|{testlib2}',
+
+        ],
+        components = [comp1],
+    )
+
+    # Generate the DSC with testlib2 first
+    dsc2 = empty_tree.create_dsc(
+        libraryclasses = [
+            f'TestCls|{testlib2}',
+            f'TestCls|{testlib1}',
+
+        ],
+        components = [comp1],
+    )
+
+    inf_table = InstancedInfTable(env = {
+        "ACTIVE_PLATFORM": dsc1,
+        "TARGET_ARCH": "X64",
+        "TARGET": "DEBUG",
+    })
+
+    inf_table.parse(db)
+
+    inf_table = InstancedInfTable(env = {
+        "ACTIVE_PLATFORM": dsc2,
+        "TARGET_ARCH": "X64",
+        "TARGET": "DEBUG",
+    })
+
+    inf_table.parse(db)
+
+    component_list = db.table("instanced_inf").search(~Query().COMPONENT.exists())
+    assert len(component_list) == 2
+    for component in component_list:
+        assert component["LIBRARIES_USED"][0] == ("TestCls", Path(testlib2).as_posix())