From 99b5bf584cb320061da8024a5ce1fd87475108bf Mon Sep 17 00:00:00 2001
From: Daniel Goldman
Date: Sun, 28 Apr 2024 16:57:03 -0400
Subject: [PATCH] name_to_api_type_info is a LazyFrozenDict again
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Not being lazy caused a measurable slowdown (1.022 s ± 0.054 s -> 3.184 s ± 0.071 s)
in unrelated help goals (`pants help backends`).
Turning it lazy basically negates that (1.037 s ± 0.090 s).

More performance gains could be realised by converting from attributes which
are lazy but eagerly materialised to properties which are lazily materialised.
That would require more rework.
---
 src/python/pants/help/help_info_extracter.py | 61 +++++++++++++-------
 1 file changed, 40 insertions(+), 21 deletions(-)

diff --git a/src/python/pants/help/help_info_extracter.py b/src/python/pants/help/help_info_extracter.py
index e2cd586467ba..fe85a3b475cc 100644
--- a/src/python/pants/help/help_info_extracter.py
+++ b/src/python/pants/help/help_info_extracter.py
@@ -10,6 +10,7 @@
 from collections import defaultdict, namedtuple
 from dataclasses import dataclass
 from enum import Enum
+from functools import reduce
 from itertools import chain
 from operator import attrgetter
 from pathlib import Path
@@ -43,7 +44,7 @@
 from pants.option.options import Options
 from pants.option.parser import OptionValueHistory, Parser
 from pants.option.scope import ScopeInfo
-from pants.util.frozendict import FrozenDict, LazyFrozenDict
+from pants.util.frozendict import LazyFrozenDict
 from pants.util.strutil import first_paragraph, strval
 
 T = TypeVar("T")
@@ -336,6 +337,10 @@ class PluginAPITypeInfo:
     def fully_qualified_name(self) -> str:
         return f"{self.module}.{self.name}"
 
+    @staticmethod
+    def fully_qualified_name_from_type(t: type) -> str:
+        return f"{t.__module__}.{t.__qualname__}"
+
     @classmethod
     def create(
         cls, api_type: type, rules: Sequence[Rule | UnionRule], **kwargs
@@ -441,7 +446,7 @@ class AllHelpInfo:
     name_to_goal_info: LazyFrozenDict[str, GoalHelpInfo]
     name_to_target_type_info: LazyFrozenDict[str, TargetTypeHelpInfo]
     name_to_rule_info: LazyFrozenDict[str, RuleInfo]
-    name_to_api_type_info: FrozenDict[str, PluginAPITypeInfo]
+    name_to_api_type_info: LazyFrozenDict[str, PluginAPITypeInfo]
     name_to_backend_help_info: LazyFrozenDict[str, BackendHelpInfo]
     name_to_build_file_info: LazyFrozenDict[str, BuildFileSymbolHelpInfo]
     env_var_to_help_info: LazyFrozenDict[str, OptionHelpInfo]
@@ -706,7 +711,7 @@ def load() -> RuleInfo:
     @classmethod
     def get_api_type_infos(
         cls, build_configuration: BuildConfiguration | None, union_membership: UnionMembership
-    ) -> FrozenDict[str, PluginAPITypeInfo]:
+    ) -> LazyFrozenDict[str, PluginAPITypeInfo]:
         if build_configuration is None:
             return LazyFrozenDict({})
 
@@ -833,27 +838,41 @@ def _extract_api_types() -> Iterator[tuple[type, str, tuple[type, ...]]]:
                     ),
                 )
 
-        def get_api_type_info(api_type: type) -> PluginAPITypeInfo:
-            return PluginAPITypeInfo.create(
-                api_type,
-                rules,
-                provider=type_graph[api_type]["providers"],
-                dependencies=type_graph[api_type]["dependencies"],
-                dependents=type_graph[api_type].get("dependents", ()),
-                union_members=tuple(
-                    sorted(member.__qualname__ for member in union_membership.get(api_type))
-                ),
-            )
+        def get_api_type_info(api_types: list[type]) -> PluginAPITypeInfo:
+            """
+            Gather the info from each of the types and aggregate it.
+            The gathering is the expensive operation, and we can only aggregate once we've gathered.
+            """
+            infos = [
+                PluginAPITypeInfo.create(
+                    api_type,
+                    rules,
+                    provider=type_graph[api_type]["providers"],
+                    dependencies=type_graph[api_type]["dependencies"],
+                    dependents=type_graph[api_type].get("dependents", ()),
+                    union_members=tuple(
+                        sorted(member.__qualname__ for member in union_membership.get(api_type))
+                    ),
+                )
+                for api_type in api_types
+            ]
+            return reduce(lambda x, y: x.merged_with(y), infos)
 
-        infos: dict[str, PluginAPITypeInfo] = {}
+        # We want to provide a lazy dict so we don't spend so long doing the info gathering.
+        # We provide a list of the types here, and the lookup function performs the gather and the aggregation
+        names_to_types: dict[str, list[type]] = defaultdict(list)
         for api_type in sorted(all_types, key=attrgetter("__qualname__")):
-            api_type_info = get_api_type_info(api_type)
-            if api_type_info.fully_qualified_name in infos:
-                infos[api_type_info.fully_qualified_name] = infos[api_type_info.fully_qualified_name].merged_with(api_type_info)
-            else:
-                infos[api_type_info.fully_qualified_name] = api_type_info
+            names_to_types[PluginAPITypeInfo.fully_qualified_name_from_type(api_type)].append(
+                api_type
+            )
+
+        # Bind `v` as a default argument so each loader keeps its own list of types; a bare
+        # closure would late-bind `v` and every key would load the last group's info.
+        infos: dict[str, Callable[[], PluginAPITypeInfo]] = {
+            k: lambda v=v: get_api_type_info(v) for k, v in names_to_types.items()
+        }
 
-        return FrozenDict(infos)
+        return LazyFrozenDict(infos)
 
     @classmethod
     def get_backend_help_info(cls, options: Options) -> LazyFrozenDict[str, BackendHelpInfo]: