Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

reduce cost of large variant matrix #5392

Merged
merged 10 commits into from
Sep 18, 2024
23 changes: 17 additions & 6 deletions conda_build/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,9 @@

if TYPE_CHECKING:
from pathlib import Path
from typing import Any
from typing import Any, TypeVar

T = TypeVar("T")

invocation_time = ""

Expand Down Expand Up @@ -821,14 +823,23 @@ def clean_pkgs(self):

def copy(self) -> Config:
new = copy.copy(self)
# Use picke.loads(pickle.dumps(...) as a faster copy.deepcopy alternative.
new.variant = pickle.loads(pickle.dumps(self.variant, pickle.HIGHEST_PROTOCOL))
new.variant = self._copy_variants(self.variant)
if hasattr(self, "variants"):
new.variants = pickle.loads(
pickle.dumps(self.variants, pickle.HIGHEST_PROTOCOL)
)
new.variants = self.copy_variants()
return new

def _copy_variants(self, variant_or_list: T) -> T:
    """Deep-copy a variant dict or a list of variant dicts.

    The copy is produced by round-tripping the object through ``pickle``,
    which is used here as a faster alternative to ``copy.deepcopy``.
    """
    serialized = pickle.dumps(variant_or_list, pickle.HIGHEST_PROTOCOL)
    return pickle.loads(serialized)

def copy_variants(self) -> list[dict] | None:
    """Return a deep copy of ``self.variants``.

    Returns ``None`` when the ``variants`` attribute is missing or is ``None``;
    any other value (including an empty list) is copied via ``_copy_variants``.
    """
    current = getattr(self, "variants", None)
    if current is None:
        return None
    return self._copy_variants(current)

# context management - automatic cleanup if self.dirty or self.keep_old_work is not True
def __enter__(self):
pass
Expand Down
10 changes: 5 additions & 5 deletions conda_build/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -2288,7 +2288,6 @@ def validate_features(self):
def copy(self: Self) -> MetaData:
new = copy.copy(self)
new.config = self.config.copy()
new.config.variant = copy.deepcopy(self.config.variant)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

`config.copy()` on the previous line already does exactly this, so there is no need to do it twice.

new.meta = copy.deepcopy(self.meta)
new.type = getattr(
self, "type", "conda_v2" if self.config.conda_pkg_format == "2" else "conda"
Expand Down Expand Up @@ -2672,15 +2671,16 @@ def get_output_metadata_set(
_check_run_constrained(output_tuples)
return output_tuples

def get_loop_vars(self):
return get_vars(getattr(self.config, "input_variants", self.config.variants))
def get_loop_vars(self, subset=None):
return get_vars(
getattr(self.config, "input_variants", self.config.variants), subset=subset
)

def get_used_loop_vars(self, force_top_level=False, force_global=False):
loop_vars = self.get_loop_vars()
used_vars = self.get_used_vars(
force_top_level=force_top_level, force_global=force_global
)
return set(loop_vars).intersection(used_vars)
return self.get_loop_vars(subset=used_vars)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

get_loop_vars is far cheaper if we pass a subset to consider instead of computing the (usually quite small) intersection after looping over all variables across all variants.


def get_rendered_recipe_text(
self, permit_undefined_jinja=False, extract_pattern=None
Expand Down
11 changes: 11 additions & 0 deletions conda_build/render.py
Original file line number Diff line number Diff line change
Expand Up @@ -835,12 +835,20 @@ def distribute_variants(
used_variables = metadata.get_used_loop_vars(force_global=False)
top_loop = metadata.get_reduced_variant_set(used_variables)

# defer potentially expensive copy of input variants list
# until after reduction of the list for each variant
# since the initial list can be very long
all_variants = metadata.config.variants
metadata.config.variants = []

for variant in top_loop:
from .build import get_all_replacements

get_all_replacements(variant)
mv = metadata.copy()
mv.config.variant = variant
# start with shared list:
mv.config.variants = all_variants

pin_run_as_build = variant.get("pin_run_as_build", {})
if mv.numpy_xx and "numpy" not in pin_run_as_build:
Expand All @@ -860,6 +868,9 @@ def distribute_variants(
)
or mv.config.variants
)
# copy variants before we start modifying them,
# but after we've reduced the list via the conform_dict filter
mv.config.variants = mv.config.copy_variants()
get_all_replacements(mv.config.variants)
pin_run_as_build = variant.get("pin_run_as_build", {})
if mv.numpy_xx and "numpy" not in pin_run_as_build:
Expand Down
13 changes: 9 additions & 4 deletions conda_build/variants.py
Original file line number Diff line number Diff line change
Expand Up @@ -700,7 +700,10 @@ def get_package_variants(recipedir_or_metadata, config=None, variants=None):
return filter_combined_spec_to_used_keys(combined_spec, specs=specs)


def get_vars(variants: Iterable[dict[str, Any]]) -> set[str]:
def get_vars(
variants: Iterable[dict[str, Any]],
subset: set[str] | None = None,
) -> set[str]:
"""For purposes of naming/identifying, provide a way of identifying which variables contribute
to the matrix dimensionality"""
first, *others = variants
Expand All @@ -710,10 +713,12 @@ def get_vars(variants: Iterable[dict[str, Any]]) -> set[str]:
"ignore_version",
*ensure_list(first.get("extend_keys")),
}
to_consider = set(first)
if subset is not None:
to_consider.intersection_update(subset)
to_consider.difference_update(special_keys)
return {
var
for var in set(first) - special_keys
if any(first[var] != other[var] for other in others)
var for var in to_consider if any(first[var] != other[var] for other in others)
}


Expand Down
3 changes: 3 additions & 0 deletions news/5392-variant-copy
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
### Enhancements

* Reduce render time when there is a large number of unused variants. (#5392)
Loading
Loading