From 2bf318fc2817e69d9f9b82e19573760037b58947 Mon Sep 17 00:00:00 2001
From: Manon Flageat <61653012+manon-but-yes@users.noreply.github.com>
Date: Fri, 6 Sep 2024 10:59:40 +0100
Subject: [PATCH] feat: add reevaluation function to compute corrected archives in uncertain domains (#186)

* feat: add reevaluation function to compute corrected archives in uncertain domains
---
 qdax/utils/uncertainty_metrics.py            | 328 +++++++++++++++++++
 tests/utils_test/uncertainty_metrics_test.py | 191 +++++++++++
 2 files changed, 519 insertions(+)
 create mode 100644 qdax/utils/uncertainty_metrics.py
 create mode 100644 tests/utils_test/uncertainty_metrics_test.py

diff --git a/qdax/utils/uncertainty_metrics.py b/qdax/utils/uncertainty_metrics.py
new file mode 100644
index 00000000..2dd61c18
--- /dev/null
+++ b/qdax/utils/uncertainty_metrics.py
@@ -0,0 +1,328 @@
+from functools import partial
+from typing import Callable, Tuple
+
+import jax
+import jax.numpy as jnp
+
+from qdax.core.containers.mapelites_repertoire import MapElitesRepertoire
+from qdax.types import Descriptor, ExtraScores, Fitness, Genotype, RNGKey
+from qdax.utils.sampling import (
+    dummy_extra_scores_extractor,
+    median,
+    multi_sample_scoring_function,
+    std,
+)
+
+
+@partial(
+    jax.jit,
+    static_argnames=(
+        "scoring_fn",
+        "num_reevals",
+        "fitness_extractor",
+        "descriptor_extractor",
+        "extra_scores_extractor",
+        "scan_size",
+    ),
+)
+def reevaluation_function(
+    repertoire: MapElitesRepertoire,
+    random_key: RNGKey,
+    empty_corrected_repertoire: MapElitesRepertoire,
+    scoring_fn: Callable[
+        [Genotype, RNGKey],
+        Tuple[Fitness, Descriptor, ExtraScores, RNGKey],
+    ],
+    num_reevals: int,
+    fitness_extractor: Callable[[jnp.ndarray], jnp.ndarray] = median,
+    descriptor_extractor: Callable[[jnp.ndarray], jnp.ndarray] = median,
+    extra_scores_extractor: Callable[
+        [ExtraScores, int], ExtraScores
+    ] = dummy_extra_scores_extractor,
+    scan_size: int = 0,
+) -> Tuple[MapElitesRepertoire, RNGKey]:
+    """
+    Perform reevaluation of a repertoire and construct a corrected repertoire from it.
+
+    Args:
+        repertoire: repertoire to reevaluate.
+        empty_corrected_repertoire: repertoire to be filled with reevaluated solutions,
+            allows using a different type of repertoire than the one from the algorithm.
+        random_key: JAX random key.
+        scoring_fn: scoring function used for evaluation.
+        num_reevals: number of samples to generate for each individual.
+        fitness_extractor: function to extract the final fitness from
+            multiple samples of the same solution (default: median).
+        descriptor_extractor: function to extract the final descriptor from
+            multiple samples of the same solution (default: median).
+        extra_scores_extractor: function to extract the extra_scores from
+            multiple samples of the same solution (default: no effect).
+        scan_size: allows splitting the reevaluations into multiple batches to reduce
+            the memory load of the reevaluation.
+    Returns:
+        The corrected repertoire and a random key.
+    """
+
+    # If no reevaluations, return the original repertoire unchanged
+    if num_reevals == 0:
+        return repertoire, random_key
+
+    # Perform reevaluation
+    (
+        all_fitnesses,
+        all_descriptors,
+        all_extra_scores,
+        random_key,
+    ) = _perform_reevaluation(
+        policies_params=repertoire.genotypes,
+        random_key=random_key,
+        scoring_fn=scoring_fn,
+        num_reevals=num_reevals,
+        scan_size=scan_size,
+    )
+
+    # Extract the final scores
+    extra_scores = extra_scores_extractor(all_extra_scores, num_reevals)
+    fitnesses = fitness_extractor(all_fitnesses)
+    descriptors = descriptor_extractor(all_descriptors)
+
+    # Set -inf fitness for all non-existent individuals
+    fitnesses = jnp.where(repertoire.fitnesses == -jnp.inf, -jnp.inf, fitnesses)
+
+    # Fill-in the corrected repertoire
+    corrected_repertoire = empty_corrected_repertoire.add(
+        batch_of_genotypes=repertoire.genotypes,
+        batch_of_descriptors=descriptors,
+        batch_of_fitnesses=fitnesses,
+        batch_of_extra_scores=extra_scores,
+    )
+
+    return corrected_repertoire, random_key
+
+
+@partial(
+    jax.jit,
+    static_argnames=(
+        "scoring_fn",
+        "num_reevals",
+        "fitness_extractor",
+        "fitness_reproducibility_extractor",
+        "descriptor_extractor",
+        "descriptor_reproducibility_extractor",
+        "extra_scores_extractor",
+        "scan_size",
+    ),
+)
+def reevaluation_reproducibility_function(
+    repertoire: MapElitesRepertoire,
+    random_key: RNGKey,
+    empty_corrected_repertoire: MapElitesRepertoire,
+    scoring_fn: Callable[
+        [Genotype, RNGKey],
+        Tuple[Fitness, Descriptor, ExtraScores, RNGKey],
+    ],
+    num_reevals: int,
+    fitness_extractor: Callable[[jnp.ndarray], jnp.ndarray] = median,
+    fitness_reproducibility_extractor: Callable[[jnp.ndarray], jnp.ndarray] = std,
+    descriptor_extractor: Callable[[jnp.ndarray], jnp.ndarray] = median,
+    descriptor_reproducibility_extractor: Callable[[jnp.ndarray], jnp.ndarray] = std,
+    extra_scores_extractor: Callable[
+        [ExtraScores, int], ExtraScores
+    ] = dummy_extra_scores_extractor,
+    scan_size: int = 0,
+) -> Tuple[MapElitesRepertoire, MapElitesRepertoire, MapElitesRepertoire, RNGKey]:
+    """
+    Perform reevaluation of a repertoire and construct a corrected repertoire and a
+    reproducibility repertoire from it.
+
+    Args:
+        repertoire: repertoire to reevaluate.
+        empty_corrected_repertoire: repertoire to be filled with reevaluated solutions,
+            allows using a different type of repertoire than the one from the algorithm.
+        random_key: JAX random key.
+        scoring_fn: scoring function used for evaluation.
+        num_reevals: number of samples to generate for each individual.
+        fitness_extractor: function to extract the final fitness from
+            multiple samples of the same solution (default: median).
+        fitness_reproducibility_extractor: function to extract the fitness
+            reproducibility from multiple samples of the same solution (default: std).
+        descriptor_extractor: function to extract the final descriptor from
+            multiple samples of the same solution (default: median).
+        descriptor_reproducibility_extractor: function to extract the descriptor
+            reproducibility from multiple samples of the same solution (default: std).
+        extra_scores_extractor: function to extract the extra_scores from
+            multiple samples of the same solution (default: no effect).
+        scan_size: allows splitting the reevaluations into multiple batches to reduce
+            the memory load of the reevaluation.
+    Returns:
+        The corrected repertoire.
+        A repertoire storing reproducibility in fitness.
+        A repertoire storing reproducibility in descriptor.
+        A random key.
+    """
+
+    # If no reevaluations, return the original repertoire for all three outputs
+    if num_reevals == 0:
+        return (
+            repertoire,
+            repertoire,
+            repertoire,
+            random_key,
+        )
+
+    # Perform reevaluation
+    (
+        all_fitnesses,
+        all_descriptors,
+        all_extra_scores,
+        random_key,
+    ) = _perform_reevaluation(
+        policies_params=repertoire.genotypes,
+        random_key=random_key,
+        scoring_fn=scoring_fn,
+        num_reevals=num_reevals,
+        scan_size=scan_size,
+    )
+
+    # Extract the final scores
+    extra_scores = extra_scores_extractor(all_extra_scores, num_reevals)
+    fitnesses = fitness_extractor(all_fitnesses)
+    fitnesses_reproducibility = fitness_reproducibility_extractor(all_fitnesses)
+    descriptors = descriptor_extractor(all_descriptors)
+    descriptors_reproducibility = descriptor_reproducibility_extractor(all_descriptors)
+
+    # WARNING: in the case of descriptors_reproducibility, take average over dimensions
+    descriptors_reproducibility = jnp.average(descriptors_reproducibility, axis=-1)
+
+    # Set -inf fitness for all non-existent individuals
+    fitnesses = jnp.where(repertoire.fitnesses == -jnp.inf, -jnp.inf, fitnesses)
+    fitnesses_reproducibility = jnp.where(
+        repertoire.fitnesses == -jnp.inf, -jnp.inf, fitnesses_reproducibility
+    )
+    descriptors_reproducibility = jnp.where(
+        repertoire.fitnesses == -jnp.inf, -jnp.inf, descriptors_reproducibility
+    )
+
+    # Fill-in corrected repertoire
+    corrected_repertoire = empty_corrected_repertoire.add(
+        batch_of_genotypes=repertoire.genotypes,
+        batch_of_descriptors=descriptors,
+        batch_of_fitnesses=fitnesses,
+        batch_of_extra_scores=extra_scores,
+    )
+
+    # Fill-in fit_reproducibility repertoire
+    fit_reproducibility_repertoire = empty_corrected_repertoire.add(
+        batch_of_genotypes=repertoire.genotypes,
+        batch_of_descriptors=repertoire.descriptors,
+        batch_of_fitnesses=fitnesses_reproducibility,
+        batch_of_extra_scores=extra_scores,
+    )
+
+    # Fill-in desc_reproducibility repertoire
+    desc_reproducibility_repertoire = empty_corrected_repertoire.add(
+        batch_of_genotypes=repertoire.genotypes,
+        batch_of_descriptors=repertoire.descriptors,
+        batch_of_fitnesses=descriptors_reproducibility,
+        batch_of_extra_scores=extra_scores,
+    )
+
+    return (
+        corrected_repertoire,
+        fit_reproducibility_repertoire,
+        desc_reproducibility_repertoire,
+        random_key,
+    )
+
+
+@partial(
+    jax.jit,
+    static_argnames=(
+        "scoring_fn",
+        "num_reevals",
+        "scan_size",
+    ),
+)
+def _perform_reevaluation(
+    policies_params: Genotype,
+    random_key: RNGKey,
+    scoring_fn: Callable[
+        [Genotype, RNGKey],
+        Tuple[Fitness, Descriptor, ExtraScores, RNGKey],
+    ],
+    num_reevals: int,
+    scan_size: int = 0,
+) -> Tuple[Fitness, Descriptor, ExtraScores, RNGKey]:
+    """
+    Sub-function used to perform reevaluation of a repertoire in uncertain applications.
+
+    Args:
+        policies_params: genotypes to reevaluate.
+        random_key: JAX random key.
+        scoring_fn: scoring function used for evaluation.
+        num_reevals: number of samples to generate for each individual.
+        scan_size: allows splitting the reevaluations into multiple batches to reduce
+            the memory load of the reevaluation.
+    Returns:
+        The fitnesses, descriptors and extra scores from the reevaluation,
+        and a random key.
+    Raises:
+        AssertionError: if scan_size is non-zero and does not divide num_reevals.
+    """
+
+    # If no need for scan, call the sampling function
+    if scan_size == 0:
+        (
+            all_fitnesses,
+            all_descriptors,
+            all_extra_scores,
+            random_key,
+        ) = multi_sample_scoring_function(
+            policies_params=policies_params,
+            random_key=random_key,
+            scoring_fn=scoring_fn,
+            num_samples=num_reevals,
+        )
+
+    # If need for scan, call the sampling function multiple times
+    else:
+
+        # Ensure that num_reevals is a multiple of scan_size
+        assert (
+            num_reevals % scan_size == 0
+        ), "num_reevals should be a multiple of scan_size to be able to scan."
+        num_loops = num_reevals // scan_size
+
+        def _sampling_scan(
+            random_key: RNGKey,
+            unused: Tuple[()],
+        ) -> Tuple[Tuple[RNGKey], Tuple[Fitness, Descriptor, ExtraScores]]:
+            (
+                all_fitnesses,
+                all_descriptors,
+                all_extra_scores,
+                random_key,
+            ) = multi_sample_scoring_function(
+                policies_params=policies_params,
+                random_key=random_key,
+                scoring_fn=scoring_fn,
+                num_samples=scan_size,
+            )
+            return (random_key), (
+                all_fitnesses,
+                all_descriptors,
+                all_extra_scores,
+            )
+
+        (random_key), (
+            all_fitnesses,
+            all_descriptors,
+            all_extra_scores,
+        ) = jax.lax.scan(_sampling_scan, (random_key), (), length=num_loops)
+        # jax.lax.scan stacks every output along a new leading axis: fold that
+        # axis back so that both branches return results with the same layout
+        all_fitnesses = jnp.hstack(all_fitnesses)
+        all_descriptors = jnp.hstack(all_descriptors)
+        all_extra_scores = jax.tree_util.tree_map(jnp.hstack, all_extra_scores)
+
+    return all_fitnesses, all_descriptors, all_extra_scores, random_key
diff --git a/tests/utils_test/uncertainty_metrics_test.py b/tests/utils_test/uncertainty_metrics_test.py
new file mode 100644
index 00000000..d49e2527
--- /dev/null
+++ b/tests/utils_test/uncertainty_metrics_test.py
@@ -0,0 +1,191 @@
+import functools
+
+import jax
+import jax.numpy as jnp
+import pytest
+
+from qdax.core.containers.mapelites_repertoire import (
+    MapElitesRepertoire,
+    compute_cvt_centroids,
+)
+from qdax.tasks.arm import arm_scoring_function, noisy_arm_scoring_function
+from qdax.utils.uncertainty_metrics import (
+    reevaluation_function,
+    reevaluation_reproducibility_function,
+)
+
+
+def test_uncertainty_metrics() -> None:
+    """Check the reevaluation utilities on deterministic and noisy arm tasks."""
+    seed = 42
+    num_reevals = 512
+    scan_size = 128
+    batch_size = 512
+    num_init_cvt_samples = 50000
+    num_centroids = 1024
+    genotype_dim = 8
+
+    # Init a random key
+    random_key = jax.random.PRNGKey(seed)
+
+    # First, init a deterministic environment
+    init_policies = jax.random.uniform(
+        random_key, shape=(batch_size, genotype_dim), minval=0, maxval=1
+    )
+    fitnesses, descriptors, extra_scores, random_key = arm_scoring_function(
+        init_policies, random_key
+    )
+
+    # Initialise a container
+    centroids, random_key = compute_cvt_centroids(
+        num_descriptors=2,
+        num_init_cvt_samples=num_init_cvt_samples,
+        num_centroids=num_centroids,
+        minval=jnp.array([0.0, 0.0]),
+        maxval=jnp.array([1.0, 1.0]),
+        random_key=random_key,
+    )
+    repertoire = MapElitesRepertoire.init(
+        genotypes=init_policies,
+        fitnesses=fitnesses,
+        descriptors=descriptors,
+        centroids=centroids,
+        extra_scores=extra_scores,
+    )
+
+    # Initialise an empty container for corrected repertoire
+    fitnesses = jnp.full_like(fitnesses, -jnp.inf)
+    empty_corrected_repertoire = MapElitesRepertoire.init(
+        genotypes=init_policies,
+        fitnesses=fitnesses,
+        descriptors=descriptors,
+        centroids=centroids,
+        extra_scores=extra_scores,
+    )
+
+    # Test that reevaluation_function accurately predicts no change
+    corrected_repertoire, random_key = reevaluation_function(
+        repertoire=repertoire,
+        empty_corrected_repertoire=empty_corrected_repertoire,
+        scoring_fn=arm_scoring_function,
+        num_reevals=num_reevals,
+        random_key=random_key,
+    )
+    pytest.assume(
+        jnp.allclose(
+            corrected_repertoire.fitnesses, repertoire.fitnesses, rtol=1e-05, atol=1e-05
+        )
+    )
+
+    # Test that scanned reevaluation_function accurately predicts no change
+    corrected_repertoire, random_key = reevaluation_function(
+        repertoire=repertoire,
+        empty_corrected_repertoire=empty_corrected_repertoire,
+        scoring_fn=arm_scoring_function,
+        num_reevals=num_reevals,
+        random_key=random_key,
+        scan_size=scan_size,
+    )
+    pytest.assume(
+        jnp.allclose(
+            corrected_repertoire.fitnesses, repertoire.fitnesses, rtol=1e-05, atol=1e-05
+        )
+    )
+
+    # Test that reevaluation_reproducibility_function accurately predicts no change
+    (
+        corrected_repertoire,
+        fit_reproducibility_repertoire,
+        desc_reproducibility_repertoire,
+        random_key,
+    ) = reevaluation_reproducibility_function(
+        repertoire=repertoire,
+        empty_corrected_repertoire=empty_corrected_repertoire,
+        scoring_fn=arm_scoring_function,
+        num_reevals=num_reevals,
+        random_key=random_key,
+    )
+    pytest.assume(
+        jnp.allclose(
+            corrected_repertoire.fitnesses, repertoire.fitnesses, rtol=1e-05, atol=1e-05
+        )
+    )
+    zero_fitnesses = jnp.where(
+        repertoire.fitnesses > -jnp.inf,
+        0.0,
+        -jnp.inf,
+    )
+    pytest.assume(
+        jnp.allclose(
+            fit_reproducibility_repertoire.fitnesses,
+            zero_fitnesses,
+            rtol=1e-05,
+            atol=1e-05,
+        )
+    )
+    pytest.assume(
+        jnp.allclose(
+            desc_reproducibility_repertoire.fitnesses,
+            zero_fitnesses,
+            rtol=1e-05,
+            atol=1e-05,
+        )
+    )
+
+    # Second, init a stochastic environment
+    init_policies = jax.random.uniform(
+        random_key, shape=(batch_size, genotype_dim), minval=0, maxval=1
+    )
+    noisy_scoring_function = functools.partial(
+        noisy_arm_scoring_function,
+        fit_variance=0.01,
+        desc_variance=0.01,
+        params_variance=0.0,
+    )
+    fitnesses, descriptors, extra_scores, random_key = noisy_scoring_function(
+        init_policies, random_key
+    )
+
+    # Initialise a container
+    centroids, random_key = compute_cvt_centroids(
+        num_descriptors=2,
+        num_init_cvt_samples=num_init_cvt_samples,
+        num_centroids=num_centroids,
+        minval=jnp.array([0.0, 0.0]),
+        maxval=jnp.array([1.0, 1.0]),
+        random_key=random_key,
+    )
+    repertoire = MapElitesRepertoire.init(
+        genotypes=init_policies,
+        fitnesses=fitnesses,
+        descriptors=descriptors,
+        centroids=centroids,
+        extra_scores=extra_scores,
+    )
+
+    # Initialise an empty container for corrected repertoire
+    fitnesses = jnp.full_like(fitnesses, -jnp.inf)
+    empty_corrected_repertoire = MapElitesRepertoire.init(
+        genotypes=init_policies,
+        fitnesses=fitnesses,
+        descriptors=descriptors,
+        centroids=centroids,
+        extra_scores=extra_scores,
+    )
+
+    # Test that reevaluation_reproducibility_function runs and keeps >= 1 solution
+    (
+        corrected_repertoire,
+        fit_reproducibility_repertoire,
+        desc_reproducibility_repertoire,
+        random_key,
+    ) = reevaluation_reproducibility_function(
+        repertoire=repertoire,
+        empty_corrected_repertoire=empty_corrected_repertoire,
+        scoring_fn=noisy_scoring_function,
+        num_reevals=num_reevals,
+        random_key=random_key,
+    )
+    pytest.assume(jnp.any(corrected_repertoire.fitnesses > -jnp.inf))
+    pytest.assume(jnp.any(fit_reproducibility_repertoire.fitnesses > -jnp.inf))
+    pytest.assume(jnp.any(desc_reproducibility_repertoire.fitnesses > -jnp.inf))