diff --git a/src/olympia/blocklist/mlbf.py b/src/olympia/blocklist/mlbf.py index 651de346847..235643deced 100644 --- a/src/olympia/blocklist/mlbf.py +++ b/src/olympia/blocklist/mlbf.py @@ -53,41 +53,29 @@ def generate_mlbf(stats, blocked, not_blocked): return cascade +# Extends the BlockType enum to include versions that have no block of any type class MLBFDataType(Enum): - HARD_BLOCKED = 'blocked' - # SOFT_BLOCKED = 'soft_blocked' + BLOCKED = BlockType.BLOCKED.label + SOFT_BLOCKED = BlockType.SOFT_BLOCKED.label NOT_BLOCKED = 'not_blocked' -def fetch_blocked_from_db(): - qs = BlockVersion.objects.filter( - version__file__is_signed=True, block_type=BlockType.BLOCKED - ).values_list('block__guid', 'version__version', 'version_id', named=True) - return set(qs) - - -def fetch_all_versions_from_db(excluding_version_ids=None): - from olympia.versions.models import Version - - qs = Version.unfiltered.exclude(id__in=excluding_version_ids or ()).values_list( - 'addon__addonguid__guid', 'version' - ) - return set(qs) - - class BaseMLBFLoader: def __init__(self, storage: SafeStorage): self.storage = storage + def data_type_key(self, key: MLBFDataType) -> str: + return f'{key.name.lower()}_items' + @cached_property def _raw(self): """ raw serializable data for the given MLBFLoader. """ - return {key.value: self[key] for key in MLBFDataType} + return {self.data_type_key(key): self[key] for key in MLBFDataType} def __getitem__(self, key: MLBFDataType) -> List[str]: - return getattr(self, f'{key.value}_items') + return getattr(self, self.data_type_key(key)) @cached_property def _cache_path(self): @@ -97,6 +85,10 @@ def _cache_path(self): def blocked_items(self) -> List[str]: raise NotImplementedError + @cached_property + def soft_blocked_items(self) -> List[str]: + raise NotImplementedError + @cached_property def not_blocked_items(self) -> List[str]: raise NotImplementedError @@ -110,51 +102,62 @@ def __init__(self, storage: SafeStorage): @cached_property def blocked_items(self) -> List[str]: - return self._data.get(MLBFDataType.HARD_BLOCKED.value) + return self._data.get(self.data_type_key(MLBFDataType.BLOCKED)) + + @cached_property + def soft_blocked_items(self) -> List[str]: + return self._data.get(self.data_type_key(MLBFDataType.SOFT_BLOCKED)) @cached_property def not_blocked_items(self) -> List[str]: - return self._data.get(MLBFDataType.NOT_BLOCKED.value) + return self._data.get(self.data_type_key(MLBFDataType.NOT_BLOCKED)) class MLBFDataBaseLoader(BaseMLBFLoader): - def __init__(self, storage: SafeStorage): - super().__init__(storage) - self._version_excludes = [] - - # TODO: there is an edge case where you create a new filter from - # a previously used time stamp. THis could lead to invalid files - # a filter using the DB should either clear the storage files - # or raise to not allow reusing the same time stamp. - # it is possibly debatable whether you should be able to - # determine the created_at time as an argument at all - - # Save the raw data to storage to be used by later instances - # of this filter. + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) with self.storage.open(self._cache_path, 'w') as f: json.dump(self._raw, f) @cached_property - def blocked_items(self) -> List[str]: - blocked = [] + def _all_blocks(self): + return BlockVersion.objects.filter(version__file__is_signed=True).values_list( + 'block__guid', 'version__version', 'version_id', 'block_type', named=True + ) + + def _format_blocks(self, block_type: BlockType) -> List[str]: + return MLBF.hash_filter_inputs( + [ + (version.block__guid, version.version__version) + for version in self._all_blocks + if version.block_type == block_type + ] + ) - for blocked_version in fetch_blocked_from_db(): - blocked.append( - (blocked_version.block__guid, blocked_version.version__version) - ) - self._version_excludes.append(blocked_version.version_id) + @cached_property + def blocked_items(self) -> List[str]: + return self._format_blocks(BlockType.BLOCKED) - return MLBF.hash_filter_inputs(blocked) + @cached_property + def soft_blocked_items(self) -> List[str]: + return self._format_blocks(BlockType.SOFT_BLOCKED) @cached_property def not_blocked_items(self) -> List[str]: - # see blocked_items - we need self._version_excludes populated - blocked_items = self.blocked_items + from olympia.versions.models import Version + + all_blocks_ids = [version.version_id for version in self._all_blocks] + not_blocked = MLBF.hash_filter_inputs( + Version.unfiltered.exclude( + id__in=all_blocks_ids or () + ).values_list('addon__addonguid__guid', 'version') + ) + # even though we exclude all the version ids in the query there's an # edge case where the version string occurs twice for an addon so we - # ensure not_blocked_items doesn't contain any blocked_items. - return MLBF.hash_filter_inputs( - fetch_all_versions_from_db(self._version_excludes) - set(blocked_items) + # ensure not_blocked_items doesn't contain any blocked_items or soft_blocked_items. + return list( + set(not_blocked) - set(self.blocked_items + self.soft_blocked_items) ) diff --git a/src/olympia/blocklist/tests/test_mlbf.py b/src/olympia/blocklist/tests/test_mlbf.py index 5e31df14b69..0ac4011375a 100644 --- a/src/olympia/blocklist/tests/test_mlbf.py +++ b/src/olympia/blocklist/tests/test_mlbf.py @@ -47,6 +47,10 @@ class TestStaticLoader(BaseMLBFLoader): def blocked_items(self): return ['blocked:version'] + @cached_property + def soft_blocked_items(self): + return ['softblocked:version'] + @cached_property def not_blocked_items(self): return ['notblocked:version'] @@ -74,8 +78,9 @@ def notblocked_items(self): def test_raw_contains_correct_data(self): loader = self.TestStaticLoader(self.storage) assert loader._raw == { - 'blocked': ['blocked:version'], - 'not_blocked': ['notblocked:version'], + 'blocked_items': ['blocked:version'], + 'soft_blocked_items': ['softblocked:version'], + 'not_blocked_items': ['notblocked:version'], } def test_invalid_key_access_raises(self): @@ -84,32 +89,33 @@ def test_invalid_key_access_raises(self): with self.assertRaises(AttributeError): loader['invalid'] with self.assertRaises(AttributeError): - loader[BlockType.BLOCKED] + loader[BlockType.BANANA] def test_valid_key_access_returns_expected_data(self): loader = self.TestStaticLoader(self.storage) - assert loader[MLBFDataType.HARD_BLOCKED] == ['blocked:version'] + assert loader[MLBFDataType.BLOCKED] == ['blocked:version'] assert loader[MLBFDataType.NOT_BLOCKED] == ['notblocked:version'] def test_cache_raw_data(self): loader = self.TestStaticLoader(self.storage) for data_type in MLBFDataType: - assert loader[data_type] == loader._raw[data_type.value] + assert loader[data_type] == loader._raw[loader.data_type_key(data_type)] # The source of truth should ultimately be the named cached properties # Even though _raw is cached, it should still return # the reference to the named property loader.blocked_items = [] - assert loader[MLBFDataType.HARD_BLOCKED] == [] + assert loader[MLBFDataType.BLOCKED] == [] class TestMLBFStorageLoader(_MLBFBase): def setUp(self): super().setUp() self._data = { - 'blocked': ['blocked:version'], - 'not_blocked': ['notblocked:version'], + 'blocked_items': ['blocked:version'], + 'not_blocked_items': ['notblocked:version'], + 'soft_blocked_items': ['softblocked:version'], } def test_raises_missing_file(self): @@ -138,7 +144,7 @@ def test_load_returns_expected_data(self): ) mlbf_data = MLBFDataBaseLoader(self.storage) - assert mlbf_data[MLBFDataType.HARD_BLOCKED] == MLBF.hash_filter_inputs( + assert mlbf_data[MLBFDataType.BLOCKED] == MLBF.hash_filter_inputs( [(block_version.block.guid, block_version.version.version)] ) assert mlbf_data[MLBFDataType.NOT_BLOCKED] == MLBF.hash_filter_inputs( @@ -156,7 +162,12 @@ def test_blocked_items_caches_excluded_version_ids(self): ) with self.assertNumQueries(2): mlbf_data = MLBFDataBaseLoader(self.storage) - assert mlbf_data._version_excludes == [block_version.version.id] + assert ( + MLBF.hash_filter_inputs( + [(block_version.block.guid, block_version.version.version)] + ) + not in mlbf_data.blocked_items + ) def test_hash_filter_inputs_returns_set_of_hashed_strings(self): """ @@ -365,3 +376,29 @@ def test_generate_filter_not_raises_if_all_versions_blocked(self): self._blocked_addon(file_kw={'is_signed': False}) assert mlbf.data.not_blocked_items == [] mlbf.generate_and_write_filter() + + def test_no_soft_blocked_versions_empty_array(self): + addon, block = self._blocked_addon() + self._block_version(block, self._version(addon), block_type=BlockType.BLOCKED) + mlbf = MLBF.generate_from_db('test') + assert ( + BlockVersion.objects.filter( + block_type=BlockType.SOFT_BLOCKED, + version__file__is_signed=True, + ).count() + == 0 + ) + assert mlbf.data.soft_blocked_items == [] + + def test_no_hard_blocked_versions_empty_array(self): + addon, block = self._blocked_addon() + self._block_version(block, self._version(addon), block_type=BlockType.SOFT_BLOCKED) + mlbf = MLBF.generate_from_db('test') + assert ( + BlockVersion.objects.filter( + block_type=BlockType.BLOCKED, + version__file__is_signed=True, + ).count() + == 0 + ) + assert mlbf.data.blocked_items == []