Skip to content

Commit

Permalink
test suite: database update test framework + simple tests
Browse files Browse the repository at this point in the history
See the test/README.md entry for details.
  • Loading branch information
Noiredd committed Apr 26, 2020
1 parent 69e6a8d commit 52d2dc8
Show file tree
Hide file tree
Showing 3 changed files with 276 additions and 2 deletions.
12 changes: 11 additions & 1 deletion filmatyk/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,18 +9,25 @@ def __init__(self, itemtype:str, api:object, callback):
self.itemtype = itemtype
self.callback = callback
self.items = []
self.ids = set() # TODO: optimize using cached IDs
self.api = api
self.isDirty = False # are there any changes that need to be saved?

# INTERFACE
def getItems(self):
return self.items.copy()

def getItemByID(self, id:int):
for item in self.items:
if item.getRawProperty('id') == id:
return item

def __iter__(self):
return self.items.__iter__()

# Serialization-deserialization
@staticmethod
def restoreFromString(itemtype:object, string:str, api:object, callback):
def restoreFromString(itemtype:str, string:str, api:object, callback):
newDatabase = Database(itemtype, api, callback)
if not string:
# simply return a raw, empty DB
Expand All @@ -29,8 +36,10 @@ def restoreFromString(itemtype:object, string:str, api:object, callback):
itemclass = containers.classByString[itemtype]
newDatabase.items = [itemclass(**dct) for dct in listOfDicts]
return newDatabase

def storeToString(self):
return json.dumps([item.asDict() for item in self.items])

# Data acquisition
def softUpdate(self):
self.callback(0) #display the progress bar
Expand Down Expand Up @@ -74,6 +83,7 @@ def softUpdate(self):
self.callback(-1)
self.isDirty = True
return True

def hardUpdate(self):
# in theory, this removes all existing items and recollects the whole data
# but in practice this reacquisition may fail - in which case we shouldn't
Expand Down
36 changes: 35 additions & 1 deletion test/README.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
## Filmatyk - test suite

### API tests
[`test.api`](test_api.py) performs tests of the `FilmwebAPI` class
[`test_api.py`](test_api.py) performs tests of the `FilmwebAPI` class
([`filmweb.py`](../filmatyk/filmweb.py)) - fetching and parsing content from Filmweb.

Testing the whole program would typically require logging in to Filmweb.pl,
Expand Down Expand Up @@ -35,3 +35,37 @@ while their ratings are elsewhere.
Tests are done sequentially, from locating the sources for data,
through parsing a single entity, to parsing a complete page.

### Database tests
[`test_database.py`](test_database.py) performs tests of the `Database` class
([`database.py`](../filmatyk/database.py)) - updating and serialization (TODO).

For speed and convenience, the update is not performed online.
Instead, a fake API object is created (`FakeAPI`) that mimmicks the normal `FilmwebAPI` behavior,
except instead of connecting to Filmweb.pl it reads data cached previously by the API tests.

For debugging and development, a `DatabaseDifference` class is introduced
to encapsulate the notion of, well, difference between two `Database` objects.
This object is constructed by a static method `compute` and is `bool`-convertible,
which allows implementing a `!=` operator for the `Database`.
Additionally, it is `str`-convertible, allowing nice printing for explaining the difference.
Before the tests are run, `DatabaseDifference.compute` is injected into the `Database`
as the `__ne__` operator, so all comparisons during the testing are done this way.

*Note*: `Database` is only tested for the `Movie` item type,
as the algorithm is abstract with respect to the item type.

#### Update tests

Database update tests are performed by loading a reference `Database` first
(this utilizes assets cached in API tests).
Each test simulates a situation in which this reference `Database` is the desired outcome,
and the "current" state is generated dynamically according to some scenario.
`UpdateScenario` encapsulates the idea of such scenario,
for example "the current state misses the first two items" could be expressed by:
`scenario = UpdateScenario(removals=[0, 1])`
The function `makeModifiedDatabase` ingests such `Scenario` object
and constructs a new `Database`, simulating a previous state (as if "undoing" an update).
During the test, this "undone" object attempts to update itself to the reference state.
Results are compared using the aforementioned `DatabaseDifference`.


230 changes: 230 additions & 0 deletions test/test_database.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,230 @@
import os
import sys
from typing import List, Set, Tuple
import unittest

sys.path.append(os.path.join('..', 'filmatyk'))
import containers
import database
import filmweb

from api_prototype import loadDatabase, getApi

class DatabaseDifference():
"""Represents a difference between two DBs.
Can be constructed using the "compute" @staticmethod, which can be used to
replace the __ne__ (!=) operator on the Database class. This way, comparing
(db1 != db2) returns an instance of this class, which:
* holds detailed information on difference, specifically two sets of IDs (one
for objects present in db1 and not in db2, other for vice-versa) and a list
of all differing Items,
* is bool-convertible, allowing its usage in if clauses,
* has a __repr__ so can be pretty printed.
Example usage:
db1:database.Database
db2:database.Database
diff = db1 != db2
print(diff)
"""
@staticmethod
def ne_to_eq(a, b):
"""Since overriding __ne__ by "compute" makes more sense than __eq__,
we invert != to obtain ==, not the other way around.
"""
return not (a != b)

@staticmethod
def compute(db1, db2):
"""Finds the difference between the two objects."""
# Work with IDs only
ids1 = set(item.getRawProperty('id') for item in db1)
ids2 = set(item.getRawProperty('id') for item in db2)
# Compute differences
common_ids = ids1.intersection(ids2)
only_in_1 = ids1.difference(common_ids)
only_in_2 = ids2.difference(common_ids)
# Extract Item instances for pretty printing
items_1 = [item for item in db1 if item.getRawProperty('id') in only_in_1]
items_2 = [item for item in db2 if item.getRawProperty('id') in only_in_2]
return DatabaseDifference(only_in_1, only_in_2, items_1+items_2)

def __init__(self, ids1:Set[int], ids2:Set[int], items:List[containers.Item]):
self.ids1 = ids1
self.ids2 = ids2
self.items = {item.getRawProperty('id'): item for item in items}
self.equal = len(self.ids1) == 0 and len(self.ids2) == 0

def __str__(self):
if self.equal:
return 'These databases are equal!'
else:
lines = []
if self.ids1:
lines.append('These {} IDs were present only in DB1:'.format(len(self.ids1)))
lines.extend('\t{} ({})'.format(i, self.items[i]['title']) for i in self.ids1)
if self.ids2:
lines.append('These {} IDs were present only in DB2:'.format(len(self.ids2)))
lines.extend('\t{} ({})'.format(i, self.items[i]['title']) for i in self.ids2)
return '\n'.join(lines)

def __repr__(self):
print(self)

def __bool__(self):
return not self.equal


class FakeAPI(filmweb.FilmwebAPI):
"""Loads cached data instead of connecting online.
When initializing, will look for HTML files in the given directory and treat
them as "pages" to load data from, later when emulating "getItemsPage".
"""
def __init__(self, src_path:str='', itemtype:str='Movie'):
super(FakeAPI, self).__init__(None)
self.src_path = src_path
self.page_paths = self.initPages()
self.item_count, self.items_per_page = self.initAnalyze(itemtype)

def initPages(self):
"""Finds HTML files with movie ratings cached by the API tests."""
if not os.path.exists(self.src_path):
return []
pages = [
item.path for item in os.scandir(self.src_path)
if item.name.endswith('.html') and item.name.startswith('movies_')
]
return pages

def initAnalyze(self, itemtype:str):
"""Checks how many items are in the stored files, and how many per page."""
counts = []
for path in self.page_paths:
page = self.fetchPage(path)
items = self.parsePage(page, itemtype)
counts.append(len(items))
# Return in the same format as getNumOf.
# The first page will either have exactly as many items as any other page,
# or will contain all items - in either case its length being the count of
# items per page.
return sum(counts), counts[0]

def checkSession(self):
"""First part of the hack - don't bother with the session at all."""
return True

def fetchPage(self, path:str):
"""Load HTML from file instead of URL."""
with open(path, 'r', encoding='utf-8') as html:
page = filmweb.BS(html.read(), features='lxml')
return page

def getItemsPage(self, itemtype:str, page:int=1):
"""Hack to use cached HTMLs instead of online session."""
path = self.page_paths[page - 1]
#path = os.path.join(self.src_path, 'movies_{}.html'.format(page))
page = self.fetchPage(path)
items = self.parsePage(page, itemtype)
return items

def getNumOf(self, itemtype:str):
"""Simply return the values we have computed earlier (initAnalyze)."""
return self.item_count, self.items_per_page


class UpdateScenario():
"""Database modification scenario to obtain a simulated previous state.
Contains:
* a list of Item indices to remove from the Database - a new Database created
via this removal will look like these items were yet to be added,
* a list of tuples of Item indices and IDs to add to the Database - simulates
removal of items in the same manner.
"""
def __init__(self, removals:List[int]=[], additions:List[Tuple[int,int]]=[]):
self.removals = removals
self.additions = additions


class TestDatabaseUpdates(unittest.TestCase):
"""Test Database updates capability in different initial conditions.
Each test consists of the following 3 steps:
* load an original Database,
* perform some change to its content, simulating some earlier point in time
(e.g. where some Items were not yet present),
* call a soft update.
The desired result is a Database back in the original state. Any differences
are considered failures.
The update itself is performed via a proxy, which loads data cached from
earlier tests instead of requiring a live and authenticated session.
"""
@classmethod
def setUpClass(self):
self.api = FakeAPI('data')
# Create the original database
self.orig_db = database.Database(
itemtype='Movie', api=self.api, callback=lambda x: x
)
# Fill it with available cached data
for i in range(len(self.api.page_paths)):
self.orig_db.items += self.api.getItemsPage('Movie', page=i+1)

@classmethod
def makeModifiedDatabase(self, scenario:UpdateScenario):
"""Creates a new DB by modifying the copy according to the scenario."""
# Create a bare new instance
new_db = database.Database(
itemtype=self.orig_db.itemtype,
api=self.orig_db.api,
callback=self.orig_db.callback,
)
# Remove items according to the scenario
new_db.items = [
item for i, item in enumerate(self.orig_db.items)
if i not in scenario.removals
]
# Add new items according to the scenario
# The items are all clones of the last available item, with changed ID
template = new_db.items[-1].asDict()
template.pop('id') # that will be replaced
item_cls = containers.classByString[new_db.itemtype]
# Create items and insert on their respective places
for index, item_id in scenario.additions:
new_item = item_cls(id=item_id, **template)
new_db.items.insert(index, new_item)
return new_db

def __test_body(self, scenario):
"""since they all look the same..."""
alter_db = self.makeModifiedDatabase(scenario)
# Make sure the databases are actually different!
self.assertNotEqual(alter_db, self.orig_db)
# Call update and check difference
alter_db.softUpdate()
self.assertEqual(alter_db, self.orig_db)

def test_singleAddition(self):
"""Add a single missing item."""
scenario = UpdateScenario(removals=[0])
self.__test_body(scenario)

def test_simpleAddition(self):
"""Add a few items missing from the first page."""
scenario = UpdateScenario(removals=[0, 1, 2])
self.__test_body(scenario)

def test_nonContinuousAddition(self):
"""Add a few items non-continuously missing from the first page."""
scenario = UpdateScenario(removals=[0, 1, 2, 3, 6])
self.__test_body(scenario)


if __name__ == "__main__":
database.Database.__ne__ = DatabaseDifference.compute
database.Database.__eq__ = DatabaseDifference.ne_to_eq
unittest.main()

0 comments on commit 52d2dc8

Please sign in to comment.