From 52d2dc8051bc1fe04aab3cf9771df2e186ec883e Mon Sep 17 00:00:00 2001
From: Noiredd <snowball91b@gmail.com>
Date: Sun, 26 Apr 2020 22:54:20 +0200
Subject: [PATCH] test suite: database update test framework + simple tests

See the test/README.md entry for details.
---
 filmatyk/database.py  |  12 ++-
 test/README.md        |  36 ++++++-
 test/test_database.py | 230 ++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 276 insertions(+), 2 deletions(-)
 create mode 100644 test/test_database.py

diff --git a/filmatyk/database.py b/filmatyk/database.py
index 44b6a07..fd0c567 100644
--- a/filmatyk/database.py
+++ b/filmatyk/database.py
@@ -9,18 +9,25 @@ def __init__(self, itemtype:str, api:object, callback):
     self.itemtype = itemtype
     self.callback = callback
     self.items = []
+    self.ids = set() # TODO: optimize using cached IDs
     self.api = api
     self.isDirty = False # are there any changes that need to be saved?
+
   # INTERFACE
   def getItems(self):
     return self.items.copy()
+
   def getItemByID(self, id:int):
     for item in self.items:
       if item.getRawProperty('id') == id:
         return item
+
+  def __iter__(self):
+    return iter(self.items)  # iterate directly over the stored items
+
   # Serialization-deserialization
   @staticmethod
-  def restoreFromString(itemtype:object, string:str, api:object, callback):
+  def restoreFromString(itemtype:str, string:str, api:object, callback):
     newDatabase = Database(itemtype, api, callback)
     if not string:
       # simply return a raw, empty DB
@@ -29,8 +36,10 @@ def restoreFromString(itemtype:object, string:str, api:object, callback):
     itemclass = containers.classByString[itemtype]
     newDatabase.items = [itemclass(**dct) for dct in listOfDicts]
     return newDatabase
+
   def storeToString(self):
     return json.dumps([item.asDict() for item in self.items])
+
   # Data acquisition
   def softUpdate(self):
     self.callback(0) #display the progress bar
@@ -74,6 +83,7 @@ def softUpdate(self):
     self.callback(-1)
     self.isDirty = True
     return True
+
   def hardUpdate(self):
     # in theory, this removes all existing items and recollects the whole data
     # but in practice this reacquisition may fail - in which case we shouldn't
diff --git a/test/README.md b/test/README.md
index a36e789..104ac38 100644
--- a/test/README.md
+++ b/test/README.md
@@ -1,7 +1,7 @@
 ## Filmatyk - test suite
 
 ### API tests
-[`test.api`](test_api.py) performs tests of the `FilmwebAPI` class
+[`test_api.py`](test_api.py) performs tests of the `FilmwebAPI` class
 ([`filmweb.py`](../filmatyk/filmweb.py)) - fetching and parsing content from Filmweb.
 
 Testing the whole program would typically require logging in to Filmweb.pl,
@@ -35,3 +35,37 @@ while their ratings are elsewhere.
 Tests are done sequentially, from locating the sources for data,
 through parsing a single entity, to parsing a complete page.
 
+### Database tests
+[`test_database.py`](test_database.py) performs tests of the `Database` class
+([`database.py`](../filmatyk/database.py)) - updating and serialization (TODO).
+
+For speed and convenience, the update is not performed online.
+Instead, a fake API object is created (`FakeAPI`) that mimics the normal `FilmwebAPI` behavior,
+except instead of connecting to Filmweb.pl it reads data cached previously by the API tests.
+
+For debugging and development, a `DatabaseDifference` class is introduced
+to encapsulate the notion of, well, difference between two `Database` objects.
+This object is constructed by a static method `compute` and is `bool`-convertible,
+which allows implementing a `!=` operator for the `Database`.
+Additionally, it is `str`-convertible, allowing nice printing for explaining the difference.
+Before the tests are run, `DatabaseDifference.compute` is injected into the `Database`
+as the `__ne__` operator, so all comparisons during the testing are done this way.
+
+*Note*: `Database` is only tested for the `Movie` item type,
+as the algorithm is abstract with respect to the item type.
+
+#### Update tests
+
+Database update tests are performed by loading a reference `Database` first
+(this utilizes assets cached in API tests).
+Each test simulates a situation in which this reference `Database` is the desired outcome,
+and the "current" state is generated dynamically according to some scenario.
+`UpdateScenario` encapsulates the idea of such a scenario,
+for example "the current state misses the first two items" could be expressed by:  
+`scenario = UpdateScenario(removals=[0, 1])`  
+The function `makeModifiedDatabase` ingests such an `UpdateScenario` object
+and constructs a new `Database`, simulating a previous state (as if "undoing" an update).
+During the test, this "undone" object attempts to update itself to the reference state.
+Results are compared using the aforementioned `DatabaseDifference`.
+
+
diff --git a/test/test_database.py b/test/test_database.py
new file mode 100644
index 0000000..f4e75f3
--- /dev/null
+++ b/test/test_database.py
@@ -0,0 +1,230 @@
+import os
+import sys
+from typing import List, Set, Tuple
+import unittest
+
+sys.path.append(os.path.join('..', 'filmatyk'))
+import containers
+import database
+import filmweb
+
+from api_prototype import loadDatabase, getApi
+
+class DatabaseDifference():
+  """Represents a difference between two DBs.
+
+  Can be constructed using the "compute" @staticmethod, which can be used to
+  replace the __ne__ (!=) operator on the Database class. This way, comparing
+  (db1 != db2) returns an instance of this class, which:
+  * holds detailed information on difference, specifically two sets of IDs (one
+    for objects present in db1 and not in db2, other for vice-versa) and a dict
+    mapping each of those IDs to its Item,
+  * is bool-convertible (True when the DBs differ), allowing use in if clauses,
+  * has __str__ and __repr__ that return a readable report of the difference.
+
+  Example usage:
+    db1:database.Database
+    db2:database.Database
+    diff = db1 != db2
+    print(diff)
+  """
+  @staticmethod
+  def ne_to_eq(a, b):
+    """Implements == as the negation of !=.
+
+    Since "compute" overrides __ne__, == is derived from it, not vice-versa.
+    """
+    return not (a != b)
+
+  @staticmethod
+  def compute(db1, db2):
+    """Finds the difference between the two objects (only IDs are compared)."""
+    # Work with IDs only
+    ids1 = {item.getRawProperty('id') for item in db1}
+    ids2 = {item.getRawProperty('id') for item in db2}
+    # IDs exclusive to each side
+    only_in_1 = ids1 - ids2
+    only_in_2 = ids2 - ids1
+    # Extract Item instances for pretty printing
+    items_1 = [item for item in db1 if item.getRawProperty('id') in only_in_1]
+    items_2 = [item for item in db2 if item.getRawProperty('id') in only_in_2]
+    return DatabaseDifference(only_in_1, only_in_2, items_1 + items_2)
+
+  def __init__(self, ids1:Set[int], ids2:Set[int], items:List[containers.Item]):
+    self.ids1 = ids1
+    self.ids2 = ids2
+    self.items = {item.getRawProperty('id'): item for item in items}
+    self.equal = not (self.ids1 or self.ids2)
+
+  def __str__(self):
+    """Lists the IDs (with titles) found in only one of the DBs, per side."""
+    if self.equal:
+      return 'These databases are equal!'
+    lines = []
+    if self.ids1:
+      lines.append('These {} IDs were present only in DB1:'.format(len(self.ids1)))
+      lines.extend('\t{} ({})'.format(i, self.items[i]['title']) for i in self.ids1)
+    if self.ids2:
+      lines.append('These {} IDs were present only in DB2:'.format(len(self.ids2)))
+      lines.extend('\t{} ({})'.format(i, self.items[i]['title']) for i in self.ids2)
+    return '\n'.join(lines)
+
+  def __repr__(self):
+    # __repr__ must return a str; it used to print and return None (TypeError)
+    return str(self)
+
+  def __bool__(self):
+    return not self.equal
+
+
+class FakeAPI(filmweb.FilmwebAPI):
+  """Loads cached data instead of connecting online.
+
+  When initializing, will look for HTML files in the given directory and treat
+  them as "pages" to load data from, later when emulating "getItemsPage".
+  """
+  def __init__(self, src_path:str='', itemtype:str='Movie'):
+    super(FakeAPI, self).__init__(None)
+    self.src_path = src_path
+    self.page_paths = self.initPages()
+    self.item_count, self.items_per_page = self.initAnalyze(itemtype)
+
+  def initPages(self):
+    """Finds HTML files with movie ratings cached by the API tests."""
+    if not os.path.exists(self.src_path):
+      return []
+    # scandir yields entries in arbitrary order: sort by (length, name) so that
+    # "movies_2.html" precedes "movies_10.html", keeping pages in numeric order
+    return sorted((
+      item.path for item in os.scandir(self.src_path)
+      if item.name.endswith('.html') and item.name.startswith('movies_')
+    ), key=lambda path: (len(path), path))
+
+  def initAnalyze(self, itemtype:str):
+    """Checks how many items are in the stored files, and how many per page."""
+    counts = [
+      len(self.parsePage(self.fetchPage(path), itemtype))
+      for path in self.page_paths
+    ]
+    if not counts:
+      raise FileNotFoundError('No cached movie pages in "{}"'.format(self.src_path))
+    # Return in the same format as getNumOf. The first page is either a full
+    # page or the only one, so its length is the count of items per page.
+    return sum(counts), counts[0]
+
+  def checkSession(self):
+    """First part of the hack - don't bother with the session at all."""
+    return True
+
+  def fetchPage(self, path:str):
+    """Load HTML from file instead of URL."""
+    with open(path, 'r', encoding='utf-8') as html:
+      markup = html.read()
+    return filmweb.BS(markup, features='lxml')
+
+  def getItemsPage(self, itemtype:str, page:int=1):
+    """Hack to use cached HTMLs instead of online session.
+
+    Pages are numbered from 1, matching their position in "page_paths".
+    """
+    path = self.page_paths[page - 1]
+    return self.parsePage(self.fetchPage(path), itemtype)
+
+  def getNumOf(self, itemtype:str):
+    """Simply return the values we have computed earlier (initAnalyze)."""
+    return self.item_count, self.items_per_page
+
+
+class UpdateScenario():
+  """Database modification scenario to obtain a simulated previous state.
+
+  Contains:
+  * a list of Item indices to remove from the Database - a new Database created
+    via this removal will look like these items were yet to be added,
+  * a list of (index, ID) tuples of Items to add to the Database - simulates
+    removal of items in the same manner.
+  """
+  def __init__(self, removals:List[int]=None, additions:List[Tuple[int,int]]=None):
+    self.removals = list(removals or [])  # copied: no shared mutable defaults
+    self.additions = list(additions or [])
+
+
+class TestDatabaseUpdates(unittest.TestCase):
+  """Test Database updates capability in different initial conditions.
+
+  Each test consists of the following 3 steps:
+  * load an original Database,
+  * perform some change to its content, simulating some earlier point in time
+    (e.g. where some Items were not yet present),
+  * call a soft update.
+  The desired result is a Database back in the original state. Any differences
+  are considered failures.
+
+  The update itself is performed via a proxy, which loads data cached from
+  earlier tests instead of requiring a live and authenticated session.
+  """
+  @classmethod
+  def setUpClass(cls):
+    # Inject the comparison operators here rather than only under __main__, so
+    # that the tests also work when run via "python -m unittest" or pytest
+    database.Database.__ne__ = DatabaseDifference.compute
+    database.Database.__eq__ = DatabaseDifference.ne_to_eq
+    cls.api = FakeAPI('data')
+    # Create the original database and fill it with available cached data
+    cls.orig_db = database.Database(
+      itemtype='Movie', api=cls.api, callback=lambda x: x
+    )
+    for page in range(1, len(cls.api.page_paths) + 1):
+      cls.orig_db.items += cls.api.getItemsPage('Movie', page=page)
+
+  @classmethod
+  def makeModifiedDatabase(cls, scenario:UpdateScenario):
+    """Creates a new DB by modifying the copy according to the scenario."""
+    # Create a bare new instance
+    new_db = database.Database(
+      itemtype=cls.orig_db.itemtype, api=cls.orig_db.api, callback=cls.orig_db.callback
+    )
+    # Remove items according to the scenario
+    new_db.items = [
+      item for i, item in enumerate(cls.orig_db.items)
+      if i not in scenario.removals
+    ]
+    # Add new items: all are clones of the last available item, with changed ID
+    template = new_db.items[-1].asDict()
+    template.pop('id')  # that will be replaced
+    item_cls = containers.classByString[new_db.itemtype]
+    # Create items and insert on their respective places
+    for index, item_id in scenario.additions:
+      new_item = item_cls(id=item_id, **template)
+      new_db.items.insert(index, new_item)
+    return new_db
+
+  def __test_body(self, scenario):
+    """since they all look the same..."""
+    alter_db = self.makeModifiedDatabase(scenario)
+    # Make sure the databases are actually different!
+    self.assertNotEqual(alter_db, self.orig_db)
+    # Call update and check difference
+    alter_db.softUpdate()
+    self.assertEqual(alter_db, self.orig_db)
+
+  def test_singleAddition(self):
+    """Add a single missing item."""
+    scenario = UpdateScenario(removals=[0])
+    self.__test_body(scenario)
+
+  def test_simpleAddition(self):
+    """Add a few items missing from the first page."""
+    scenario = UpdateScenario(removals=[0, 1, 2])
+    self.__test_body(scenario)
+
+  def test_nonContinuousAddition(self):
+    """Add a few items non-continuously missing from the first page."""
+    scenario = UpdateScenario(removals=[0, 1, 2, 3, 6])
+    self.__test_body(scenario)
+
+
+if __name__ == "__main__":
+  database.Database.__ne__ = DatabaseDifference.compute  # != returns a DatabaseDifference
+  database.Database.__eq__ = DatabaseDifference.ne_to_eq  # == is defined as not (a != b)
+  unittest.main()