test suite: fetching and parsing tests for all item types
Noiredd committed Apr 26, 2020
1 parent aac15e9 commit 69e6a8d
Showing 3 changed files with 92 additions and 20 deletions.
1 change: 1 addition & 0 deletions filmatyk/filmweb.py
@@ -188,6 +188,7 @@ def getItemsPage(self, itemtype:str, page:int=1):
data = self.parsePage(page, itemtype)
return data

@enforceSession
def fetchPage(self, url):
"""Fetch the page and return its BeautifulSoup representation."""
try:
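The only functional change to filmweb.py is the new `@enforceSession` decorator on `fetchPage`. A decorator of this kind usually checks that the API object still holds a live session before the wrapped call runs, and triggers a (re)login otherwise. Below is a minimal sketch, assuming it wraps bound methods of `FilmwebAPI`; apart from `session`, which the tests assert on, the names are illustrative and not taken from filmweb.py:

import functools

def enforceSession(method):
  """Guard a FilmwebAPI method so it only runs with a live session (sketch)."""
  @functools.wraps(method)
  def wrapper(self, *args, **kwargs):
    # 'requestLogin' is an assumed name for whatever login hook the real class exposes.
    if getattr(self, 'session', None) is None:
      self.requestLogin()
    return method(self, *args, **kwargs)
  return wrapper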
4 changes: 2 additions & 2 deletions test/README.md
@@ -18,8 +18,8 @@ Test class `TestAPIBasics` encapsulates tests of the most fundamental functional
* logging in to Filmweb.pl,
* fetching raw HTML data.

However, the most important one is the `fetch_save` test, which not only grabs online data,
but also **stores it locally** (in `test/assets`), to simplify performing other tests.
However, the most important ones are the `fetch_save` tests, which not only grab online data,
but also **store it locally** (in `test/assets`), to simplify performing other tests.
Since this activity prompts for a Filmweb.pl **log-in**, it is disabled by default.
However, it is **required** to execute these tests at least once -
otherwise other tests will error out.
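Once enabled, one way to run only the login and fetch-and-save tests is a small unittest driver like the sketch below (it assumes execution from the `test/` directory; the file name is hypothetical):

# run_fetch_tests.py - illustrative helper, not part of the repository
import unittest

from test_api import TestAPIBasics

suite = unittest.TestSuite()
for name in ('test_01_login',
             'test_10_fetch_save_movies',
             'test_20_fetch_save_series',
             'test_30_fetch_save_games'):
  suite.addTest(TestAPIBasics(name))
unittest.TextTestRunner(verbosity=2).run(suite)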
107 changes: 89 additions & 18 deletions test/test_api.py
@@ -60,7 +60,7 @@ def test_01_login(self):
self.assertIsNotNone(api.session)
self.storeAPI(api)

def test_02_fetch_one(self):
def test_02_fetch_one_movie(self):
"""Attempt to download a single page of movie ratings from Filmweb."""
self.assertIsNotNone(self.api)
self.assertIsNotNone(self.api.session)
@@ -73,14 +73,15 @@ def test_02_fetch_one(self):
self.assertIsInstance(text, str)
self.assertGreater(len(text), 100 * 2 ** 10)

def test_03_fetch_save(self):
def test_10_fetch_save_movies(self):
"""Attempt to download 3 pages of movie ratings from Filmweb.
This also stores them as "assets" for other tests.
"""
N_PAGES = 3
for i in range(N_PAGES):
url = self.api.Constants.getUserMoviePage(self.api.username, page=i+1)
getURL = self.api.urlGenerationMethods['Movie']
url = getURL(self.api.username, page=i+1)
page = self.api.fetchPage(url)
path = os.path.join('assets', 'movies_{}.html'.format(i+1))
with open(path, 'w', encoding='utf-8') as html:
@@ -90,55 +90,125 @@ def test_03_fetch_save(self):
for i in range(N_PAGES):
self.assertIn('movies_{}.html'.format(i+1), os.listdir('assets'))

def test_20_fetch_save_series(self):
"""Attempt to download and save a page of series ratings."""
getURL = self.api.urlGenerationMethods['Series']
page_num = 1
url = getURL(self.api.username, page=page_num)
page = self.api.fetchPage(url)
path = os.path.join('assets', 'series_{}.html'.format(page_num))
with open(path, 'w', encoding='utf-8') as html:
text = page.prettify()
self.assertGreater(len(text), 100 * 2 ** 10)
html.write(text)
self.assertIn('series_{}.html'.format(page_num), os.listdir('assets'))

def test_30_fetch_save_games(self):
"""Attempt to download and save a page of game ratings."""
getURL = self.api.urlGenerationMethods['Game']
page_num = 1
url = getURL(self.api.username, page=page_num)
page = self.api.fetchPage(url)
path = os.path.join('assets', 'games_{}.html'.format(page_num))
with open(path, 'w', encoding='utf-8') as html:
text = page.prettify()
self.assertGreater(len(text), 100 * 2 ** 10)
html.write(text)
self.assertIn('games_{}.html'.format(page_num), os.listdir('assets'))
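Note that the fetch tests above no longer call `Constants.getUserMoviePage` directly; they look up a per-type URL builder in `self.api.urlGenerationMethods`. A minimal sketch of what such a mapping could look like, assuming a plain dict from item-type name to a callable taking a username and a page number; the URL paths are placeholders, not Filmweb's actual routes:

def _userItemsURL(username:str, section:str, page:int=1):
  # Placeholder URL scheme, for illustration only.
  return 'https://www.filmweb.pl/user/{}/{}?page={}'.format(username, section, page)

urlGenerationMethods = {
  'Movie':  lambda username, page=1: _userItemsURL(username, 'films', page),
  'Series': lambda username, page=1: _userItemsURL(username, 'serials', page),
  'Game':   lambda username, page=1: _userItemsURL(username, 'games', page),
}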


class TestAPIParsing(unittest.TestCase):
"""Test API parsing functionalities.
Starts with extraction of main data region - a div that holds details of all
items and ratings. Then tests parsing of individual items, finally of a whole
page.
Basic tests are done on Movies, later the suite extends to other types too.
Basics start with extraction of main data region - a div that holds details
of all items and ratings. Then tests parsing of individual items, finally of
a whole page.
"""

@classmethod
def setUpClass(self):
self.api = filmweb.FilmwebAPI(None)
self.page = None
with open(os.path.join('assets', 'movies_1.html'), 'r', encoding='utf-8') as html:
self.page = BS(html.read(), 'lxml')
self.moviePagePath = os.path.join('assets', 'movies_1.html')
self.seriesPagePath = os.path.join('assets', 'series_1.html')
self.gamePagePath = os.path.join('assets', 'games_1.html')

@staticmethod
def getPage(path:str):
"""Load a cached page into a BeautifulSoup format."""
with open(path, 'r', encoding='utf-8') as html:
return BS(html.read(), features='lxml')

def test_01_data_source_extract(self):
"""Find the main div containing details of rated objects."""
div = self.api.extractDataSource(self.page)
page = self.getPage(self.moviePagePath)
div = self.api.extractDataSource(page)
self.assertIsNotNone(div)
self.assertGreater(len(div.getText()), 10**4)

def test_02_item_divs_extract(self):
"""Retrieve all the item detail divs."""
div = self.api.extractDataSource(self.page)
page = self.getPage(self.moviePagePath)
div = self.api.extractDataSource(page)
items = self.api.extractItems(div)
self.assertGreater(len(items), 0)

def test_03_item_ratings_extract(self):
"""Retrieve all the item rating strings."""
div = self.api.extractDataSource(self.page)
page = self.getPage(self.moviePagePath)
div = self.api.extractDataSource(page)
items = self.api.extractItems(div)
ratings = self.api.extractRatings(div)
self.assertEqual(len(items), len(ratings))

def test_04_single_parsing(self):
"""Parse a single item and rating."""
div = self.api.extractDataSource(self.page)
def __test_single_body(self, page:BS, itemtype:str):
"""Performs the entire test of single item parsing."""
div = self.api.extractDataSource(page)
items = self.api.extractItems(div)
item = self.api.parseOne(items[0], 'Movie')
item = self.api.parseOne(items[0], itemtype)
# We don't know much about the parsed items, but they will have titles...
self.assertGreater(len(item['title']), 2)
ratings = self.api.extractRatings(div)
rating, rid = self.api.parseRating(ratings[0])
# ...and ratings, and IDs
self.assertIn('rating', rating.keys())
self.assertEqual(rid, item.getRawProperty('id'))

def test_10_parse_page(self):
def test_10_single_movie_parsing(self):
"""Parse a single movie and rating."""
page = self.getPage(self.moviePagePath)
self.__test_single_body(page, 'Movie')

def test_11_single_series_parsing(self):
"""Parse a single series and rating."""
page = self.getPage(self.seriesPagePath)
self.__test_single_body(page, 'Series')

def test_12_single_game_parsing(self):
"""Parse a single game and rating."""
page = self.getPage(self.gamePagePath)
self.__test_single_body(page, 'Game')

def test_20_parse_movie_page(self):
"""Parse an entire page of movies."""
page = self.getPage(self.moviePagePath)
items = self.api.parsePage(page, 'Movie')
# Again, it's hard to tell anything about the items we retrieve, except for
# the fact that they will exist, and no exception will be thrown during
# parsing.
self.assertGreater(len(items), 0)

def test_21_parse_series_page(self):
"""Parse an entire page of movies."""
page = self.getPage(self.seriesPagePath)
items = self.api.parsePage(page, 'Series')
self.assertGreater(len(items), 0)

def test_22_parse_game_page(self):
"""Parse an entire page of movies."""
items = self.api.parsePage(self.page, 'Movie')
page = self.getPage(self.gamePagePath)
items = self.api.parsePage(page, 'Game')
self.assertGreater(len(items), 0)


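Taken together, the parsing tests exercise the pipeline extractDataSource, then extractItems and extractRatings, then parseOne (or parsePage for a whole page at once). A condensed sketch of that flow on a single cached asset, using only the method names that appear in the tests above (the helper function itself is illustrative):

from bs4 import BeautifulSoup as BS

def parse_cached_page(api, path, itemtype='Movie'):
  # Load a saved asset the same way TestAPIParsing.getPage does.
  with open(path, 'r', encoding='utf-8') as html:
    page = BS(html.read(), features='lxml')
  div = api.extractDataSource(page)      # main div holding items and ratings
  items = api.extractItems(div)          # per-item detail divs
  ratings = api.extractRatings(div)      # matching rating strings
  first = api.parseOne(items[0], itemtype)
  rating, rid = api.parseRating(ratings[0])
  return first, rating, rid

For a full page, api.parsePage(page, itemtype) covers the same steps in a single call.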
