diff --git a/data/interfaces/bookstrap/config.html b/data/interfaces/bookstrap/config.html index 0076bafda..ab6c7266a 100644 --- a/data/interfaces/bookstrap/config.html +++ b/data/interfaces/bookstrap/config.html @@ -1261,6 +1261,12 @@

${title}

% +
+ +
+ +
+
diff --git a/data/interfaces/default/config.html b/data/interfaces/default/config.html index 05eda931b..526403686 100644 --- a/data/interfaces/default/config.html +++ b/data/interfaces/default/config.html @@ -879,6 +879,10 @@

Miscellaneous

(percent)
+
+ + +

diff --git a/lazylibrarian/__init__.py b/lazylibrarian/__init__.py index b44581918..02bf6ceed 100644 --- a/lazylibrarian/__init__.py +++ b/lazylibrarian/__init__.py @@ -113,7 +113,7 @@ CONFIG_GIT = ['GIT_REPO', 'GIT_USER', 'GIT_BRANCH', 'LATEST_VERSION', 'GIT_UPDATED', 'CURRENT_VERSION', 'COMMITS_BEHIND', 'INSTALL_TYPE'] CONFIG_NONWEB = ['LOGFILES', 'LOGSIZE', 'NAME_POSTFIX', 'DIR_PERM', 'FILE_PERM', 'BLOCKLIST_TIMER', - 'WALL_COLUMNS', 'MAX_PAGES'] + 'WALL_COLUMNS'] CONFIG_NONDEFAULT = ['BOOKSTRAP_THEME', 'AUDIOBOOK_TYPE', 'AUDIO_DIR', 'AUDIO_TAB', 'REJECT_AUDIO', 'REJECT_MAXAUDIO', 'REJECT_MINAUDIO', 'NEWAUDIO_STATUS', 'TOGGLES', 'AUDIO_TAB'] CONFIG_DEFINITIONS = { diff --git a/lazylibrarian/resultlist.py b/lazylibrarian/resultlist.py index 239cb1d2f..1e50aa34e 100644 --- a/lazylibrarian/resultlist.py +++ b/lazylibrarian/resultlist.py @@ -90,6 +90,7 @@ def findBestResult(resultlist, book, searchtype, source): prefix = 'tor_' logger.debug('Searching %s %s results for best %s match' % (len(resultlist), source, auxinfo)) + matches = [] for res in resultlist: resultTitle = unaccented_str(replace_all(res[prefix + 'title'], dictrepl)).strip() diff --git a/lazylibrarian/searchbook.py b/lazylibrarian/searchbook.py index dd57057ea..3f184c371 100644 --- a/lazylibrarian/searchbook.py +++ b/lazylibrarian/searchbook.py @@ -151,8 +151,10 @@ def search_book(books=None, library=None): modelist.remove('rss') break - if nproviders: + if resultlist: match = findBestResult(resultlist, book, searchtype, mode) + else: + match = None # if you can't find the book, try author/title without any "(extended details, series etc)" if not goodEnough(match) and '(' in book['bookName']: @@ -164,7 +166,10 @@ def search_book(books=None, library=None): elif mode == 'rss': resultlist = rss_resultlist - match = findBestResult(resultlist, book, searchtype, mode) + if resultlist: + match = findBestResult(resultlist, book, searchtype, mode) + else: + match = None # if you can't find the book under 
"books", you might find under general search # general search is the same as booksearch for torrents and rss, no need to check again @@ -172,14 +177,20 @@ def search_book(books=None, library=None): searchtype = 'general' if mode == 'nzb': resultlist, nproviders = IterateOverNewzNabSites(book, searchtype) - match = findBestResult(resultlist, book, searchtype, mode) + if resultlist: + match = findBestResult(resultlist, book, searchtype, mode) + else: + match = None # if still not found, try general search again without any "(extended details, series etc)" if not goodEnough(match) and '(' in book['searchterm']: searchtype = 'shortgeneral' if mode == 'nzb': resultlist, nproviders = IterateOverNewzNabSites(book, searchtype) - match = findBestResult(resultlist, book, searchtype, mode) + if resultlist: + match = findBestResult(resultlist, book, searchtype, mode) + else: + match = None if not goodEnough(match): logger.info("%s Searches for %s %s returned no results." % diff --git a/lazylibrarian/torrentparser.py b/lazylibrarian/torrentparser.py index b2845e1b0..63409602f 100644 --- a/lazylibrarian/torrentparser.py +++ b/lazylibrarian/torrentparser.py @@ -21,7 +21,7 @@ import lib.feedparser as feedparser from lazylibrarian import logger from lazylibrarian.cache import fetchURL -from lazylibrarian.formatter import plural, unaccented +from lazylibrarian.formatter import plural, unaccented, formatAuthorName from lib.BeautifulSoup import BeautifulSoup @@ -52,101 +52,109 @@ def TPB(book=None): elif book['library'] == 'magazine': cat = 0 - params = { - "category": cat, - "page": "0", - "orderby": "99" - } - searchURL = providerurl + "&%s" % urllib.urlencode(params) - - result, success = fetchURL(searchURL) - if not success: - # may return 404 if no results, not really an error - if '404' in result: - logger.debug(u"No results found from %s for %s" % (provider, book['searchterm'])) - else: - logger.debug(searchURL) - logger.debug('Error fetching data from %s: %s' % (provider, 
result)) - errmsg = result - result = False - + page = 0 results = [] + next_page = True + + while next_page: + + params = { + "category": cat, + "page": page, + "orderby": "99" + } + + searchURL = providerurl + "&%s" % urllib.urlencode(params) + next_page = False + result, success = fetchURL(searchURL) + if not success: + # may return 404 if no results, not really an error + if '404' in result: + logger.debug(u"No results found from %s for %s" % (provider, book['searchterm'])) + else: + logger.debug(searchURL) + logger.debug('Error fetching data from %s: %s' % (provider, result)) + errmsg = result + result = False + + if result: + logger.debug(u'Parsing results from %s' % (searchURL, provider)) + minimumseeders = int(lazylibrarian.CONFIG['NUMBEROFSEEDERS']) - 1 + soup = BeautifulSoup(result) + try: + table = soup.findAll('table')[0] + rows = table.findAll('tr') + except Exception: # no results = no table in result page + rows = [] - if result: - logger.debug(u'Parsing results from %s' % (searchURL, provider)) - minimumseeders = int(lazylibrarian.CONFIG['NUMBEROFSEEDERS']) - 1 - soup = BeautifulSoup(result) - try: - table = soup.findAll('table')[0] - rows = table.findAll('tr') - except Exception: # no results = no table in result page - rows = [] - - c1 = [] - c2 = [] + if len(rows) == 1: + rows = [] - if len(rows) > 1: for row in rows[1:]: - if len(row.findAll('td')) > 2: - c1.append(row.findAll('td')[1]) - c2.append(row.findAll('td')[2]) - - for col1, col2 in zip(c1, c2): - try: - title = unaccented(str(col1).split('title=')[1].split('>')[1].split('<')[0]) - magnet = str(col1).split('href="')[1].split('"')[0] - size = unaccented(col1.text.split(', Size ')[1].split('iB')[0]) - mult = 1 - try: - if 'K' in size: - size = size.split('K')[0] - mult = 1024 - elif 'M' in size: - size = size.split('M')[0] - mult = 1024 * 1024 - size = int(float(size) * mult) - except (ValueError, IndexError): - size = 0 - try: - seeders = int(col2.text) - except ValueError: - seeders = 0 
- - if minimumseeders < seeders: - # no point in asking for magnet link if not enough seeders - magurl = '%s/%s' % (host, magnet) - result, success = fetchURL(magurl) - if not success: - logger.debug('Error fetching url %s, %s' % (magurl, result)) - else: - magnet = None - new_soup = BeautifulSoup(result) - for link in new_soup.findAll('a'): - output = link.get('href') - if output and output.startswith('magnet'): - magnet = output - break - if not magnet or not title: - logger.debug('Missing magnet or title') - else: - if minimumseeders < seeders: - results.append({ - 'bookid': book['bookid'], - 'tor_prov': provider, - 'tor_title': title, - 'tor_url': magnet, - 'tor_size': str(size), - 'tor_type': 'magnet', - 'priority': lazylibrarian.CONFIG['TPB_DLPRIORITY'] - }) - logger.debug('Found %s. Size: %s' % (title, size)) + td = row.findAll('td') + if len(td) > 2: + try: + title = unaccented(str(td[1]).split('title=')[1].split('>')[1].split('<')[0]) + magnet = str(td[1]).split('href="')[1].split('"')[0] + size = unaccented(td[1].text.split(', Size ')[1].split('iB')[0]) + size = size.replace(' ', '') + mult = 1 + try: + if 'K' in size: + size = size.split('K')[0] + mult = 1024 + elif 'M' in size: + size = size.split('M')[0] + mult = 1024 * 1024 + elif 'G' in size: + size = size.split('G')[0] + mult = 1024 * 1024 * 1024 + size = int(float(size) * mult) + except (ValueError, IndexError): + size = 0 + try: + seeders = int(td[2].text) + except ValueError: + seeders = 0 + + if minimumseeders < int(seeders): + # no point in asking for magnet link if not enough seeders + magurl = '%s/%s' % (host, magnet) + result, success = fetchURL(magurl) + if not success: + logger.debug('Error fetching url %s, %s' % (magurl, result)) + else: + magnet = None + new_soup = BeautifulSoup(result) + for link in new_soup.findAll('a'): + output = link.get('href') + if output and output.startswith('magnet'): + magnet = output + break + if not magnet or not title: + logger.debug('Missing magnet or 
title') + else: + results.append({ + 'bookid': book['bookid'], + 'tor_prov': provider, + 'tor_title': title, + 'tor_url': magnet, + 'tor_size': str(size), + 'tor_type': 'magnet', + 'priority': lazylibrarian.CONFIG['TPB_DLPRIORITY'] + }) + logger.debug('Found %s. Size: %s' % (title, size)) + next_page = True else: logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders))) - else: - logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders))) - except Exception as e: - logger.error(u"An error occurred in the %s parser: %s" % (provider, str(e))) - logger.debug('%s: %s' % (provider, traceback.format_exc())) + except Exception as e: + logger.error(u"An error occurred in the %s parser: %s" % (provider, str(e))) + logger.debug('%s: %s' % (provider, traceback.format_exc())) + + page += 1 + if 0 < lazylibrarian.CONFIG['MAX_PAGES'] < page: + logger.warn('Maximum results page search reached, still more results available') + next_page = False logger.debug(u"Found %i result%s from %s for %s" % (len(results), plural(len(results)), provider, book['searchterm'])) @@ -193,74 +201,71 @@ def KAT(book=None): except Exception: # no results = no table in result page rows = [] - c0 = [] - c1 = [] - c3 = [] - - if len(rows) > 1: - for row in rows[1:]: - if len(row.findAll('td')) > 3: - c0.append(row.findAll('td')[0]) - c1.append(row.findAll('td')[1]) - c3.append(row.findAll('td')[3]) + if len(rows) == 1: + rows = [] - for col0, col1, col3 in zip(c0, c1, c3): - try: - title = unaccented(str(col0).split('cellMainLink">')[1].split('<')[0]) - # kat can return magnet or torrent or both. 
- magnet = '' - url = '' - mode = 'torrent' - try: - magnet = 'magnet' + str(col0).split('href="magnet')[1].split('"')[0] - mode = 'magnet' - except IndexError: - pass + for row in rows[1:]: + td = row.findAll('td') + if len(td) > 3: try: - url = 'http' + str(col0).split('href="http')[1].split('.torrent?')[0] + '.torrent' + title = unaccented(str(td[0]).split('cellMainLink">')[1].split('<')[0]) + # kat can return magnet or torrent or both. + magnet = '' + url = '' mode = 'torrent' - except IndexError: - pass + try: + magnet = 'magnet' + str(td[0]).split('href="magnet')[1].split('"')[0] + mode = 'magnet' + except IndexError: + pass + try: + url = 'http' + str(td[0]).split('href="http')[1].split('.torrent?')[0] + '.torrent' + mode = 'torrent' + except IndexError: + pass - if not url or (magnet and url and lazylibrarian.CONFIG['PREFER_MAGNET']): - url = magnet - mode = 'magnet' + if not url or (magnet and url and lazylibrarian.CONFIG['PREFER_MAGNET']): + url = magnet + mode = 'magnet' - try: - size = str(col1.text).replace(' ', '').upper() - mult = 1 - if 'K' in size: - size = size.split('K')[0] - mult = 1024 - elif 'M' in size: - size = size.split('M')[0] - mult = 1024 * 1024 - size = int(float(size) * mult) - except (ValueError, IndexError): - size = 0 - try: - seeders = int(col3.text) - except ValueError: - seeders = 0 - - if not url or not title: - logger.debug('Missing url or title') - elif minimumseeders < seeders: - results.append({ - 'bookid': book['bookid'], - 'tor_prov': provider, - 'tor_title': title, - 'tor_url': url, - 'tor_size': str(size), - 'tor_type': mode, - 'priority': lazylibrarian.CONFIG['KAT_DLPRIORITY'] - }) - logger.debug('Found %s. 
Size: %s' % (title, size)) - else: - logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders))) - except Exception as e: - logger.error(u"An error occurred in the %s parser: %s" % (provider, str(e))) - logger.debug('%s: %s' % (provider, traceback.format_exc())) + try: + size = str(td[1].text).replace(' ', '').upper() + mult = 1 + if 'K' in size: + size = size.split('K')[0] + mult = 1024 + elif 'M' in size: + size = size.split('M')[0] + mult = 1024 * 1024 + elif 'G' in size: + size = size.split('G')[0] + mult = 1024 * 1024 * 1024 + size = int(float(size) * mult) + except (ValueError, IndexError): + size = 0 + try: + seeders = int(td[3].text) + except ValueError: + seeders = 0 + + if not url or not title: + logger.debug('Missing url or title') + elif minimumseeders < int(seeders): + results.append({ + 'bookid': book['bookid'], + 'tor_prov': provider, + 'tor_title': title, + 'tor_url': url, + 'tor_size': str(size), + 'tor_type': mode, + 'priority': lazylibrarian.CONFIG['KAT_DLPRIORITY'] + }) + logger.debug('Found %s. Size: %s' % (title, size)) + else: + logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders))) + except Exception as e: + logger.error(u"An error occurred in the %s parser: %s" % (provider, str(e))) + logger.debug('%s: %s' % (provider, traceback.format_exc())) logger.debug(u"Found %i result%s from %s for %s" % (len(results), plural(len(results)), provider, book['searchterm'])) @@ -315,75 +320,72 @@ def WWT(book=None): except Exception: # no results = no table in result page rows = [] - c0 = [] - c1 = [] - c2 = [] - - if len(rows) > 1: - for row in rows[1:]: - if len(row.findAll('td')) > 3: - c0.append(row.findAll('td')[0]) - c1.append(row.findAll('td')[1]) - c2.append(row.findAll('td')[2]) - - for col0, col1, col2 in zip(c0, c1, c2): - try: - title = unaccented(str(col0).split('title="')[1].split('"')[0]) + if len(rows) == 1: + rows = [] - # kat can return magnet or torrent or both. 
- magnet = '' - url = '' - mode = 'torrent' + for row in rows[1:]: + td = row.findAll('td') + if len(td) > 3: try: - magnet = 'magnet' + str(col0).split('href="magnet')[1].split('"')[0] - mode = 'magnet' - except IndexError: - pass - try: - url = url_fix(host + '/download.php') + \ - str(col0).split('href="download.php')[1].split('.torrent"')[0] + '.torrent' + title = unaccented(str(td[0]).split('title="')[1].split('"')[0]) + + # kat can return magnet or torrent or both. + magnet = '' + url = '' mode = 'torrent' - except IndexError: - pass + try: + magnet = 'magnet' + str(td[0]).split('href="magnet')[1].split('"')[0] + mode = 'magnet' + except IndexError: + pass + try: + url = url_fix(host + '/download.php') + \ + str(td[0]).split('href="download.php')[1].split('.torrent"')[0] + '.torrent' + mode = 'torrent' + except IndexError: + pass - if not url or (magnet and url and lazylibrarian.CONFIG['PREFER_MAGNET']): - url = magnet - mode = 'magnet' + if not url or (magnet and url and lazylibrarian.CONFIG['PREFER_MAGNET']): + url = magnet + mode = 'magnet' - try: - size = str(col1.text).replace(' ', '').upper() - mult = 1 - if 'K' in size: - size = size.split('K')[0] - mult = 1024 - elif 'M' in size: - size = size.split('M')[0] - mult = 1024 * 1024 - size = int(float(size) * mult) - except (ValueError, IndexError): - size = 0 - try: - seeders = int(col2.text) - except ValueError: - seeders = 0 - if not url or not title: - logger.debug('Missing url or title') - elif minimumseeders < seeders: - results.append({ - 'bookid': book['bookid'], - 'tor_prov': provider, - 'tor_title': title, - 'tor_url': url, - 'tor_size': str(size), - 'tor_type': mode, - 'priority': lazylibrarian.CONFIG['WWT_DLPRIORITY'] - }) - logger.debug('Found %s. 
Size: %s' % (title, size)) - else: - logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders))) - except Exception as e: - logger.error(u"An error occurred in the %s parser: %s" % (provider, str(e))) - logger.debug('%s: %s' % (provider, traceback.format_exc())) + try: + size = str(td[1].text).replace(' ', '').upper() + mult = 1 + if 'K' in size: + size = size.split('K')[0] + mult = 1024 + elif 'M' in size: + size = size.split('M')[0] + mult = 1024 * 1024 + elif 'G' in size: + size = size.split('G')[0] + mult = 1024 * 1024 * 1024 + size = int(float(size) * mult) + except (ValueError, IndexError): + size = 0 + try: + seeders = int(td[2].text) + except ValueError: + seeders = 0 + if not url or not title: + logger.debug('Missing url or title') + elif minimumseeders < int(seeders): + results.append({ + 'bookid': book['bookid'], + 'tor_prov': provider, + 'tor_title': title, + 'tor_url': url, + 'tor_size': str(size), + 'tor_type': mode, + 'priority': lazylibrarian.CONFIG['WWT_DLPRIORITY'] + }) + logger.debug('Found %s. 
Size: %s' % (title, size)) + else: + logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders))) + except Exception as e: + logger.error(u"An error occurred in the %s parser: %s" % (provider, str(e))) + logger.debug('%s: %s' % (provider, traceback.format_exc())) logger.debug(u"Found %i result%s from %s for %s" % (len(results), plural(len(results)), provider, book['searchterm'])) @@ -445,7 +447,7 @@ def EXTRA(book=None): if not url or not title: logger.debug('No url or title found') - elif minimumseeders < seeders: + elif minimumseeders < int(seeders): results.append({ 'bookid': book['bookid'], 'tor_prov': provider, @@ -521,7 +523,7 @@ def ZOO(book=None): if not url or not title: logger.debug('No url or title found') - elif minimumseeders < seeders: + elif minimumseeders < int(seeders): results.append({ 'bookid': book['bookid'], 'tor_prov': provider, @@ -597,7 +599,7 @@ def LIME(book=None): if not url or not title: logger.debug('No url or title found') - elif minimumseeders < seeders: + elif minimumseeders < int(seeders): results.append({ 'bookid': book['bookid'], 'tor_prov': provider, @@ -632,119 +634,164 @@ def GEN(book=None): if not str(host)[:4] == "http": host = 'http://' + host - if not search or not search.endswith('.php'): - search = 'search.php' - if search[0] == '/': - search = search[1:] - - searchURL = url_fix(host + "/%s?view=simple&open=0&phrase=0&column=def&res=100&req=%s" % - (search, book['searchterm'])) - - result, success = fetchURL(searchURL) - if not success: - # may return 404 if no results, not really an error - if '404' in result: - logger.debug(u"No results found from %s for %s" % (provider, book['searchterm'])) - elif '111' in result: - # looks like libgen has ip based access limits - logger.error('Access forbidden. Please wait a while before trying %s again.' 
% provider) - errmsg = result - else: - logger.debug(searchURL) - logger.debug('Error fetching data from %s: %s' % (provider, result)) - errmsg = result - - result = False - + page = 1 results = [] - - if result: - logger.debug(u'Parsing results from %s' % (searchURL, provider)) - soup = BeautifulSoup(result) - try: - table = soup.findAll('table')[2] - rows = table.findAll('tr') - except Exception: # no results = no table in result page - rows = [] - - c1 = [] - c2 = [] - c7 = [] - c8 = [] - - if len(rows) > 1: - for row in rows[1:]: - if len(row.findAll('td')) > 8: - c1.append(row.findAll('td')[1]) - c2.append(row.findAll('td')[2]) - c7.append(row.findAll('td')[7]) - c8.append(row.findAll('td')[8]) - - for col1, col2, col7, col8 in zip(c1, c2, c7, c8): + next_page = True + + while next_page: + if not search or not search.endswith('.php'): + search = 'search.php' + if not 'index.php' in search and not 'search.php' in search: + search = 'search.php' + if search[0] == '/': + search = search[1:] + + pagenum = '' + if page > 1: + pagenum = '&page=%s' % page + + if 'index.php' in search: + searchURL = url_fix(host + "/%s?%s&s=%s" % + (search, pagenum, book['searchterm'])) + else: + searchURL = url_fix(host + "/%s?view=simple&open=0&phrase=0&column=def&res=100%s&req=%s" % + (search, pagenum, book['searchterm'])) + + next_page = False + result, success = fetchURL(searchURL) + if not success: + # may return 404 if no results, not really an error + if '404' in result: + logger.debug(u"No results found from %s for %s" % (provider, book['searchterm'])) + elif '111' in result: + # looks like libgen has ip based access limits + logger.error('Access forbidden. Please wait a while before trying %s again.' 
% provider) + errmsg = result + else: + logger.debug(searchURL) + logger.debug('Error fetching data from %s: %s' % (provider, result)) + errmsg = result + + result = False + + if result: + logger.debug(u'Parsing results from %s' % (searchURL, provider)) try: - author = unaccented(col1.text) - title = unaccented(str(col2).split('>')[2].split('<')[0].strip()) - link = str(col2).split('href="')[1].split('?')[1].split('"')[0] - size = unaccented(col7.text).upper() - extn = col8.text - + soup = BeautifulSoup(result) try: - mult = 1 - if 'K' in size: - size = size.split('K')[0] - mult = 1024 - elif 'M' in size: - size = size.split('M')[0] - mult = 1024 * 1024 - size = int(float(size) * mult) - except (ValueError, IndexError): - size = 0 - - if link and title: - if author: - title = author.strip() + ' ' + title.strip() - if extn: - title = title + '.' + extn - - url = url_fix(host + "/ads.php?" + link) - - bookresult, success = fetchURL(url) - if not success: - # may return 404 if no results, not really an error - if '404' in bookresult: - logger.debug(u"No results found from %s for %s" % (provider, book['searchterm'])) + table = soup.findAll('table')[2] + rows = table.findAll('tr') + except Exception: # no results = no table in result page + rows = [] + + if 'search.php' in search and len(rows) > 1: + rows = rows[1:] + + for row in rows: + author = '' + title = '' + size = '' + extn = '' + link = '' + td = row.findAll('td') + if 'index.php' in search and len(td) > 3: + try: + author = formatAuthorName(unaccented(td[0].text)) + title = unaccented(td[2].text) + temp = str(td[4]) + temp = temp.split('onmouseout')[1] + extn = temp.split('">')[1].split('(')[0] + size = temp.split('">')[1].split('(')[1].split(')')[0] + size = size.upper() + link = temp.split('href=')[2].split('"')[1] + except IndexError as e: + logger.debug('Error parsing libgen search.php results: %s' % str(e)) + + elif 'search.php' in search and len(td) > 8: + try: + author = 
formatAuthorName(unaccented(td[1].text)) + title = unaccented(str(td[2]).split('>')[2].split('<')[0].strip()) + link = str(td[2]).split('href="')[1].split('?')[1].split('"')[0] + size = unaccented(td[7].text).upper() + extn = td[8].text + except IndexError as e: + logger.debug('Error parsing libgen search.php results; %s' % str(e)) + + if not size: + size = 0 + else: + try: + mult = 1 + if 'K' in size: + size = size.split('K')[0] + mult = 1024 + elif 'M' in size: + size = size.split('M')[0] + mult = 1024 * 1024 + elif 'G' in size: + size = size.split('G')[0] + mult = 1024 * 1024 * 1024 + size = int(float(size) * mult) + except (ValueError, IndexError): + size = 0 + + if link and title: + if author: + title = author.strip() + ' ' + title.strip() + if extn: + title = title + '.' + extn + + if link.startswith('http'): + url = link else: - logger.debug(url) - logger.debug('Error fetching data from %s: %s' % (provider, bookresult)) - errmsg = bookresult - bookresult = False + url = url_fix(host + "/ads.php?" 
+ link) + bookresult, success = fetchURL(url) + if not success: + # may return 404 if no results, not really an error + if '404' in bookresult: + logger.debug(u"No results found from %s for %s" % (provider, book['searchterm'])) + else: + logger.debug(url) + logger.debug('Error fetching data from %s: %s' % (provider, bookresult)) + errmsg = bookresult + bookresult = False + + if bookresult: + url = None + new_soup = BeautifulSoup(bookresult) + for link in new_soup.findAll('a'): + output = link.get('href') + if output: + if '/get.php' in output: + url = '/get.php' + output.split('/get.php')[1] + break + elif '/download/book' in output: + url = '/download/book' + output.split('/download/book')[1] + break + if url: + url = url_fix(host + url) - if bookresult: - url = None - new_soup = BeautifulSoup(bookresult) - for link in new_soup.findAll('a'): - output = link.get('href') - if output and '/get.php' in output: - url = '/get.php' + output.split('/get.php')[1] - break - if url: - url = url_fix(host + url) - - results.append({ - 'bookid': book['bookid'], - 'tor_prov': provider, - 'tor_title': title, - 'tor_url': url, - 'tor_size': str(size), - 'tor_type': 'direct', - 'priority': lazylibrarian.CONFIG['GEN_DLPRIORITY'] - }) - logger.debug('Found %s, Size %s' % (title, size)) + results.append({ + 'bookid': book['bookid'], + 'tor_prov': provider, + 'tor_title': title, + 'tor_url': url, + 'tor_size': str(size), + 'tor_type': 'direct', + 'priority': lazylibrarian.CONFIG['GEN_DLPRIORITY'] + }) + logger.debug('Found %s, Size %s' % (title, size)) + next_page = True except Exception as e: logger.error(u"An error occurred in the %s parser: %s" % (provider, str(e))) logger.debug('%s: %s' % (provider, traceback.format_exc())) + page += 1 + if 0 < lazylibrarian.CONFIG['MAX_PAGES'] < page: + logger.warn('Maximum results page search reached, still more results available') + next_page = False + logger.debug(u"Found %i result%s from %s for %s" % (len(results), plural(len(results)), 
                     provider, book['searchterm']))      return results, errmsg   @@ -792,7 +839,7 @@ def TDL(book=None):              size = int(item['size'])          url = None -        if link and minimumseeders < seeders: +        if link and minimumseeders < int(seeders):              # no point requesting the magnet link if not enough seeders              # TDL gives us a relative link              result, success = fetchURL(providerurl+link) @@ -805,7 +852,6 @@                          url = output                          break   -        if minimumseeders < int(seeders):              if not url or not title:                  logger.debug('Missing url or title')              else: diff --git a/lazylibrarian/webServe.py b/lazylibrarian/webServe.py index de4941a63..f4a113b51 100644 --- a/lazylibrarian/webServe.py +++ b/lazylibrarian/webServe.py @@ -314,14 +314,26 @@ def configUpdate(self, **kwargs):              regex = mag['Regex']              # seems kwargs parameters are passed as latin-1, can't see how to              # configure it, so we need to correct it on accented magazine names -            # eg "Elle Quebec" where we might have e-acute -            # otherwise the comparison fails -            new_reject = kwargs.get('reject_list[%s]' % title.encode('latin-1'), None) +            # eg "Elle Quebec" where we might have e-acute stored as utf-8 +            # e-acute is \xe9 in latin-1 but \xc3\xa9 in utf-8 +            # otherwise the comparison fails, but sometimes accented characters won't +            # fit latin-1 but fit utf-8 how can we tell ??? 
+ if isinstance(title, str): + try: + title = title.encode('latin-1') + except UnicodeEncodeError: + try: + title = title.encode('utf-8') + except UnicodeEncodeError: + logger.warn('Unable to convert title [%s]' % repr(title)) + title = unaccented(title) + + new_reject = kwargs.get('reject_list[%s]' % title, None) if not new_reject == reject: controlValueDict = {'Title': title} newValueDict = {'Reject': new_reject} myDB.upsert("magazines", newValueDict, controlValueDict) - new_regex = kwargs.get('regex[%s]' % title.encode('latin-1'), None) + new_regex = kwargs.get('regex[%s]' % title, None) if not new_regex == regex: controlValueDict = {'Title': title} newValueDict = {'Regex': new_regex} @@ -622,7 +634,7 @@ def booksearch(self, author=None, title=None, bookid=None, action=None): if '_title' in action: searchterm = title elif '_author' in action: - searchterm = kwargs['author'] + searchterm = author else: # if '_full' in action: searchterm = '%s %s' % (author, title) searchterm = searchterm.strip()