Skip to content

Commit

Permalink
bug fix
Browse files (browse the repository at this point in the history)
  • Loading branch information
mtill committed Oct 4, 2013
1 parent d4147b9 commit 48ccb95
Showing 1 changed file with 17 additions and 15 deletions.
32 changes: 17 additions & 15 deletions MailWebsiteChanges.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -68,8 +68,8 @@ def parseSite(uri, contenttype, contentxpath, titlexpath, contentregex, titlereg
tree = etree.parse(file, parser)
file.close()

contentresult = [] if contentxpath == '' else tree.xpath(contentxpath)
titleresult = [] if titlexpath == '' else tree.xpath(titlexpath)
contentresult = tree.xpath(contentxpath) if contentxpath else []
titleresult = tree.xpath(titlexpath) if titlexpath else []

if contenttype == 'html':
basetaglist = tree.xpath('/html/head/base')
Expand All @@ -81,40 +81,42 @@ def parseSite(uri, contenttype, contentxpath, titlexpath, contentregex, titlereg
toAbsoluteURIs(titleresult, baseuri)

if contentxpath != '' and titlexpath != '' and len(contentresult) != len(titleresult):
warning = 'WARNING: number of title blocks (' + len(titleresult) + ') does not match number of content blocks (' + len(contentresult) + ')'
elif contentxpath != '' and len(contentresult) == 0:
warning = 'WARNING: number of title blocks (' + str(len(titleresult)) + ') does not match number of content blocks (' + str(len(contentresult)) + ')'
elif contentxpath and len(contentresult) == 0:
warning = 'WARNING: content selector became invalid!'
elif titlexpath != '' and len(titleresult) == 0:
elif titlexpath and len(titleresult) == 0:
warning = 'WARNING: title selector became invalid!'
else:
if len(contentresult) == 0:
contentresult = titleresult
if len(titleresult) == 0:
titleresult = contentresult

contents = [etree.tostring(s, encoding=defaultEncoding, pretty_print=True).decode(defaultEncoding) for s in contentresult]
titles = [getSubject(etree.tostring(s, encoding=defaultEncoding, method='text').decode(defaultEncoding)) for s in titleresult]
contents = [etree.tostring(s, encoding=defaultEncoding, pretty_print=True).decode(defaultEncoding) for s in contentresult]
titles = [getSubject(etree.tostring(s, encoding=defaultEncoding, method='text').decode(defaultEncoding)) for s in titleresult]

except IOError as e:
warning = 'WARNING: could not open URL; maybe content was moved?\n\n' + str(e)
return {'content': content, 'warning': warning}

if contentregex != '':
if warning:
return {'content': content, 'titles': titles, 'warning': warning}

if contentregex:
contents = [x for y in [re.findall(r'' + contentregex, c) for c in contents] for x in y]
if titleregex != '':
if titleregex:
titles = [x for y in [re.findall(r'' + titleregex, c) for c in titles] for x in y]

if contentregex != '' and titleregex != '' and len(contents) != len(titles):
warning = 'WARNING: number of title blocks (' + len(titles) + ') does not match number of content blocks (' + len(contents) + ') after regex'
elif contentregex != '' and len(contents) == 0:
if contentregex and titleregex and len(contents) != len(titles):
warning = 'WARNING: number of title blocks (' + str(len(titles)) + ') does not match number of content blocks (' + str(len(contents)) + ') after regex'
elif contentregex and len(contents) == 0:
warning = 'WARNING: content regex became invalid!'
elif titleregex != '' and len(titles) == 0:
elif titleregex and len(titles) == 0:
warning = 'WARNING: title regex became invalid!'
else:
if len(contents) == 0:
contents = titles
if len(titles) == 0:
titles = contents
titles = [getSubject(c) for c in contents]

return {'contents': contents, 'titles': titles, 'warning': warning}

Expand Down

0 comments on commit 48ccb95

Please sign in to comment.