Skip to content

Commit

Permalink
dry-run option (replaces SelectorTest.py)
Browse files Browse the repository at this point in the history
  • Loading branch information
mtill committed Oct 6, 2013
1 parent 48ccb95 commit cb8ca95
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 14 deletions.
28 changes: 19 additions & 9 deletions MailWebsiteChanges.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,26 +251,36 @@ def pollWebsites():
if __name__ == "__main__":

configMod = 'config'
dryrun = None

try:
opts, args = getopt.getopt(sys.argv[1:], 'hc:', ['help', 'config='])
opts, args = getopt.getopt(sys.argv[1:], 'hc:d:', ['help', 'config=', 'dry-run='])
except getopt.GetoptError:
print('Usage: MailWebsiteChanges.py --config=config')
print('Usage: MailWebsiteChanges.py --config=config --dry-run=shortname')
sys.exit(1)
for opt, arg in opts:
if opt == '-h':
print('Usage: MailWebsiteChanges.py --config=config')
exit()
elif opt in ('-c', '--config'):
configMod = arg
elif opt in ('-d', '--dry-run'):
dryrun = arg

config = importlib.import_module(configMod)

try:
pollWebsites()
except:
msg = str(sys.exc_info()[0]) + '\n\n' + traceback.format_exc()
print(msg)
if config.receiver != '':
sendmail('[MailWebsiteChanges] Something went wrong ...', msg, False, None)
if dryrun:
for site in config.sites:
if site['shortname'] == dryrun:
parseResult = parseSite(site['uri'], site.get('type', 'html'), site.get('contentxpath', ''), site.get('titlexpath', ''), site.get('contentregex', ''),site.get('titleregex', ''), site.get('encoding', defaultEncoding))
print(parseResult)
break
else:
try:
pollWebsites()
except:
msg = str(sys.exc_info()[0]) + '\n\n' + traceback.format_exc()
print(msg)
if config.receiver != '':
sendmail('[MailWebsiteChanges] Something went wrong ...', msg, False, None)

7 changes: 2 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,17 +19,14 @@ sites = [
{'shortname': 'mywebsite1',
'uri': 'http://www.mywebsite1.com/info',
'type': 'html',
'titlexpath': '//h1',
'contentxpath': '//div',
'titleregex': '',
'contentregex': '',
'encoding': 'utf-8'},

{'shortname': 'mywebsite2',
'uri': 'http://www.mywebsite2.com/info',
'type': 'html',
'contentxpath': '//*[contains(concat(\' \', normalize-space(@class), \' \'), \' news-list-container \')]',
'regex': '',
'titlexpath': '//title',
'encoding': 'utf-8'},

{'shortname': 'mywebsite3',
Expand All @@ -56,7 +53,7 @@ sites = [
* encoding
+ Character encoding of the website, e.g., 'utf-8'.

<em>SelectorTest.py</em> might be useful in order to test the definitions before integrating them into the config file.
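The <em>--dry-run="shortname"</em> option parses only the site with the given shortname and prints the parse result instead of running the regular polling, which is useful for validating and fine-tuning a definition.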

If you would like to keep the data stored in a different place than the working directory, you can include something like this:
<pre>
Expand Down

0 comments on commit cb8ca95

Please sign in to comment.