Skip to content

Commit

Permalink
add config entries for application handlers
Browse files Browse the repository at this point in the history
  • Loading branch information
billyeh committed Aug 30, 2013
1 parent 6693618 commit 23daf6f
Show file tree
Hide file tree
Showing 4 changed files with 12 additions and 6 deletions.
4 changes: 2 additions & 2 deletions lib/argconfig_parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,8 @@ def __init__(self, arglist):
to the current directory "."')
self.parser.add_argument('--document-type', '-d', type=str, nargs='?',
default='grant',
help='Choose whether your files are patent grants or applications. \
Defaults to patent grants.')
help='Set the type of patent document to be parsed: grant (default) \
or application')

# parse arguments and assign values
args = self.parser.parse_args(self.arglist)
Expand Down
6 changes: 4 additions & 2 deletions lib/config_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,15 +70,17 @@ def get_dates(yearstring):
return years


def get_xml_handlers(configfile):
def get_xml_handlers(configfile, document_type='grant'):
"""
Called by parse.py to generate a lookup dictionary for which parser should
be used for a given file
"""
handler = ConfigParser()
handler.read(configfile)
xmlhandlers = {}
for yearrange, handler in handler.items('xml-handlers'):
config_item = 'grant-xml-handlers' if document_type == 'grant' \
else 'application-xml-handlers'
for yearrange, handler in handler.items(config_item):
for year in get_dates(yearrange):
try:
xmlhandlers[year] = importlib.import_module(handler)
Expand Down
3 changes: 2 additions & 1 deletion parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
import shutil
from lib.config_parser import get_xml_handlers

xmlhandlers = get_xml_handlers('process.cfg')
logfile = "./" + 'xml-parsing.log'
logging.basicConfig(filename=logfile, level=logging.DEBUG)
commit_frequency = alchemy.get_config().get('parse').get('commit_frequency')
Expand Down Expand Up @@ -173,5 +172,7 @@ def main(patentroot, xmlregex, verbosity, output_directory='.'):
PATENTROOT = args.get_patentroot()
VERBOSITY = args.get_verbosity()
PATENTOUTPUTDIR = args.get_output_directory()
DOCUMENTTYPE = args.get_document_type()
xmlhandlers = get_xml_handlers('process.cfg', DOCUMENTTYPE)

main(PATENTROOT, XMLREGEX, VERBOSITY, PATENTOUTPUTDIR)
5 changes: 4 additions & 1 deletion process.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,10 @@ datadir=/Users/gabe
# format `ipgYYMMDD` is assumed), then the default parser is used.
# The dates in the ranges are either YYYY or YYYYMMDD. If only one date is provided,
# then the corresponding handler is used for all subsequent patents.
[xml-handlers]
[grant-xml-handlers]
2005-20130108=lib.handlers.grant_handler_v42
20130115=lib.handlers.grant_handler_v44
default=lib.handlers.grant_handler_v42

[application-xml-handlers]
default=lib.handlers.application_handler_v1

0 comments on commit 23daf6f

Please sign in to comment.