Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

on SyntaxError, log error and retry with parser option recover=True #38

Closed
[author] wants to merge 2 commits into [base branch] from [head branch]
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 19 additions & 4 deletions src/oaipmh/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
from lxml import etree
import time
import codecs
import logging
logger = logging.getLogger( __name__ )

from oaipmh import common, metadata, validation, error
from oaipmh.datestamp import datestamp_to_datetime, datetime_to_datestamp
Expand Down Expand Up @@ -107,7 +109,7 @@ def ignoreBadCharacters(self, true_or_false):
"""
self._ignore_bad_character_hack = true_or_false

def parse(self, xml):
def parse(self, xml, recover=False):
"""Parse the XML to a lxml tree.
"""
# XXX this is only safe for UTF-8 encoded content,
Expand All @@ -122,7 +124,7 @@ def parse(self, xml):
if hasattr(xml, "encode"):
xml = xml.encode("utf-8")
# xml = xml.encode("utf-8")
return etree.XML(xml)
return etree.XML(xml, etree.XMLParser(recover=recover))

# implementation of the various methods, delegated here by
# handleVerb method
Expand Down Expand Up @@ -300,8 +302,20 @@ def makeRequestErrorHandling(self, **kw):
xml = self.makeRequest(**kw)
try:
tree = self.parse(xml)
except SyntaxError:
raise error.XMLSyntaxError(kw)
except SyntaxError as e:
try: # try again
logger.warning( e )
logger.warning( 'attempting to recover...')
tree = self.parse(xml, recover=True )
ids = tree.xpath( '/oai:OAI-PMH/oai:ListRecords/oai:record/oai:header/oai:identifier', namespaces=self.getNamespaces() )
if len(ids) == 1:
logger.warning("Recoverable parse error on: {0}".format( ids[0].text ))
else:
logger.warning(
"Recoverable parse error on one or more of:\n {0}".format(
"\n".join([ id.text for id in ids ])))
except SyntaxError: # can't recover
raise error.XMLSyntaxError(kw)
# check whether there are errors first
e_errors = tree.xpath('/oai:OAI-PMH/oai:error',
namespaces=self.getNamespaces())
Expand All @@ -320,6 +334,7 @@ def makeRequestErrorHandling(self, **kw):
code, msg))
# find exception in error module and raise with msg
raise getattr(error, code[0].upper() + code[1:] + 'Error')(msg)

return tree

def makeRequest(self, **kw):
Expand Down