Skip to content

Commit

Permalink
[UK] Fix broken table parsing.
Browse files Browse the repository at this point in the history
  • Loading branch information
dracos authored and TheyWorkForYou Live CVS User committed Sep 5, 2020
1 parent e038045 commit cb2569b
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 6 deletions.
4 changes: 2 additions & 2 deletions pyscraper/gidmatching.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,8 @@ def PrepareXMLForDiff(scrapeversion):
assert chk[0] == chk[3] # chunk type (this can fail due to the lack of two \n's between the two labels, which causes an empty speech to be detected that should not be there)
# new_chk = chk[2]
new_chk = re.sub(
'(?s)(<p\s[^>]*>)(.*?)(<\/p>)',
lambda m: (u''.join((m.group(1), re.sub('\n', ' ', m.group(2)), m.group(3)))),
r'(?s)(<(p|tr)\s[^>]*>)(.*?)(<\/\2>)',
lambda m: (u''.join((m.group(1), re.sub('\n', ' ', m.group(3)), m.group(4)))),
chk[2]
)
essxindx.append(len(essxlist))
Expand Down
11 changes: 7 additions & 4 deletions pyscraper/new_hansard.py
Original file line number Diff line number Diff line change
Expand Up @@ -763,20 +763,23 @@ def parse_votelist(self, votes, direction, vote_list, is_teller=False):
return vote_list

def parse_table(self, wrapper):
rows = wrapper.xpath('.//row')
rows = wrapper.xpath('.//ns:row', namespaces=self.ns_map)
tag = etree.Element('table')
body = etree.Element('tbody')
url = None
for row in rows:
row_tag = etree.Element('tr')
row_tag.set('pid', self.get_pid())

for entry in row.xpath('(.//hs_brev|.//hs_Para)'):
for entry in row.xpath('(.//ns:hs_brev|.//ns:hs_Para)', namespaces=self.ns_map):
if url is None:
url = entry.get('url')
row_tag.append(list(entry))
td_tag = etree.Element('td')
td_tag.text = self.get_single_line_text_from_element(entry)
row_tag.append(td_tag)

body.append(row_tag)
if len(row_tag):
body.append(row_tag)

tag.append(body)

Expand Down

0 comments on commit cb2569b

Please sign in to comment.