Skip to content

Commit

Permalink
html: preserve invalid numeric entities
Browse files Browse the repository at this point in the history
Follow behaviour of Firefox. Patch from @ccbenny (thanks!).
  • Loading branch information
rrthomas committed Sep 15, 2024
1 parent 2ec66c9 commit b26d35b
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 3 deletions.
8 changes: 8 additions & 0 deletions src/html.c
Original file line number Diff line number Diff line change
Expand Up @@ -758,6 +758,10 @@ transform_html_ucs2 (RECODE_SUBTASK subtask)
}
}

/* No digit found at all? */
if ((cursor - buffer) == 2)
valid = false;

if (valid)
if (request->diacritics_only)
{
Expand Down Expand Up @@ -799,6 +803,10 @@ transform_html_ucs2 (RECODE_SUBTASK subtask)
}
}

/* No digit found at all? */
if ((cursor - buffer) == 1)
valid = false;

if (valid)
if (request->diacritics_only)
{
Expand Down
18 changes: 15 additions & 3 deletions tests/t40_html.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,22 @@
import common
from common import setup_module, teardown_module

# Module-level sample strings. Every test method in the class below builds
# its own local strings, so these module-level values are not read by it.
input = '& S&P, S & P, &'
output = '& S&P, S & P, &'

class Test:
# Stand-alone '&' must be kept as-is.
def test_1(self):
input = '& S&P, S & P, &'
output = '& S&P, S & P, &'
common.request('html..')
common.validate(input, output)
# Incomplete numeric hex entities should be kept.
def test_2(self):
input = '&#x61 &#xZZ &#x; &#x'
output = 'a &#xZZ &#x; &#x'
common.request('html..')
common.validate(input, output)
# Incomplete numeric decimal entities should be kept.
def test_3(self):
input = '&#97 &#ZZ &#; &#'
output = 'a &#ZZ &#; &#'
common.request('html..')
common.validate(input, output)

0 comments on commit b26d35b

Please sign in to comment.