Skip to content

Commit

Permalink
Add markdown syntax check
Browse files Browse the repository at this point in the history
Fixes #2495

Signed-off-by: Michal Čihař <[email protected]>
  • Loading branch information
nijel committed Feb 7, 2019
1 parent 33732ec commit b878711
Show file tree
Hide file tree
Showing 5 changed files with 92 additions and 0 deletions.
13 changes: 13 additions & 0 deletions docs/user/checks.rst
Original file line number Diff line number Diff line change
Expand Up @@ -462,7 +462,20 @@ Markdown links do not match source.

`Markdown links`_


.. _check-md-syntax:

Markdown syntax
~~~~~~~~~~~~~~~

Markdown syntax does not match source.

.. seealso::

`Markdown span elements`_

.. _Markdown links: https://daringfireball.net/projects/markdown/syntax#link
.. _Markdown span elements: https://daringfireball.net/projects/markdown/syntax#span

Source checks
-------------
Expand Down
51 changes: 51 additions & 0 deletions weblate/checks/markup.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,19 @@
r'(?:\[[^^\]]*\]|[^\[\]]|\](?=[^\[]*\]))*'
r')\]\s*\[([^^\]]*)\]'
)
# Matches Markdown span-level markup. The pattern is an alternation of six
# branches; each branch captures only its opening delimiter, so exactly one
# of the six groups is set for any given match:
#   group 1: "__"   group 2: "**"   group 3: "_"
#   group 4: "*"    group 5: run of backticks    group 6: "~~"
# The code-span branch closes with the backreference \5, so it must stay the
# fifth capturing group; adding or reordering groups requires renumbering it
# (and updating the range(6) loops in MarkdownSyntaxCheck).
MD_SYNTAX = re.compile(
r'(_{2})(?:[\s\S]+?)_{2}(?!_)' # __word__
r'|'
r'(\*{2})(?:[\s\S]+?)\*{2}(?!\*)' # **word**
r'|'
r'\b(_)(?:(?:__|[^_])+?)_\b' # _word_
r'|'
r'(\*)(?:(?:\*\*|[^\*])+?)\*(?!\*)' # *word*
r'|'
r'(`+)\s*(?:[\s\S]*?[^`])\s*\5(?!`)' # `code`
r'|'
r'(~~)(?=\S)(?:[\s\S]*?\S)~~' # ~~word~~
)

XML_MATCH = re.compile(r'<[^>]+>')
XML_ENTITY_MATCH = re.compile(r'&#?\w+;')
Expand Down Expand Up @@ -230,3 +243,41 @@ def check_single(self, source, target, unit):
# We don't check actual link targets as those might
# be localized as well (consider links to Wikipedia)
return len(src_match) != len(tgt_match)


class MarkdownSyntaxCheck(MarkdownBaseCheck):
    """Compare the Markdown span delimiters used in source and translation.

    Only the *set* of distinct delimiters (as captured by ``MD_SYNTAX``) is
    compared; how many times each one occurs is not taken into account.
    """

    check_id = 'md-syntax'
    name = _('Markdown syntax')
    description = _('Markdown syntax does not match source')

    @staticmethod
    def extract_match(match):
        """Return the first non-empty of the six groups in *match*.

        *match* is one item from ``MD_SYNTAX.findall()``, i.e. a sequence
        with one entry per capturing group. ``None`` is returned when all
        six entries are empty.
        """
        return next(
            (match[index] for index in range(6) if match[index]),
            None,
        )

    def check_single(self, source, target, unit):
        """Return True when source and target use different delimiter sets."""
        source_groups = MD_SYNTAX.findall(source)
        target_groups = MD_SYNTAX.findall(target)

        used_in_source = set(map(self.extract_match, source_groups))
        used_in_target = set(map(self.extract_match, target_groups))

        return used_in_source != used_in_target

    def check_highlight(self, source, unit):
        """Return ``(start, end, text)`` spans for delimiters in *source*.

        Every match contributes two spans: one for the opening delimiter and
        one of equal length at the very end of the match. An empty list is
        returned when ``self.should_skip(unit)`` is true (``should_skip`` is
        not defined in this class; presumably inherited).
        """
        if self.should_skip(unit):
            return []

        spans = []
        for found in MD_SYNTAX.finditer(source):
            # Pick the captured opening delimiter: the first non-empty group.
            delimiter = ''
            for delimiter in found.groups():
                if delimiter:
                    break
            width = len(delimiter)
            begin, finish = found.start(), found.end()
            spans.extend((
                (begin, begin + width, delimiter),
                (finish - width, finish, delimiter),
            ))
        return spans
1 change: 1 addition & 0 deletions weblate/checks/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ class WeblateChecksConf(AppConf):
'weblate.checks.markup.XMLTagsCheck',
'weblate.checks.markup.MarkdownRefLinkCheck',
'weblate.checks.markup.MarkdownLinkCheck',
'weblate.checks.markup.MarkdownSyntaxCheck',
'weblate.checks.source.OptionalPluralCheck',
'weblate.checks.source.EllipsisCheck',
'weblate.checks.source.MultipleFailingCheck',
Expand Down
26 changes: 26 additions & 0 deletions weblate/checks/tests/test_markup_checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
XMLValidityCheck,
MarkdownRefLinkCheck,
MarkdownLinkCheck,
MarkdownSyntaxCheck,
)
from weblate.checks.tests.test_checks import CheckTestCase

Expand Down Expand Up @@ -180,3 +181,28 @@ def setUp(self):
'[Použij Weblate]',
'md-text'
)


class MarkdownSyntaxCheckTest(CheckTestCase):
    """Fixtures for MarkdownSyntaxCheck.

    The ``test_*`` attributes set in ``setUp`` are interpreted by
    ``CheckTestCase`` (not shown here); the three-item tuples presumably
    hold (source, target, flags).
    """

    check = MarkdownSyntaxCheck()

    def setUp(self):
        super(MarkdownSyntaxCheckTest, self).setUp()
        flag = 'md-text'

        # Expected spans: opening and closing delimiter of each element.
        expected_spans = [
            (0, 2, '**'),
            (8, 10, '**'),
            (11, 13, '~~'),
            (19, 21, '~~'),
            (22, 23, '`'),
            (27, 28, '`'),
        ]
        self.test_highlight = (
            flag,
            '**string** ~~strike~~ `code`',
            expected_spans,
        )

        # Pairs where the delimiters differ between the two strings.
        self.test_failure_1 = ('**string**', '*string*', flag)
        self.test_failure_2 = ('~~string~~', '*string*', flag)
        self.test_failure_3 = ('_string_', '*string*', flag)

        # Pairs that must pass: same markup, no markup, or no flag set.
        self.test_good_matching = ('**string**', '**string**', flag)
        self.test_good_none = ('string', 'string', flag)
        self.test_good_flag = ('**string**', 'string', '')
1 change: 1 addition & 0 deletions weblate/settings_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -694,6 +694,7 @@
# 'weblate.checks.markup.XMLTagsCheck',
# 'weblate.checks.markup.MarkdownRefLinkCheck',
# 'weblate.checks.markup.MarkdownLinkCheck',
# 'weblate.checks.markup.MarkdownSyntaxCheck',
# 'weblate.checks.source.OptionalPluralCheck',
# 'weblate.checks.source.EllipsisCheck',
# 'weblate.checks.source.MultipleFailingCheck',
Expand Down

0 comments on commit b878711

Please sign in to comment.