Skip to content

Commit

Permalink
Escaping underscore characters in md export
Browse files Browse the repository at this point in the history
Signed-off-by: Maksym Lysak <[email protected]>
  • Loading branch information
Maksym Lysak committed Oct 25, 2024
1 parent b9b3c60 commit a01c59b
Showing 1 changed file with 11 additions and 0 deletions.
11 changes: 11 additions & 0 deletions docling_core/types/doc/document.py
Original file line number Diff line number Diff line change
Expand Up @@ -1291,6 +1291,17 @@ def export_to_markdown( # noqa: C901
mdtext = re.sub(
r"\n\n\n+", "\n\n", mdtext
) # remove cases of double or more empty lines.

# Our Markdown export does not emit any emphasis styling
# (bold, italic, or bold-italic).
# Hence, any underscore printed into the Markdown comes from the document text,
# so it must be escaped for the Markdown to reflect the content properly.
def escape_underscores(text: str) -> str:
    """Return *text* with every unescaped underscore written as ``\\_``.

    An underscore counts as already escaped only when it is preceded by an
    odd number of backslashes. With an even run (e.g. ``\\\\_``) the
    backslashes escape each other, so the underscore itself is still
    unescaped and gets a backslash added.

    Applying the function to its own output leaves it unchanged.
    """
    # (?<!\\)      -> the backslash run starts here (no backslash before it)
    # ((?:\\\\)*)  -> zero or more *pairs* of backslashes, kept as group 1
    # _            -> the underscore that still needs escaping
    # An odd-length run can never match, so "\_" is left untouched.
    return re.sub(r"(?<!\\)((?:\\\\)*)_", r"\1\\_", text)
mdtext = escape_underscores(mdtext)

return mdtext

def export_to_text( # noqa: C901
Expand Down

0 comments on commit a01c59b

Please sign in to comment.