Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support the superscript and subscript tags #407

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions html2text/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ def __init__(
self.tag_callback = None
self.open_quote = config.OPEN_QUOTE # covered in cli
self.close_quote = config.CLOSE_QUOTE # covered in cli
self.ignore_sup_sub = config.IGNORE_SUP_SUB # covered in cli

if out is None:
self.out = self.outtextf
Expand Down Expand Up @@ -716,6 +717,12 @@ def link_url(self: HTML2Text, link: str, title: str = "") -> None:
self.out("\n[/code]")
self.p()

if not self.ignore_sup_sub and tag in ["sup", "sub"]:
if start:
self.o("<{}>".format(tag))
else:
self.o("</{}>".format(tag))

# TODO: Add docstring for these one letter functions
def pbr(self) -> None:
"Pretty print has a line break"
Expand Down
8 changes: 8 additions & 0 deletions html2text/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,13 @@ class bcolors:
)
p.add_argument("filename", nargs="?")
p.add_argument("encoding", nargs="?", default="utf-8")
# The default comes from config.IGNORE_SUP_SUB; passing this flag stores False
# in args.ignore_sup_sub, which turns ignoring off so the tags are emitted.
p.add_argument(
    "--no-ignore-sup-sub",
    dest="ignore_sup_sub",
    action="store_false",
    default=config.IGNORE_SUP_SUB,
    help="Do not ignore the sup and sub tags; keep them in the output",
)
args = p.parse_args()

if args.filename and args.filename != "-":
Expand Down Expand Up @@ -318,5 +325,6 @@ class bcolors:
h.default_image_alt = args.default_image_alt
h.open_quote = args.open_quote
h.close_quote = args.close_quote
h.ignore_sup_sub = args.ignore_sup_sub

sys.stdout.write(h.handle(html))
3 changes: 3 additions & 0 deletions html2text/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,3 +163,6 @@
# Use double quotation marks when converting the <q> tag.
OPEN_QUOTE = '"'
CLOSE_QUOTE = '"'

# Ignore the sup and sub tags. Set to False to emit them as literal
# <sup>/<sub> HTML tags in the output.
IGNORE_SUP_SUB = True
4 changes: 4 additions & 0 deletions test/sub_tag.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
<p>
According to the computations by Nakamura, Johnson, and Mason<sub>1</sub> this
will result in the complete annihilation of both particles.
</p>
2 changes: 2 additions & 0 deletions test/sub_tag.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
According to the computations by Nakamura, Johnson, and Mason<sub>1</sub> this
will result in the complete annihilation of both particles.
4 changes: 4 additions & 0 deletions test/sup_tag.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
<p>
One of the most common equations in all of physics is <var>E</var>=<var>m</var
><var>c</var><sup>2</sup>.
</p>
1 change: 1 addition & 0 deletions test/sup_tag.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
One of the most common equations in all of physics is E=mc<sup>2</sup>.
5 changes: 5 additions & 0 deletions test/test_html2text.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,11 @@ def generate_testdata():
# CLI doesn't support baseurl.
cmdline_args = skip

if base_fn in ["sup_tag.html", "sub_tag.html"]:
module_args["ignore_sup_sub"] = False
cmdline_args.append("--no-ignore-sup-sub")
func_args = skip

yield fn, module_args, cmdline_args, func_args


Expand Down
Loading