Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Avoid ResourceWarning: unclosed file #395

Merged
merged 2 commits into from
Jul 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 4 additions & 5 deletions src/wireviz/Harness.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,11 +44,10 @@
)
from wireviz.wv_helper import (
awg_equiv,
file_write_text,
flatten2d,
is_arrow,
mm2_equiv,
open_file_read,
open_file_write,
tuplelist2tsv,
)
from wireviz.wv_html import generate_html_output
Expand Down Expand Up @@ -172,7 +171,7 @@ def create_graph(self) -> Graph:
bgcolor=wv_colors.translate_color(self.options.bgcolor, "HEX"),
nodesep="0.33",
fontname=self.options.fontname,
)
) # TODO: Add graph attribute: charset="utf-8",
dot.attr(
"node",
shape="none",
Expand Down Expand Up @@ -658,7 +657,7 @@ def png(self):
return data.read()

@property
def svg(self):
def svg(self): # TODO?: Verify xml encoding="utf-8" in SVG?
graph = self.graph
return embed_svg_images(graph.pipe(format="svg").decode("utf-8"), Path.cwd())

Expand Down Expand Up @@ -693,7 +692,7 @@ def output(
# BOM output
bomlist = bom_list(self.bom())
if "tsv" in fmt:
open_file_write(f"{filename}.bom.tsv").write(tuplelist2tsv(bomlist))
file_write_text(f"{filename}.bom.tsv", tuplelist2tsv(bomlist))
if "csv" in fmt:
# TODO: implement CSV output (preferrably using CSV library)
print("CSV output is not yet supported")
Expand Down
4 changes: 2 additions & 2 deletions src/wireviz/svgembed.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,8 @@ def embed_svg_images_file(
) -> None:
filename_in = Path(filename_in).resolve()
filename_out = filename_in.with_suffix(".b64.svg")
filename_out.write_text(
filename_out.write_text( # TODO?: Verify xml encoding="utf-8" in SVG?
embed_svg_images(filename_in.read_text(), filename_in.parent)
)
) # TODO: Use encoding="utf-8" in both read_text() and write_text()
if overwrite:
filename_out.replace(filename_in)
4 changes: 2 additions & 2 deletions src/wireviz/wireviz.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@
from wireviz.Harness import Harness
from wireviz.wv_helper import (
expand,
file_read_text,
get_single_key_and_value,
is_arrow,
open_file_read,
smart_file_resolve,
)

Expand Down Expand Up @@ -409,7 +409,7 @@ def _get_yaml_data_and_path(inp: Union[str, Path, Dict]) -> (Dict, Path):
try:
yaml_path = Path(inp).expanduser().resolve(strict=True)
# if no FileNotFoundError exception happens, get file contents
yaml_str = open_file_read(yaml_path).read()
yaml_str = file_read_text(yaml_path)
except (FileNotFoundError, OSError, ValueError) as e:
# if inp is a long YAML string, Pathlib will normally raise
# FileNotFoundError or OSError(errno = ENAMETOOLONG) when
Expand Down
6 changes: 3 additions & 3 deletions src/wireviz/wv_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

import wireviz.wireviz as wv
from wireviz import APP_NAME, __version__
from wireviz.wv_helper import open_file_read
from wireviz.wv_helper import file_read_text

format_codes = {
# "c": "csv",
Expand Down Expand Up @@ -111,7 +111,7 @@ def wireviz(file, format, prepend, output_dir, output_name, version):
raise Exception(f"File does not exist:\n{prepend_file}")
print("Prepend file:", prepend_file)

prepend_input += open_file_read(prepend_file).read() + "\n"
prepend_input += file_read_text(prepend_file) + "\n"
else:
prepend_input = ""

Expand All @@ -130,7 +130,7 @@ def wireviz(file, format, prepend, output_dir, output_name, version):
"Output file: ", f"{Path(_output_dir / _output_name)}.{output_formats_str}"
)

yaml_input = open_file_read(file).read()
yaml_input = file_read_text(file)
file_dir = file.parent

yaml_input = prepend_input + yaml_input
Expand Down
21 changes: 17 additions & 4 deletions src/wireviz/wv_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,18 +113,31 @@ def clean_whitespace(inp):


def open_file_read(filename):
"""Open utf-8 encoded text file for reading - remember closing it when finished"""
# TODO: Intelligently determine encoding
return open(filename, "r", encoding="UTF-8")


def open_file_write(filename):
"""Open utf-8 encoded text file for writing - remember closing it when finished"""
return open(filename, "w", encoding="UTF-8")


def open_file_append(filename):
"""Open utf-8 encoded text file for appending - remember closing it when finished"""
return open(filename, "a", encoding="UTF-8")


def file_read_text(filename: str) -> str:
"""Read utf-8 encoded text file, close it, and return the text"""
return Path(filename).read_text(encoding="utf-8")


def file_write_text(filename: str, text: str) -> int:
"""Write utf-8 encoded text file, close it, and return the number of characters written"""
return Path(filename).write_text(text, encoding="utf-8")


def is_arrow(inp):
"""
Matches strings of one or multiple `-` or `=` (but not mixed)
Expand All @@ -144,10 +157,10 @@ def aspect_ratio(image_src):
try:
from PIL import Image

image = Image.open(image_src)
if image.width > 0 and image.height > 0:
return image.width / image.height
print(f"aspect_ratio(): Invalid image size {image.width} x {image.height}")
with Image.open(image_src) as image:
if image.width > 0 and image.height > 0:
return image.width / image.height
print(f"aspect_ratio(): Invalid image size {image.width} x {image.height}")
# ModuleNotFoundError and FileNotFoundError are the most expected, but all are handled equally.
except Exception as error:
print(f"aspect_ratio(): {type(error).__name__}: {error}")
Expand Down
12 changes: 6 additions & 6 deletions src/wireviz/wv_html.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@
from wireviz.svgembed import data_URI_base64
from wireviz.wv_gv_html import html_line_breaks
from wireviz.wv_helper import (
file_read_text,
file_write_text,
flatten2d,
open_file_read,
open_file_write,
smart_file_resolve,
)

Expand All @@ -35,14 +35,14 @@ def generate_html_output(
# fall back to built-in simple template if no template was provided
templatefile = Path(__file__).parent / "templates/simple.html"

html = open_file_read(templatefile).read()
html = file_read_text(templatefile) # TODO?: Warn if unexpected meta charset?

# embed SVG diagram (only if used)
def svgdata() -> str:
return re.sub(
return re.sub( # TODO?: Verify xml encoding="utf-8" in SVG?
"^<[?]xml [^?>]*[?]>[^<]*<!DOCTYPE [^>]*>",
"<!-- XML and DOCTYPE declarations from SVG file removed -->",
open_file_read(f"{filename}.tmp.svg").read(),
file_read_text(f"{filename}.tmp.svg"),
Copy link
Collaborator Author

@kvid kvid Jun 24, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This question might be slightly out of scope for this PR, but needs to be raised: Here, the SVG file is specified to be utf-8 encoded, but it's not specified at:
https://github.com/wireviz/WireViz/blob/close_files/src/wireviz/svgembed.py#L62-L64

What is correct? Should we specify utf-8 at both places, or does it really depend on what encoding is specified in the leading part of the SVG file itself?

Or doesn't the encoding matter at the latter location because it's just copying a file to another file? In that case, maybe read_bytes() and write_bytes() (or something like shutil.copyfile()) is a better alternative?

Copy link
Collaborator

@formatc1702 formatc1702 Jul 1, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good question. Since the HTML template contains <meta charset="UTF-8">, it would be best if the embedded SVG also was UTF-8. Not sure if we know or have control over how Graphviz chooses to encode its output. Are we just lucky that it's already UTF-8?

Copy link
Collaborator Author

@kvid kvid Jul 4, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We probably should assume utf-8 and specify that encoding at both places. I also found a third place where that is already done: https://github.com/wireviz/WireViz/blob/close_files/src/wireviz/Harness.py#L662

Then, we probably also should (as a minimum only where embedding SVG into HTML, or ideally at all places reading SVG) verify that the encoding property of the leading xml tag is either absent (utf-8 is default, I believe) or equal to any of the legal value variations that specify utf-8. If we detect a discrepancy, should we raise an exception or just print a warning stating e.g. that some characters might be rendered wrongly due to an unexpected encoding in the SVG file?

If we can get or create some SVG with e.g. encoding="ISO-8859-1" containg some known characters outside the common ASCII range, we could test to see the effect of assuming the wrong encoding at the different parts of our code. Then it'll be easier to describe possible consequences in a warning message.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

TBH, I would prefer to keep it simple here and work with the assumption, in order to finish the release of v4.1.
Verification can be done as a separate feature/PR.

Copy link
Collaborator Author

@kvid kvid Jul 5, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I created #400 to follow up this issue. Please add your concerns and opinions there.

Update: I also added TODOs at a few code locations that might need attention about such encoding and charset issues because line number references as I've used earlier in this thread will probably not survive the #251 merge-in. 😃

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK thanks. I see some recent force-pushes so please re-request review once it's ready :)

1,
)

Expand Down Expand Up @@ -128,4 +128,4 @@ def replacement_if_used(key: str, func: Callable[[], str]) -> None:
pattern = re.compile("|".join(replacements_escaped))
html = pattern.sub(lambda match: replacements[match.group(0)], html)

open_file_write(f"{filename}.html").write(html)
file_write_text(f"{filename}.html", html)
Loading