From 6b2a1ce623815bd2e069aa8a72b4fae73e940a39 Mon Sep 17 00:00:00 2001 From: moi15moi Date: Sun, 26 Mar 2023 20:45:32 -0400 Subject: [PATCH 1/4] [font_parser] Add staticmethod for get_axis_value_table_property --- font_collector/font_parser.py | 1 + 1 file changed, 1 insertion(+) diff --git a/font_collector/font_parser.py b/font_collector/font_parser.py index f66cf2a..ce97a25 100644 --- a/font_collector/font_parser.py +++ b/font_collector/font_parser.py @@ -230,6 +230,7 @@ def get_axis_value_from_coordinates( return axis_values_coordinate_matches + @staticmethod def get_axis_value_table_property( ttfont: TTFont, axis_values: List[Any], family_name_prefix: str ) -> Tuple[str, str, float, bool]: From 64996267dd1be3ea67fc04827e31cb4c36131eff Mon Sep 17 00:00:00 2001 From: moi15moi Date: Sun, 26 Mar 2023 20:48:10 -0400 Subject: [PATCH 2/4] [font_parser] Add get_decoded_name method --- font_collector/font_parser.py | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/font_collector/font_parser.py b/font_collector/font_parser.py index ce97a25..6780f8a 100644 --- a/font_collector/font_parser.py +++ b/font_collector/font_parser.py @@ -400,7 +400,7 @@ def get_name_by_id(nameID: int, names: List[NameRecord]) -> str: for name in names: try: - name_str = name.string.decode(FontParser.get_name_encoding(name)) + name_str = FontParser.get_decoded_name(name) except UnicodeDecodeError: continue @@ -409,6 +409,24 @@ def get_name_by_id(nameID: int, names: List[NameRecord]) -> str: raise NameNotFoundException( f"The NamingTable doesn't contain the NameID {nameID}" ) + + @staticmethod + def get_decoded_name(name: NameRecord) -> str: + """ + Parameters: + names (NameRecord): Name record from the naming record + Returns: + The decoded name + """ + + encoding = FontParser.get_name_encoding(name) + + try: + return name.string.decode(encoding) + except UnicodeDecodeError: + utf_16_decoded = name.string.decode("utf_16_be") + to_decode = bytes(utf_16_decoded, encoding="raw_unicode_escape") + return to_decode.decode(encoding) @staticmethod def get_font_postscript_property(font_path: str, font_index: int) -> Optional[str]: @@ -536,7 +554,7 @@ def get_font_family_fullname_property( ): try: - name_str = name.string.decode(FontParser.get_name_encoding(name)) + name_str = FontParser.get_decoded_name(name) except UnicodeDecodeError: continue From f3e3ecdbde5ff2092cb64658c26d7da8ac332b84 Mon Sep 17 00:00:00 2001 From: moi15moi Date: Sun, 26 Mar 2023 21:02:14 -0400 Subject: [PATCH 3/4] Add test for the fallback encoding --- tests/test_font_parser.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 tests/test_font_parser.py diff --git a/tests/test_font_parser.py b/tests/test_font_parser.py new file mode 100644 index 0000000..14e41a5 --- /dev/null +++ b/tests/test_font_parser.py @@ -0,0 +1,24 @@ +from font_collector.font_parser import FontParser +from fontTools.ttLib.tables._n_a_m_e import NameRecord + + +def test_fallback_encoding(): + # The string are from the font in this pack: https://github.com/libass/libass/issues/643#issuecomment-1476459274 + + # 微软简标宋 - PlatEncID 3.TTF + name_record = NameRecord() + name_record.nameID = 1 + name_record.string = b'\x00\xce\x00\xa2\x00\xc8\x00\xed\x00\xbc\x00\xf2\x00\xb1\x00\xea\x00\xcb\x00\xce' + name_record.platformID = 3 + name_record.platEncID = 3 + name_record.langID = 0 + FontParser.get_decoded_name(name_record) == "微软简标宋" + + # 文鼎中特廣告體 - PlatEncID 4.ttf + name_record = NameRecord() + name_record.nameID = 1 + name_record.string = b'\x00\xa4\x00\xe5\x00\xb9\x00\xa9\x00\xa4\x00\xa4\x00\xaf\x00S\x00\xbc\x00s\x00\xa7\x00i\x00\xc5\x00\xe9' + name_record.platformID = 3 + name_record.platEncID = 4 + name_record.langID = 0 + FontParser.get_decoded_name(name_record) == "文鼎中特廣告體" \ No newline at end of file From c66760a6b1fff31bac86abfe31527f121aea9486 Mon Sep 17 00:00:00 2001 From: moi15moi Date: Sun, 26 Mar 2023 21:03:03 -0400 Subject: [PATCH 4/4] Format with Black --- font_collector/font_parser.py | 2 +- tests/test_font_parser.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/font_collector/font_parser.py b/font_collector/font_parser.py index 6780f8a..21d4e34 100644 --- a/font_collector/font_parser.py +++ b/font_collector/font_parser.py @@ -409,7 +409,7 @@ def get_name_by_id(nameID: int, names: List[NameRecord]) -> str: raise NameNotFoundException( f"The NamingTable doesn't contain the NameID {nameID}" ) - + @staticmethod def get_decoded_name(name: NameRecord) -> str: """ diff --git a/tests/test_font_parser.py b/tests/test_font_parser.py index 14e41a5..ad48ff4 100644 --- a/tests/test_font_parser.py +++ b/tests/test_font_parser.py @@ -8,7 +8,7 @@ def test_fallback_encoding(): # 微软简标宋 - PlatEncID 3.TTF name_record = NameRecord() name_record.nameID = 1 - name_record.string = b'\x00\xce\x00\xa2\x00\xc8\x00\xed\x00\xbc\x00\xf2\x00\xb1\x00\xea\x00\xcb\x00\xce' + name_record.string = b"\x00\xce\x00\xa2\x00\xc8\x00\xed\x00\xbc\x00\xf2\x00\xb1\x00\xea\x00\xcb\x00\xce" name_record.platformID = 3 name_record.platEncID = 3 name_record.langID = 0 @@ -17,8 +17,8 @@ def test_fallback_encoding(): # 文鼎中特廣告體 - PlatEncID 4.ttf name_record = NameRecord() name_record.nameID = 1 - name_record.string = b'\x00\xa4\x00\xe5\x00\xb9\x00\xa9\x00\xa4\x00\xa4\x00\xaf\x00S\x00\xbc\x00s\x00\xa7\x00i\x00\xc5\x00\xe9' + name_record.string = b"\x00\xa4\x00\xe5\x00\xb9\x00\xa9\x00\xa4\x00\xa4\x00\xaf\x00S\x00\xbc\x00s\x00\xa7\x00i\x00\xc5\x00\xe9" name_record.platformID = 3 name_record.platEncID = 4 name_record.langID = 0 - FontParser.get_decoded_name(name_record) == "文鼎中特廣告體" \ No newline at end of file + FontParser.get_decoded_name(name_record) == "文鼎中特廣告體"