diff --git a/font_collector/font_parser.py b/font_collector/font_parser.py
index f66cf2a..21d4e34 100644
--- a/font_collector/font_parser.py
+++ b/font_collector/font_parser.py
@@ -230,6 +230,7 @@ def get_axis_value_from_coordinates(
 
         return axis_values_coordinate_matches
 
+    @staticmethod
     def get_axis_value_table_property(
         ttfont: TTFont, axis_values: List[Any], family_name_prefix: str
     ) -> Tuple[str, str, float, bool]:
@@ -399,7 +400,7 @@ def get_name_by_id(nameID: int, names: List[NameRecord]) -> str:
 
         for name in names:
             try:
-                name_str = name.string.decode(FontParser.get_name_encoding(name))
+                name_str = FontParser.get_decoded_name(name)
             except UnicodeDecodeError:
                 continue
 
@@ -409,6 +410,32 @@ def get_name_by_id(nameID: int, names: List[NameRecord]) -> str:
             f"The NamingTable doesn't contain the NameID {nameID}"
         )
 
+    @staticmethod
+    def get_decoded_name(name: NameRecord) -> str:
+        """
+        Decode the raw bytes of a name record into text.
+
+        Parameters:
+            name (NameRecord): Name record from the naming table
+        Returns:
+            The decoded name
+        Raises:
+            UnicodeDecodeError: If the string can be decoded neither directly
+                nor through the fallback below.
+        """
+
+        encoding = FontParser.get_name_encoding(name)
+
+        try:
+            return name.string.decode(encoding)
+        except UnicodeDecodeError:
+            # Fallback: some fonts pad every byte of the legacy-encoded string
+            # with a leading 0x00. Decoding as UTF-16 BE and re-encoding with
+            # raw_unicode_escape recovers the original single bytes.
+            utf_16_decoded = name.string.decode("utf_16_be")
+            to_decode = bytes(utf_16_decoded, encoding="raw_unicode_escape")
+            return to_decode.decode(encoding)
+
     @staticmethod
     def get_font_postscript_property(font_path: str, font_index: int) -> Optional[str]:
         """
@@ -535,7 +562,7 @@ def get_font_family_fullname_property(
         ):
 
             try:
-                name_str = name.string.decode(FontParser.get_name_encoding(name))
+                name_str = FontParser.get_decoded_name(name)
             except UnicodeDecodeError:
                 continue
 
diff --git a/tests/test_font_parser.py b/tests/test_font_parser.py
new file mode 100644
index 0000000..ad48ff4
--- /dev/null
+++ b/tests/test_font_parser.py
@@ -0,0 +1,24 @@
+from font_collector.font_parser import FontParser
+from fontTools.ttLib.tables._n_a_m_e import NameRecord
+
+
+def test_fallback_encoding():
+    # The strings are from the fonts in this pack: https://github.com/libass/libass/issues/643#issuecomment-1476459274
+
+    # 微软简标宋 - PlatEncID 3.TTF
+    name_record = NameRecord()
+    name_record.nameID = 1
+    name_record.string = b"\x00\xce\x00\xa2\x00\xc8\x00\xed\x00\xbc\x00\xf2\x00\xb1\x00\xea\x00\xcb\x00\xce"
+    name_record.platformID = 3
+    name_record.platEncID = 3
+    name_record.langID = 0
+    assert FontParser.get_decoded_name(name_record) == "微软简标宋"
+
+    # 文鼎中特廣告體 - PlatEncID 4.ttf
+    name_record = NameRecord()
+    name_record.nameID = 1
+    name_record.string = b"\x00\xa4\x00\xe5\x00\xb9\x00\xa9\x00\xa4\x00\xa4\x00\xaf\x00S\x00\xbc\x00s\x00\xa7\x00i\x00\xc5\x00\xe9"
+    name_record.platformID = 3
+    name_record.platEncID = 4
+    name_record.langID = 0
+    assert FontParser.get_decoded_name(name_record) == "文鼎中特廣告體"