Skip to content

Commit

Permalink
Merge pull request #26 from moi15moi/get-missing-glyphs-use-freetype-…
Browse files Browse the repository at this point in the history
…instead-of-fonttools

Get missing glyphs - Use freetype instead of fonttools
- Deal correctly with symbol cmap by trying to guess the encoding.
- Deal correctly with unicode cmap. We previously correctly deal with those, but a bug was introduce in 2f57e0f
  • Loading branch information
moi15moi authored Oct 8, 2023
2 parents 438b8e1 + 63c34c8 commit c245aa3
Show file tree
Hide file tree
Showing 6 changed files with 283 additions and 34 deletions.
96 changes: 74 additions & 22 deletions font_collector/font.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,21 @@
import os
from .exceptions import InvalidFontException
from .font_parser import FontParser, NameID
from fontTools.ttLib.tables._c_m_a_p import CmapSubtable
from ctypes import byref
from fontTools.ttLib.ttFont import TTFont
from fontTools.ttLib.ttCollection import TTCollection
from freetype import (
FT_Done_Face,
FT_Done_FreeType,
FT_Exception,
FT_Face,
FT_Get_Char_Index,
FT_Get_CMap_Format,
FT_Init_FreeType,
FT_Library,
FT_New_Memory_Face,
FT_Set_Charmap,
)
from typing import Any, Dict, List, Sequence, Set, Tuple

_logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -243,54 +255,94 @@ def __hash__(self):
def __repr__(self):
return f'Filename: "{self.filename}" Family_names: "{self.family_names}", Weight: "{self.weight}", Italic: "{self.italic}, Exact_names: "{self.exact_names}", Named_instance_coordinates: "{self.named_instance_coordinates}"'

def get_missing_glyphs(self, text: Sequence[str]) -> Set[str]:
def get_missing_glyphs(
self,
text: Sequence[str],
support_only_ascii_char_for_symbol_font: bool = False
) -> Set[str]:
"""
Parameters:
text (Sequence[str]): Text
support_only_ascii_char_for_symbol_font (bool):
Libass only support ascii character for symbol cmap, but VSFilter can support more character.
If you wish to use libass, we recommand you to set this param to True.
If you wish to use VSFilter, we recommand you to set this param to False.
For more detail, see the issue: https://github.com/libass/libass/issues/319
Returns:
A set of all the character that the font cannot display.
"""

ttFont = TTFont(self.filename, fontNumber=self.font_index)
char_not_found: Set[str] = set()

cmap_tables: List[CmapSubtable] = list(
filter(lambda table: table.platformID == 3, ttFont["cmap"].tables)
)
library = FT_Library()
face = FT_Face()

error = FT_Init_FreeType(byref(library))
if error: raise FT_Exception(error)

# We cannot use FT_New_Face due to this issue: https://github.com/rougier/freetype-py/issues/157
with open(self.filename, mode="rb") as f:
filebody = f.read()
error = FT_New_Memory_Face(library, filebody, len(filebody), self.font_index, byref(face))
if error: raise FT_Exception(error)

supported_charmaps = [face.contents.charmaps[i] for i in range(face.contents.num_charmaps) if FT_Get_CMap_Format(face.contents.charmaps[i]) != -1 and face.contents.charmaps[i].contents.platform_id == 3]

# GDI seems to take apple cmap if there isn't any microsoft cmap: https://github.com/libass/libass/issues/679
if len(cmap_tables) == 0:
cmap_tables = list(
filter(
lambda table: table.platformID == 1 and table.platEncID == 0,
ttFont["cmap"].tables,
)
)
if len(supported_charmaps) == 0:
supported_charmaps = [face.contents.charmaps[i] for i in range(face.contents.num_charmaps) if FT_Get_CMap_Format(face.contents.charmaps[i]) != -1 and face.contents.charmaps[i].contents.platform_id == 1 and face.contents.charmaps[i].contents.encoding_id == 0]

for char in text:
char_found = False

for cmap_table in cmap_tables:
cmap_encoding = FontParser.get_cmap_encoding(cmap_table)
for charmap in supported_charmaps:
error = FT_Set_Charmap(face, charmap)
if error: raise FT_Exception(error)

platform_id = charmap.contents.platform_id
encoding_id = charmap.contents.encoding_id

# Cmap isn't supported
cmap_encoding = FontParser.get_cmap_encoding(platform_id, encoding_id)

# cmap not supported
if cmap_encoding is None:
continue

try:
codepoint = int.from_bytes(char.encode(cmap_encoding), "big")
except UnicodeEncodeError:
continue
if cmap_encoding == "unicode":
codepoint = ord(char)
else:
if cmap_encoding == "unknown":
if platform_id == 3 and encoding_id == 0:
if support_only_ascii_char_for_symbol_font and not char.isascii():
continue
cmap_encoding = FontParser.get_symbol_cmap_encoding(face)

if cmap_encoding is None:
# Fallback if guess fails
cmap_encoding = "cp1252"
else:
# cmap not supported
continue

try:
codepoint = int.from_bytes(char.encode(cmap_encoding), "big")
except UnicodeEncodeError:
continue

# GDI/Libass modify the codepoint for microsoft symbol cmap: https://github.com/libass/libass/blob/04a208d5d200360d2ac75f8f6cfc43dd58dd9225/libass/ass_font.c#L249-L250
if cmap_table.platformID == 3 and cmap_table.platEncID == 0:
if platform_id == 3 and encoding_id == 0:
codepoint = 0xF000 | codepoint

if codepoint in cmap_table.cmap:
index = FT_Get_Char_Index(face, codepoint)

if index:
char_found = True
break

if not char_found:
char_not_found.add(char)

FT_Done_Face(face)
FT_Done_FreeType(library)

return char_not_found
Loading

0 comments on commit c245aa3

Please sign in to comment.