Skip to content

Commit

Permalink
Merge pull request #196 from HinTak/variant-selector-addition
Browse files Browse the repository at this point in the history
API for Unicode Variation Sequences, from https://freetype.org/freety…
  • Loading branch information
HinTak authored Jun 18, 2024
2 parents 2f8f7d8 + 11101e6 commit d996824
Show file tree
Hide file tree
Showing 3 changed files with 372 additions and 0 deletions.
309 changes: 309 additions & 0 deletions examples/unicode-variation-sequences.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,309 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

# The test file used and known to work is v2.004 of https://github.com/adobe-fonts/source-han-sans/blob/release/OTF/Japanese/SourceHanSans-Regular.otf

import io
import os
import traceback

from typing import Dict

from fontTools.ttLib import TTFont

from ctypes import byref
from freetype import (
FT_Done_Face,
FT_Done_FreeType,
FT_Exception,
FT_Face,
FT_Get_First_Char,
FT_Get_Next_Char,
FT_Init_FreeType,
FT_Library,
FT_New_Memory_Face,
FT_UInt,
FT_Get_Char_Index,
FT_Face_GetVariantSelectors,
FT_Face_GetCharsOfVariant,
FT_Face_GetCharVariantIndex,
FT_Face_GetCharVariantIsDefault
)

# Code points accepted as variation selectors when reading cmap format 14 data.
_ALL_IVS_NUMBERS = list(range(0xe0100, 0xe01f0))  # U+E0100..U+E01EF: Variation Selectors Supplement
_ALL_IVS_NUMBERS.extend(range(0xfe00, 0xfe10))  # U+FE00..U+FE0F: Variation Selectors

def _read_cmap_uvs(uvsDict):
    """Split a cmap format 14 ``uvsDict`` into explicit and default sequences.

    ``uvsDict`` maps a variation selector code point to a list of entries
    whose first element is a base character code point and whose second
    element is a glyph name (falsy when no glyph is named).

    Returns a tuple ``(explicit, defaults)``:

    * ``explicit`` maps ``base + selector`` strings to the glyph name given
      in the entry; entries naming ``'.notdef'`` are dropped.
    * ``defaults`` lists the ``base + selector`` strings whose entry carries
      no glyph name.

    Selectors that are not listed in ``_ALL_IVS_NUMBERS`` are ignored.
    """
    explicit_glyphs = {}
    default_sequences = []

    for selector_key, mappings in uvsDict.items():
        selector_value = int(selector_key)
        if selector_value not in _ALL_IVS_NUMBERS:
            continue

        selector = chr(selector_value)
        for mapping in mappings:
            base_value = mapping[0]
            glyph_name = mapping[1]
            if glyph_name == '.notdef':
                continue

            sequence = chr(int(base_value)) + selector
            if glyph_name:
                # Each sequence is expected to appear only once in the table.
                assert sequence not in explicit_glyphs
                explicit_glyphs[sequence] = glyph_name
            else:
                default_sequences.append(sequence)

    return explicit_glyphs, default_sequences

def _read_character(character_value, encoding):
    """Return ``chr(character_value)`` for ``'utf_16_be'`` tables, else ``None``."""
    return chr(character_value) if encoding == 'utf_16_be' else None

# (platformID, platEncID) pairs of Unicode cmap subtables, most preferred
# first; a lower list index means a higher priority.
_CMAP_PRIORITY_LIST = [
    (3, 10),  # Windows, Unicode full repertoire
    (0, 6),   # Unicode, full repertoire (format 13 subtable)
    (0, 4),   # Unicode 2.0, full repertoire
    (3, 1),   # Windows, Unicode BMP
    (0, 3),   # Unicode 2.0, BMP
    (0, 2),   # Unicode, ISO/IEC 10646
    (0, 1),   # Unicode 1.1
    (0, 0),   # Unicode 1.0
]

def read_fonttools_cmap(font) -> Dict[str, str]:
    """Build a character -> glyph name map from a fontTools font's cmap table.

    Only subtables whose encoding is ``'utf_16_be'`` are considered.

    * Format 14 subtables contribute variation sequences (base character
      followed by a variation selector) via ``_read_cmap_uvs``.
    * Other subtables contribute single characters. A subtable is read only
      if its ``(platformID, platEncID)`` is in ``_CMAP_PRIORITY_LIST`` and it
      ranks strictly better than every such subtable read so far; its
      entries then overwrite earlier ones.

    Default variation sequences (no glyph named in the format 14 data)
    inherit the glyph name of their base character when that is mapped.

    Args:
        font: a ``fontTools.ttLib.TTFont`` instance.

    Returns:
        Dict mapping one- or two-code-point strings to glyph names. The dict
        is empty when the cmap table exposes no ``tables`` attribute.

    Errors raised while reading a single subtable are printed with
    ``traceback.print_exc()`` and the rest of that subtable is skipped.
    """
    assert isinstance(font, TTFont)
    if not hasattr(font["cmap"], 'tables'):
        # Honour the declared return type so callers can use len()/iteration.
        return {}

    all_characters = {}
    all_default_characters = []
    best_read_index = None
    for table in font["cmap"].tables:
        encoding = table.getEncoding()
        if encoding != 'utf_16_be':
            continue

        try:
            if table.format == 14:
                if not hasattr(table, 'uvsDict'):
                    print('Unknown CMAP Format 14: {}:'.format(vars(table)))
                    continue

                all_uvs_data, default_characters = _read_cmap_uvs(table.uvsDict)
                all_default_characters += default_characters
                for character, glyphname in all_uvs_data.items():
                    if character in all_characters:
                        assert all_characters[character] == glyphname
                    else:
                        all_characters[character] = glyphname

            elif hasattr(table, 'cmap'):
                tuple_value = (table.platformID, table.platEncID)
                # NOTE(review): subtables outside the priority list are
                # skipped, matching the platform/encoding filter applied in
                # read_freetype_cmap — confirm this is the intended nesting.
                if tuple_value not in _CMAP_PRIORITY_LIST:
                    continue

                index_value = _CMAP_PRIORITY_LIST.index(tuple_value)
                # Compare against None explicitly: index 0 is the *best*
                # priority and must not be mistaken for "nothing read yet".
                if best_read_index is not None and index_value >= best_read_index:
                    continue
                best_read_index = index_value

                all_items = table.cmap.items()
                if len(all_items) == 0 and table.format != 6:
                    print('Unknown CMAP Format {}: {}:'.format(table.format, vars(table)))

                for character_value, glyphname in all_items:
                    if glyphname == '.notdef':
                        continue

                    character = _read_character(character_value, encoding)
                    if character is not None:
                        all_characters[character] = glyphname

        except Exception:
            # Best effort: report the problem and move on to the next subtable.
            traceback.print_exc()
            continue

    # Default sequences render with the glyph of their base character.
    for ivs_character in all_default_characters:
        first_character = ivs_character[0]
        if first_character in all_characters:
            all_characters[ivs_character] = all_characters[first_character]

    return all_characters

def read_freetype_cmap(face: FT_Face) -> Dict[str, int]:
    """Build a character -> glyph index map from a FreeType face.

    The face's active charmap must be platform 0 (any encoding) or platform 3
    with encoding 1 or 10; otherwise an empty dict is returned.

    The result contains every character enumerated by
    ``FT_Get_First_Char`` / ``FT_Get_Next_Char`` plus every variation sequence
    (base character followed by selector) reported by
    ``FT_Face_GetVariantSelectors`` / ``FT_Face_GetCharsOfVariant``. Entries
    whose lookup yields glyph index 0 are omitted.
    """
    def _zero_terminated(array):
        # Yield the items of a C array up to, but excluding, the first 0.
        position = 0
        while True:
            value = array[position]
            if value == 0:
                return
            yield value
            position += 1

    active_charmap = face.contents.charmap.contents
    platID = active_charmap.platform_id
    encodingID = active_charmap.encoding_id
    if platID not in (0, 3):
        return {}
    if platID == 3 and encodingID not in (1, 10):
        return {}

    # Plain characters covered by the active charmap.
    single_characters = []
    gindex = FT_UInt()
    charcode = FT_Get_First_Char(face, byref(gindex))
    while gindex.value != 0:
        single_characters.append(chr(charcode))
        charcode = FT_Get_Next_Char(face, charcode, byref(gindex))

    # Variation sequences: base character followed by its selector.
    variation_sequences = []
    variant_selectors_list = FT_Face_GetVariantSelectors(face)
    if bool(variant_selectors_list):
        # Copy the selectors into a Python list before calling FreeType
        # again, as the array is read in full up front.
        # NOTE(review): presumably the returned buffer is owned by the face
        # and may be reused by later calls — confirm against FreeType docs.
        all_selectors = list(_zero_terminated(variant_selectors_list))

        for selector_value in all_selectors:
            character_value_list = FT_Face_GetCharsOfVariant(face, selector_value)
            assert bool(character_value_list)
            selector = chr(selector_value)
            variation_sequences.extend(
                chr(base_value) + selector
                for base_value in _zero_terminated(character_value_list)
            )

    character_to_glyphID = {}
    for character in single_characters:
        glyphID = FT_Get_Char_Index(face, ord(character))
        if glyphID != 0:
            assert character not in character_to_glyphID
            character_to_glyphID[character] = glyphID

    for sequence in variation_sequences:
        glyphID = FT_Face_GetCharVariantIndex(face, ord(sequence[0]), ord(sequence[1]))
        if glyphID != 0:
            assert sequence not in character_to_glyphID
            character_to_glyphID[sequence] = glyphID

    return character_to_glyphID

def _convert_character_to_hex(text: str):
    """Return the code point of a single character as lowercase hex.

    The result is zero-padded to at least four digits and grows as needed
    for code points above U+FFFF (e.g. ``'0041'``, ``'e0101'``, ``'10ffff'``).

    ``text`` must be exactly one character long.
    """
    assert len(text) == 1
    # A minimum-width format already widens for larger values, so no
    # per-range branching is required.
    return '{0:04x}'.format(ord(text))

def convert_string_to_hex(text: str):
    """Return the hex code points of ``text`` joined by ``'-'``.

    Each character is rendered by ``_convert_character_to_hex``; an empty
    string yields an empty string.
    """
    assert isinstance(text, str)
    return '-'.join(_convert_character_to_hex(character) for character in text)

if __name__ == "__main__":
    # Compare the character -> glyph mapping seen by FreeType with the one
    # seen by fontTools for the font shipped next to this script.
    directory = os.path.dirname(__file__)
    font_path = os.path.join(directory, 'SourceHanSans-Regular.otf')
    with open(font_path, 'rb') as fontfile:
        memory_file = io.BytesIO(fontfile.read())

    fonttools_font = TTFont(memory_file, 0, allowVID=0,
                            ignoreDecompileErrors=True,
                            fontNumber=-1)

    library = FT_Library()
    error = FT_Init_FreeType(byref(library))
    if error:
        raise FT_Exception(error)

    try:
        freetype_face = FT_Face()
        # Keep a reference to the buffer for as long as the face is in use.
        data = memory_file.getvalue()
        error = FT_New_Memory_Face(library, data, len(data), 0, byref(freetype_face))
        if error:
            raise FT_Exception(error)

        try:
            all_freetype_characters = read_freetype_cmap(freetype_face)
            # read_fonttools_cmap may hand back None when the cmap table has
            # no subtables; treat that as "no characters".
            all_fonttools_characters = read_fonttools_cmap(fonttools_font) or {}

            print('Read {} Free Type Characters'.format(len(all_freetype_characters)))
            print('Read {} Font Tools Characters'.format(len(all_fonttools_characters)))

            print('Checking Mapping')

            # FreeType -> fontTools direction.
            for character, glyphID in all_freetype_characters.items():
                glyphname = fonttools_font.getGlyphName(glyphID)
                if character in all_fonttools_characters:
                    ft_glyphname = all_fonttools_characters[character]
                    if ft_glyphname != glyphname:
                        character_hex = convert_string_to_hex(character)
                        print('Glyph Mismatch: {} Free Type: {} Font Tools: {}'.format(character_hex, glyphname, ft_glyphname))

                else:
                    character_hex = convert_string_to_hex(character)
                    print('Glyph Missing in Font Tools: {}'.format(character_hex))

            # fontTools -> FreeType direction.
            for character, glyphname in all_fonttools_characters.items():
                if character in all_freetype_characters:
                    ft_glyphID = all_freetype_characters[character]
                    ft_glyphname = fonttools_font.getGlyphName(ft_glyphID)
                    if ft_glyphname != glyphname:
                        character_hex = convert_string_to_hex(character)
                        print('Glyph Mismatch: {} Font Tools: {} Free Type: {}'.format(character_hex, glyphname, ft_glyphname))

                else:
                    character_hex = convert_string_to_hex(character)
                    print('Glyph Missing in Free Type: {}'.format(character_hex))

            print('Finished Checking Mapping')
        finally:
            # Release the face even if the comparison raised.
            FT_Done_Face(freetype_face)
    finally:
        FT_Done_FreeType(library)
41 changes: 41 additions & 0 deletions examples/uvs-harness.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

# shorter Unicode Variation Sequences example from #195, with comparison to hb-shape.

# The test file used and known to work is v2.004 of https://github.com/adobe-fonts/source-han-sans/blob/release/OTF/Japanese/SourceHanSans-Regular.otf

import importlib
uvs = importlib.import_module("unicode-variation-sequences")

read_freetype_cmap = uvs.read_freetype_cmap

if __name__ == "__main__":
    # Print the glyph indices FreeType reports for U+9089 and its variation
    # sequences, for comparison with hb-shape output.
    import io
    import os

    directory = os.path.dirname(__file__)
    font_path = os.path.join(directory, 'SourceHanSans-Regular.otf')
    with open(font_path, 'rb') as fontfile:
        memory_file = io.BytesIO(fontfile.read())

    from freetype import *
    # Named explicitly because the cleanup below relies on them.
    from freetype import FT_Done_Face, FT_Done_FreeType

    library = FT_Library()
    error = FT_Init_FreeType(byref(library))
    if error:
        raise FT_Exception(error)

    try:
        freetype_face = FT_Face()
        # Keep a reference to the buffer for as long as the face is in use.
        data = memory_file.getvalue()
        error = FT_New_Memory_Face(library, data, len(data), 0, byref(freetype_face))
        if error:
            raise FT_Exception(error)

        try:
            all_freetype_characters = read_freetype_cmap(freetype_face)
            # {'邉' : ['邉󠄁', '邉󠄂', '邉󠄃', '邉󠄄', '邉󠄅', '邉󠄆', '邉󠄇', '邉󠄈', '邉󠄉', '邉󠄊', '邉󠄋', '邉󠄌', '邉󠄍', '邉󠄎', '邉󠄀']}

            print(all_freetype_characters['邉'], all_freetype_characters['邉󠄁'], all_freetype_characters['邉󠄂'], all_freetype_characters['邉󠄃'],
                  all_freetype_characters['邉󠄄'], all_freetype_characters['邉󠄅'], all_freetype_characters['邉󠄆'], all_freetype_characters['邉󠄇'],
                  all_freetype_characters['邉󠄈'], all_freetype_characters['邉󠄉'], all_freetype_characters['邉󠄊'], all_freetype_characters['邉󠄋'],
                  all_freetype_characters['邉󠄌'], all_freetype_characters['邉󠄍'], all_freetype_characters['邉󠄎'], all_freetype_characters['邉󠄀'],
                  sep='|')
            print('The above should be identical to the output of this hb-shape command:')
            print('    hb-shape --no-glyph-names --no-positions --no-clusters --no-advances SourceHanSans-Regular.otf "邉邉󠄁邉󠄂邉󠄃邉󠄄邉󠄅邉󠄆邉󠄇邉󠄈邉󠄉邉󠄊邉󠄋邉󠄌邉󠄍邉󠄎邉󠄀"')
        finally:
            # Release the face even if a lookup above raised.
            FT_Done_Face(freetype_face)
    finally:
        FT_Done_FreeType(library)
22 changes: 22 additions & 0 deletions freetype/raw.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,28 @@ def FT_Library_SetLcdFilter (*args, **kwargs):
POINTER(FT_OpaquePaint)]
except AttributeError:
pass
try:
    # Unicode Variation Sequences API. The bindings are optional: when the
    # loaded library lacks one of the symbols, the AttributeError below
    # leaves the remaining names undefined.
    FT_Face_GetCharVariantIndex = _lib.FT_Face_GetCharVariantIndex
    FT_Face_GetCharVariantIndex.argtypes = [FT_Face, FT_ULong, FT_ULong]
    FT_Face_GetCharVariantIndex.restype = FT_UInt

    FT_Face_GetCharVariantIsDefault = _lib.FT_Face_GetCharVariantIsDefault
    FT_Face_GetCharVariantIsDefault.argtypes = [FT_Face, FT_ULong, FT_ULong]
    FT_Face_GetCharVariantIsDefault.restype = FT_Int

    FT_Face_GetVariantSelectors = _lib.FT_Face_GetVariantSelectors
    FT_Face_GetVariantSelectors.argtypes = [FT_Face]
    FT_Face_GetVariantSelectors.restype = POINTER(FT_UInt32)

    FT_Face_GetVariantsOfChar = _lib.FT_Face_GetVariantsOfChar
    FT_Face_GetVariantsOfChar.argtypes = [FT_Face, FT_ULong]
    FT_Face_GetVariantsOfChar.restype = POINTER(FT_UInt32)

    FT_Face_GetCharsOfVariant = _lib.FT_Face_GetCharsOfVariant
    # Declare the argument types on GetCharsOfVariant itself so ctypes
    # converts the selector to FT_ULong instead of a default C int.
    FT_Face_GetCharsOfVariant.argtypes = [FT_Face, FT_ULong]
    FT_Face_GetCharsOfVariant.restype = POINTER(FT_UInt32)
except AttributeError:
    pass
FT_Get_Module = _lib.FT_Get_Module
FT_Get_Multi_Master = _lib.FT_Get_Multi_Master
FT_Get_PFR_Advance = _lib.FT_Get_PFR_Advance
Expand Down

0 comments on commit d996824

Please sign in to comment.