-
Notifications
You must be signed in to change notification settings - Fork 88
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #196 from HinTak/variant-selector-addition
API for Unicode Variation Sequences, from https://freetype.org/freety…
- Loading branch information
Showing
3 changed files
with
372 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,309 @@ | ||
#!/usr/bin/env python | ||
# -*- coding: utf-8 -*- | ||
|
||
# The test file used and known to work is v2.004 of https://github.com/adobe-fonts/source-han-sans/blob/release/OTF/Japanese/SourceHanSans-Regular.otf | ||
|
||
import io | ||
import os | ||
import traceback | ||
|
||
from typing import Dict | ||
|
||
from fontTools.ttLib import TTFont | ||
|
||
from ctypes import byref | ||
from freetype import ( | ||
FT_Done_Face, | ||
FT_Done_FreeType, | ||
FT_Exception, | ||
FT_Face, | ||
FT_Get_First_Char, | ||
FT_Get_Next_Char, | ||
FT_Init_FreeType, | ||
FT_Library, | ||
FT_New_Memory_Face, | ||
FT_UInt, | ||
FT_Get_Char_Index, | ||
FT_Face_GetVariantSelectors, | ||
FT_Face_GetCharsOfVariant, | ||
FT_Face_GetCharVariantIndex, | ||
FT_Face_GetCharVariantIsDefault | ||
) | ||
|
||
# Code points accepted as variation selectors when reading a format 14 cmap.
_ALL_IVS_NUMBERS = list(range(0xe0100, 0xe01f0))  # Variation Selectors Supplement (VS17-VS256)
_ALL_IVS_NUMBERS += list(range(0xfe00, 0xfe10))  # Variation Selectors (VS1-VS16)


def _read_cmap_uvs(uvsDict):
    """Split a format 14 ``uvsDict`` into explicit and default variation sequences.

    Args:
        uvsDict: Mapping of a variation selector code point to a list of
            ``(character code point, glyph name)`` entries.  Selectors that
            are not listed in ``_ALL_IVS_NUMBERS`` are ignored.

    Returns:
        A ``(all_characters, all_default_characters)`` tuple:

        * ``all_characters`` maps ``character + selector`` strings to the
          glyph name given for that sequence.  Entries whose glyph name is
          ``'.notdef'`` are dropped.
        * ``all_default_characters`` lists the ``character + selector``
          strings whose entry carries no glyph name (``None`` or empty);
          the caller resolves those through the base character.
    """
    all_characters = {}
    all_default_characters = []
    for item, item_list in uvsDict.items():
        ivs_val = int(item)
        if ivs_val not in _ALL_IVS_NUMBERS:
            continue

        ivs_chr = chr(ivs_val)
        for character_tuple in item_list:
            glyph_name = character_tuple[1]
            if glyph_name == '.notdef':
                continue

            ivs_character = chr(int(character_tuple[0])) + ivs_chr
            if glyph_name:
                # A sequence must not be listed twice under one selector.
                assert ivs_character not in all_characters
                all_characters[ivs_character] = glyph_name
            else:
                all_default_characters.append(ivs_character)

    return all_characters, all_default_characters
|
||
def _read_character(character_value, encoding):
    """Return the character for ``character_value``, or ``None``.

    Only the ``'utf_16_be'`` encoding name is understood; for any other
    encoding the function returns ``None`` without looking at the value.
    """
    if encoding != 'utf_16_be':
        return None

    return chr(character_value)
|
||
# (platformID, platEncID) pairs of the cmap subtables worth reading, ordered
# from most preferred (index 0) to least preferred.
_CMAP_PRIORITY_LIST = [
    (3, 10),  # Windows Unicode full repertoire
    (0, 6),   # Unicode full repertoire (format 13 subtable)
    (0, 4),   # Unicode 2.0 full repertoire
    (3, 1),   # Windows Unicode BMP
    (0, 3),   # Unicode 2.0 BMP
    (0, 2),   # Unicode ISO/IEC 10646
    (0, 1),   # Unicode 1.1
    (0, 0),   # Unicode 1.0
]
|
||
def read_fonttools_cmap(font) -> Dict[str, str]:
    """Build a character -> glyph name mapping from a fontTools ``cmap`` table.

    Only subtables whose encoding is ``'utf_16_be'`` are considered.

    * Format 14 subtables contribute ``character + selector`` keys via
      ``_read_cmap_uvs``.
    * Other subtables are read only when their ``(platformID, platEncID)``
      pair appears in ``_CMAP_PRIORITY_LIST`` and is strictly better (lower
      index) than the best pair read so far.  Entries of a better table
      overwrite those of an earlier, worse one.
    * Default variation sequences are finally resolved to the glyph of their
      base character, when that character is mapped.

    A subtable that raises while being read is reported with a traceback and
    skipped, so one broken subtable does not abort the whole read.

    Args:
        font: A ``fontTools.ttLib.TTFont`` instance.

    Returns:
        The mapping; empty when the ``cmap`` table exposes no ``tables``.
    """
    assert isinstance(font, TTFont)
    if not hasattr(font["cmap"], 'tables'):
        return {}

    all_characters = {}
    all_default_characters = []
    best_read_index = None
    for table in font["cmap"].tables:
        encoding = table.getEncoding()
        if encoding != 'utf_16_be':
            continue

        try:
            if table.format == 14:
                if not hasattr(table, 'uvsDict'):
                    print('Unknown CMAP Format 14: {}:'.format(vars(table)))
                    continue

                all_uvs_data, default_characters = _read_cmap_uvs(table.uvsDict)
                all_default_characters += default_characters
                for character, glyphname in all_uvs_data.items():
                    if character in all_characters:
                        assert all_characters[character] == glyphname
                    else:
                        all_characters[character] = glyphname

            elif hasattr(table, 'cmap'):
                tuple_value = (table.platformID, table.platEncID)
                if tuple_value not in _CMAP_PRIORITY_LIST:
                    continue

                index_value = _CMAP_PRIORITY_LIST.index(tuple_value)
                # Compare against None explicitly: index 0 is the *best*
                # priority and must not be mistaken for "nothing read yet".
                if best_read_index is not None and index_value >= best_read_index:
                    continue
                best_read_index = index_value

                all_items = table.cmap.items()
                if len(all_items) == 0 and table.format != 6:
                    print('Unknown CMAP Format {}: {}:'.format(table.format, vars(table)))

                for code_point, glyphname in all_items:
                    character = _read_character(code_point, encoding)
                    if glyphname == '.notdef':
                        continue

                    if character is not None:
                        all_characters[character] = glyphname

        except Exception:
            # Best effort: report the broken subtable and carry on.
            traceback.print_exc()
            continue

    # A default variation sequence renders with the glyph of its base character.
    for ivs_character in all_default_characters:
        first_character = ivs_character[0]
        if first_character in all_characters:
            all_characters[ivs_character] = all_characters[first_character]

    return all_characters
|
||
def _copy_zero_terminated(values):
    """Copy a zero-terminated array (indexable from 0) into a Python list."""
    copied = []
    index = 0
    while values[index] != 0:
        copied.append(values[index])
        index += 1
    return copied


def read_freetype_cmap(face: FT_Face) -> Dict[str, int]:
    """Build a character -> glyph index mapping from the face's active charmap.

    Keys are either a single character or a two-character
    ``character + variation selector`` string.  Entries whose glyph index is
    0 are left out.

    Args:
        face: An open FreeType face handle.

    Returns:
        The mapping.  It is empty when the face has no active charmap, or
        when the active charmap is neither platform 0 nor platform 3 with
        encoding 1 or 10.
    """
    charmap = face.contents.charmap
    if not charmap:
        # No charmap selected: dereferencing a NULL ctypes pointer would raise.
        return {}

    platID = charmap.contents.platform_id
    encodingID = charmap.contents.encoding_id
    if platID == 3:
        if encodingID not in (1, 10):
            return {}
    elif platID != 0:  # platform 0 is accepted as is; everything else is not
        return {}

    all_characters = []
    gindex = FT_UInt()
    charcode = FT_Get_First_Char(face, byref(gindex))
    while gindex.value != 0:
        all_characters.append(chr(charcode))
        charcode = FT_Get_Next_Char(face, charcode, byref(gindex))

    variant_selectors_list = FT_Face_GetVariantSelectors(face)
    if bool(variant_selectors_list):
        # FreeType documents the returned arrays as owned by the face and
        # liable to be overwritten by a later FreeType call, so each array is
        # copied in full before the next call is made.
        for selector_value in _copy_zero_terminated(variant_selectors_list):
            character_value_list = FT_Face_GetCharsOfVariant(face, selector_value)
            assert bool(character_value_list)
            for character_value in _copy_zero_terminated(character_value_list):
                all_characters.append(chr(character_value) + chr(selector_value))

    character_to_glyphID = {}
    for character in all_characters:
        if len(character) == 2:
            glyphID = FT_Face_GetCharVariantIndex(
                face, ord(character[0]), ord(character[1]))
        else:
            assert len(character) == 1
            glyphID = FT_Get_Char_Index(face, ord(character))

        if glyphID != 0:
            assert character not in character_to_glyphID
            character_to_glyphID[character] = glyphID

    return character_to_glyphID
|
||
def _convert_character_to_hex(text: str):
    """Return the code point of a single character as lowercase hex.

    The result is zero-padded to at least four digits and grows as needed
    for larger code points (for example ``'0041'`` or ``'e0101'``).
    """
    assert len(text) == 1
    return '{0:04x}'.format(ord(text))


def convert_string_to_hex(text: str):
    """Return the code points of ``text`` as hex, joined with ``'-'``.

    Each character is rendered by ``_convert_character_to_hex``; an empty
    string yields an empty string.
    """
    assert isinstance(text, str)
    return '-'.join(_convert_character_to_hex(character) for character in text)
|
||
if __name__ == "__main__":
    # Cross-check the character -> glyph mapping reported by FreeType against
    # the one reported by fontTools for the same font file.
    directory = os.path.dirname(__file__)
    font_path = os.path.join(directory, 'SourceHanSans-Regular.otf')
    with open(font_path, 'rb') as fontfile:
        memory_file = io.BytesIO(fontfile.read())

    fonttools_font = TTFont(memory_file, 0, allowVID=0,
                            ignoreDecompileErrors=True,
                            fontNumber=-1)

    library = FT_Library()
    error = FT_Init_FreeType(byref(library))
    if error:
        raise FT_Exception(error)

    try:
        freetype_face = FT_Face()
        # Keep ``data`` referenced for as long as the face is open: the face
        # is created from this buffer (see the FT_New_Memory_Face reference).
        data = memory_file.getvalue()
        error = FT_New_Memory_Face(library, data, len(data), 0, byref(freetype_face))
        if error:
            raise FT_Exception(error)

        try:
            all_freetype_characters = read_freetype_cmap(freetype_face)
            # Tolerate a None result so the report below can still be printed.
            all_fonttools_characters = read_fonttools_cmap(fonttools_font) or {}

            print('Read {} Free Type Characters'.format(len(all_freetype_characters)))
            print('Read {} Font Tools Characters'.format(len(all_fonttools_characters)))

            print('Checking Mapping')

            for character, glyphID in all_freetype_characters.items():
                glyphname = fonttools_font.getGlyphName(glyphID)
                if character in all_fonttools_characters:
                    ft_glyphname = all_fonttools_characters[character]
                    if ft_glyphname != glyphname:
                        character_hex = convert_string_to_hex(character)
                        print('Glyph Mismatch: {} Free Type: {} Font Tools: {}'.format(character_hex, glyphname, ft_glyphname))
                else:
                    character_hex = convert_string_to_hex(character)
                    print('Glyph Missing in Font Tools: {}'.format(character_hex))

            for character, glyphname in all_fonttools_characters.items():
                if character in all_freetype_characters:
                    ft_glyphID = all_freetype_characters[character]
                    ft_glyphname = fonttools_font.getGlyphName(ft_glyphID)
                    if ft_glyphname != glyphname:
                        character_hex = convert_string_to_hex(character)
                        print('Glyph Mismatch: {} Font Tools: {} Free Type: {}'.format(character_hex, glyphname, ft_glyphname))
                else:
                    character_hex = convert_string_to_hex(character)
                    print('Glyph Missing in Free Type: {}'.format(character_hex))

            print('Finished Checking Mapping')
        finally:
            # Release the face even when the comparison raises.
            FT_Done_Face(freetype_face)
    finally:
        FT_Done_FreeType(library)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
#!/usr/bin/env python | ||
# -*- coding: utf-8 -*- | ||
|
||
# shorter Unicode Variation Sequences example from #195, with comparison to hb-shape. | ||
|
||
# The test file used and known to work is v2.004 of https://github.com/adobe-fonts/source-han-sans/blob/release/OTF/Japanese/SourceHanSans-Regular.otf | ||
|
||
import importlib

# The sibling example's file name contains hyphens, so it cannot be loaded
# with a plain ``import`` statement.
uvs = importlib.import_module("unicode-variation-sequences")

read_freetype_cmap = uvs.read_freetype_cmap

if __name__ == "__main__":
    import os
    import io
    directory = os.path.dirname(__file__)
    font_path = os.path.join(directory, 'SourceHanSans-Regular.otf')
    with open(font_path, 'rb') as fontfile:
        memory_file = io.BytesIO(fontfile.read())

    from freetype import *
    from freetype import FT_Done_Face, FT_Done_FreeType
    library = FT_Library()
    error = FT_Init_FreeType(byref(library))
    if error:
        raise FT_Exception(error)

    try:
        freetype_face = FT_Face()
        # Keep ``data`` referenced for as long as the face is open: the face
        # is created from this buffer (see the FT_New_Memory_Face reference).
        data = memory_file.getvalue()
        error = FT_New_Memory_Face(library, data, len(data), 0, byref(freetype_face))
        if error:
            raise FT_Exception(error)

        try:
            all_freetype_characters = read_freetype_cmap(freetype_face)

            # U+9089 on its own, then U+9089 followed by the variation
            # selectors U+E0101 .. U+E010E and finally U+E0100.  The
            # selectors are invisible in most editors, so they are spelled
            # out as code points here instead of being embedded in literals.
            base_character = '\u9089'
            selectors = [chr(0xE0100 + offset) for offset in list(range(1, 15)) + [0]]
            sequences = [base_character] + [base_character + selector for selector in selectors]

            print(*[all_freetype_characters[sequence] for sequence in sequences],
                  sep='|')
            print('The above should be identical to the output of this hb-shape command:')
            print(' hb-shape --no-glyph-names --no-positions --no-clusters --no-advances SourceHanSans-Regular.otf "{}"'.format(''.join(sequences)))
        finally:
            # Release the face even when a lookup above raises.
            FT_Done_Face(freetype_face)
    finally:
        FT_Done_FreeType(library)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters