diff --git a/examples/unicode-variation-sequences.py b/examples/unicode-variation-sequences.py
new file mode 100755
index 0000000..39971c3
--- /dev/null
+++ b/examples/unicode-variation-sequences.py
@@ -0,0 +1,285 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# The test file used and known to work is v2.004 of https://github.com/adobe-fonts/source-han-sans/blob/release/OTF/Japanese/SourceHanSans-Regular.otf
+
+import io
+import os
+import traceback
+
+from typing import Dict
+
+from fontTools.ttLib import TTFont
+
+from ctypes import byref
+from freetype import (
+    FT_Done_Face,
+    FT_Done_FreeType,
+    FT_Exception,
+    FT_Face,
+    FT_Get_First_Char,
+    FT_Get_Next_Char,
+    FT_Init_FreeType,
+    FT_Library,
+    FT_New_Memory_Face,
+    FT_UInt,
+    FT_Get_Char_Index,
+    FT_Face_GetVariantSelectors,
+    FT_Face_GetCharsOfVariant,
+    FT_Face_GetCharVariantIndex,
+    FT_Face_GetCharVariantIsDefault
+)
+
+# Code points accepted as variation selectors when reading a format 14 cmap.
+_ALL_IVS_NUMBERS = [x for x in range(0xe0100, 0xe01f0)]  # Variation Selectors Supplement
+_ALL_IVS_NUMBERS += [x for x in range(0xfe00, 0xfe10)]  # Variation Selectors (BMP)
+
+def _read_cmap_uvs(uvsDict):
+    """Split a format 14 ``uvsDict`` into explicit and default sequences.
+
+    ``uvsDict`` is iterated as selector code point -> list of
+    ``(base code point, glyph name)`` pairs. Selectors outside
+    ``_ALL_IVS_NUMBERS`` and pairs named '.notdef' are skipped.
+
+    Returns a tuple: a dict mapping each ``base + selector`` string to its
+    glyph name, and a list of the ``base + selector`` strings whose glyph
+    name was empty (the caller resolves those through the base character).
+    """
+    all_characters = {}
+    all_default_characters = []
+    for item, item_list in uvsDict.items():
+        ivs_val = int(item)
+        if ivs_val not in _ALL_IVS_NUMBERS:
+            continue
+
+        ivs_chr = chr(ivs_val)
+        for character_tuple in item_list:
+            character_val = character_tuple[0]
+            glyph_name = character_tuple[1]
+            if glyph_name == '.notdef':
+                continue
+
+            ivs_character = chr(int(character_val)) + ivs_chr
+            if glyph_name:
+                assert ivs_character not in all_characters
+                all_characters[ivs_character] = glyph_name
+            else:
+                all_default_characters.append(ivs_character)
+
+    return all_characters, all_default_characters
+
+def _read_character(character_value, encoding):
+    """Return ``chr(character_value)`` for 'utf_16_be' tables, otherwise None."""
+    if encoding != 'utf_16_be':
+        return None
+
+    return chr(character_value)
+
+# Unicode cmap subtables as (platformID, platEncID), most preferred first.
+_CMAP_PRIORITY_LIST = [
+    (3, 10), # Windows Unicode full repertoire
+    (0, 6),  # Unicode full repertoire (format 13 subtable)
+    (0, 4),  # Unicode 2.0 full repertoire
+    (3, 1),  # Windows Unicode BMP
+    (0, 3),  # Unicode 2.0 BMP
+    (0, 2),  # Unicode ISO/IEC 10646
+    (0, 1),  # Unicode 1.1
+    (0, 0)   # Unicode 1.0
+]
+
+def read_fonttools_cmap(font) -> Dict[str, str]:
+    """Map characters and variation sequences of ``font`` to glyph names.
+
+    Keys are single characters, or two-character ``base + selector`` strings
+    for sequences found in format 14 subtables. Only subtables whose
+    encoding is 'utf_16_be' are read, and default variation sequences take
+    the glyph name of their base character.
+
+    Returns an empty dict when the cmap table exposes no ``tables``.
+    """
+    assert isinstance(font, TTFont)
+    if not hasattr(font["cmap"], 'tables'):
+        return {}
+
+    all_characters = {}
+    all_default_characters = []
+    best_read_index = None
+    for table in font["cmap"].tables:
+        encoding = table.getEncoding()
+        if encoding != 'utf_16_be':
+            continue
+        try:
+            if table.format == 14:
+                if hasattr(table, 'uvsDict'):
+                    all_uvs_data, default_characters = _read_cmap_uvs(table.uvsDict)
+                    all_default_characters += default_characters
+                    for character, glyphname in all_uvs_data.items():
+                        if character in all_characters:
+                            assert all_characters[character] == glyphname
+                        else:
+                            all_characters[character] = glyphname
+                else:
+                    print('Unknown CMAP Format 14: {}:'.format(vars(table)))
+
+            elif hasattr(table, 'cmap'):
+                tuple_value = (table.platformID, table.platEncID)
+                if tuple_value in _CMAP_PRIORITY_LIST:
+                    index_value = _CMAP_PRIORITY_LIST.index(tuple_value)
+                    # Compare with None explicitly: index 0 is the most preferred
+                    # subtable and must not be mistaken for "nothing read yet".
+                    if best_read_index is not None and index_value >= best_read_index:
+                        continue
+                    best_read_index = index_value
+
+                all_items = table.cmap.items()
+                if len(all_items) == 0:
+                    if table.format != 6:
+                        print('Unknown CMAP Format {}: {}:'.format(table.format, vars(table)))
+
+                for item in all_items:
+                    character = _read_character(item[0], encoding)
+                    glyphname = item[1]
+                    if glyphname == '.notdef':
+                        continue
+
+                    if character is not None:
+                        all_characters[character] = glyphname
+
+        except Exception:
+            traceback.print_exc()
+            continue
+
+    for ivs_character in all_default_characters:
+        first_character = ivs_character[0]
+        if first_character in all_characters:
+            all_characters[ivs_character] = all_characters[first_character]
+
+    return all_characters
+
+def _read_zero_terminated(values):
+    """Return the entries of the ctypes array pointer ``values`` before its first 0.
+
+    A NULL pointer yields an empty list.
+    """
+    result = []
+    if bool(values):
+        index = 0
+        while values[index] != 0:
+            result.append(values[index])
+            index += 1
+
+    return result
+
+def read_freetype_cmap(face: FT_Face) -> Dict[str, int]:
+    """Map characters and variation sequences of ``face`` to glyph indices.
+
+    Uses the charmap currently selected on ``face``; an empty dict is returned
+    unless it is platform 0, or platform 3 with encoding 1 or 10. Keys are
+    single characters, or two-character ``base + selector`` strings.
+    Entries whose glyph index is 0 are left out.
+    """
+    platID = face.contents.charmap.contents.platform_id
+    encodingID = face.contents.charmap.contents.encoding_id
+    if platID == 3:
+        if encodingID not in [1, 10]:
+            return {}
+    elif platID != 0:
+        return {}
+
+    all_characters = []
+    gindex = FT_UInt()
+    charcode = FT_Get_First_Char(face, byref(gindex))
+    while gindex.value != 0:
+        all_characters.append(chr(charcode))
+        charcode = FT_Get_Next_Char(face, charcode, byref(gindex))
+
+    for selector_value in _read_zero_terminated(FT_Face_GetVariantSelectors(face)):
+        character_value_list = FT_Face_GetCharsOfVariant(face, selector_value)
+        assert bool(character_value_list)
+        for character_value in _read_zero_terminated(character_value_list):
+            all_characters.append(chr(character_value) + chr(selector_value))
+
+    character_to_glyphID = {}
+    for character in all_characters:
+        if len(character) == 2:
+            glyphID = FT_Face_GetCharVariantIndex(face, ord(character[0]), ord(character[1]))
+        else:
+            assert len(character) == 1
+            glyphID = FT_Get_Char_Index(face, ord(character))
+
+        if glyphID != 0:
+            assert character not in character_to_glyphID
+            character_to_glyphID[character] = glyphID
+
+    return character_to_glyphID
+
+def _convert_character_to_hex(text: str):
+    """Return the code point of the single character ``text`` as lowercase hex.
+
+    The result is zero-padded to at least four digits.
+    """
+    assert len(text) == 1
+    return '{0:04x}'.format(ord(text))
+
+def convert_string_to_hex(text: str):
+    """Return the code points of ``text`` as hex, joined with '-'."""
+    assert isinstance(text, str)
+    return '-'.join(_convert_character_to_hex(character) for character in text)
+
+if __name__ == "__main__":
+    directory = os.path.dirname(__file__)
+    font_path = os.path.join(directory, 'SourceHanSans-Regular.otf')
+    memory_file = io.BytesIO()
+    with open(font_path, 'rb') as fontfile:
+        memory_file.write(fontfile.read())
+    memory_file.seek(0)
+
+    fonttools_font = TTFont(memory_file, 0, allowVID=0,
+                            ignoreDecompileErrors=True,
+                            fontNumber=-1)
+
+    library = FT_Library()
+    error = FT_Init_FreeType(byref(library))
+    if error: raise FT_Exception(error)
+
+    freetype_face = FT_Face()
+    data = memory_file.getvalue()
+    error = FT_New_Memory_Face(library, data, len(data), 0, byref(freetype_face))
+    if error: raise FT_Exception(error)
+
+    all_freetype_characters = read_freetype_cmap(freetype_face)
+    all_fonttools_characters = read_fonttools_cmap(fonttools_font)
+
+    print('Read {} Free Type Characters'.format(len(all_freetype_characters)))
+    print('Read {} Font Tools Characters'.format(len(all_fonttools_characters)))
+
+    print('Checking Mapping')
+
+    for character, glyphID in all_freetype_characters.items():
+        glyphname = fonttools_font.getGlyphName(glyphID)
+        if character in all_fonttools_characters:
+            ft_glyphname = all_fonttools_characters[character]
+            if ft_glyphname != glyphname:
+                character_hex = convert_string_to_hex(character)
+                print('Glyph Mismatch: {} Free Type: {} Font Tools: {}'.format(character_hex, glyphname, ft_glyphname))
+
+        else:
+            character_hex = convert_string_to_hex(character)
+            print('Glyph Missing in Font Tools: {}'.format(character_hex))
+
+    for character, glyphname in all_fonttools_characters.items():
+        if character in all_freetype_characters:
+            ft_glyphID = all_freetype_characters[character]
+            ft_glyphname = fonttools_font.getGlyphName(ft_glyphID)
+            if ft_glyphname != glyphname:
+                character_hex = convert_string_to_hex(character)
+                print('Glyph Mismatch: {} Font Tools: {} Free Type: {}'.format(character_hex, glyphname, ft_glyphname))
+
+        else:
+            character_hex = convert_string_to_hex(character)
+            print('Glyph Missing in Free Type: {}'.format(character_hex))
+
+    print('Finished Checking Mapping')
+
+    FT_Done_Face(freetype_face)
+    FT_Done_FreeType(library)
diff --git a/examples/uvs-harness.py b/examples/uvs-harness.py
new file mode 100644
index 0000000..7770a58
--- /dev/null
+++ b/examples/uvs-harness.py
@@ -0,0 +1,41 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# shorter Unicode Variation Sequences example from #195, with comparison to hb-shape.
+
+# The test file used and known to work is v2.004 of https://github.com/adobe-fonts/source-han-sans/blob/release/OTF/Japanese/SourceHanSans-Regular.otf
+
+import importlib
+uvs = importlib.import_module("unicode-variation-sequences")
+
+read_freetype_cmap = uvs.read_freetype_cmap
+
+if __name__ == "__main__":
+    import os
+    from freetype import *
+
+    directory = os.path.dirname(__file__)
+    font_path = os.path.join(directory, 'SourceHanSans-Regular.otf')
+    with open(font_path, 'rb') as fontfile:
+        data = fontfile.read()
+
+    library = FT_Library()
+    error = FT_Init_FreeType(byref(library))
+    if error: raise FT_Exception(error)
+
+    freetype_face = FT_Face()
+    error = FT_New_Memory_Face(library, data, len(data), 0, byref(freetype_face))
+    if error: raise FT_Exception(error)
+
+    all_freetype_characters = read_freetype_cmap(freetype_face)
+    FT_Done_Face(freetype_face)
+    FT_Done_FreeType(library)
+
+    # The bare character, then the character followed by U+E0101..U+E010E and finally U+E0100.
+    # The selectors are invisible, so they are built from code points instead of being typed inline.
+    base = '邉'
+    sequences = [base] + [base + chr(vs) for vs in [*range(0xE0101, 0xE010F), 0xE0100]]
+
+    print(*(all_freetype_characters[s] for s in sequences), sep='|')
+    print('The above should be identical to the output of this hb-shape command:')
+    print(' hb-shape --no-glyph-names --no-positions --no-clusters --no-advances SourceHanSans-Regular.otf "{}"'.format(''.join(sequences)))
diff --git a/freetype/raw.py b/freetype/raw.py
index a07984a..ffc101b 100644
--- a/freetype/raw.py
+++ b/freetype/raw.py
@@ -247,6 +247,28 @@ def FT_Library_SetLcdFilter (*args, **kwargs):
POINTER(FT_OpaquePaint)] except AttributeError: pass +try: + FT_Face_GetCharVariantIndex = _lib.FT_Face_GetCharVariantIndex + FT_Face_GetCharVariantIndex.argtypes = [FT_Face, FT_ULong, FT_ULong] + FT_Face_GetCharVariantIndex.restype = FT_UInt + + FT_Face_GetCharVariantIsDefault = _lib.FT_Face_GetCharVariantIsDefault + FT_Face_GetCharVariantIsDefault.argtypes = [FT_Face, FT_ULong, FT_ULong] + FT_Face_GetCharVariantIsDefault.restype = FT_Int + + FT_Face_GetVariantSelectors = _lib.FT_Face_GetVariantSelectors + FT_Face_GetVariantSelectors.argtypes = [FT_Face] + FT_Face_GetVariantSelectors.restype = POINTER(FT_UInt32) + + FT_Face_GetVariantsOfChar = _lib.FT_Face_GetVariantsOfChar + FT_Face_GetVariantsOfChar.argtypes = [FT_Face, FT_ULong] + FT_Face_GetVariantsOfChar.restype = POINTER(FT_UInt32) + + FT_Face_GetCharsOfVariant = _lib.FT_Face_GetCharsOfVariant + FT_Face_GetVariantsOfChar.argtypes = [FT_Face, FT_ULong] + FT_Face_GetCharsOfVariant.restype = POINTER(FT_UInt32) +except AttributeError: + pass FT_Get_Module = _lib.FT_Get_Module FT_Get_Multi_Master = _lib.FT_Get_Multi_Master FT_Get_PFR_Advance = _lib.FT_Get_PFR_Advance