Merge pull request #196 from HinTak/variant-selector-addition

API for Unicode Variation Sequences, from https://freetype.org/freety…
rougier · Jun 18, 2024 · d996824 · d996824
2 parents 2f8f7d8 + 11101e6
commit d996824
Show file tree

Hide file tree

Showing 3 changed files with 372 additions and 0 deletions.
diff --git a/examples/unicode-variation-sequences.py b/examples/unicode-variation-sequences.py
@@ -0,0 +1,309 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# The test file used and known to work is v2.004 of https://github.com/adobe-fonts/source-han-sans/blob/release/OTF/Japanese/SourceHanSans-Regular.otf
+
+import io
+import os
+import traceback
+
+from typing import Dict
+
+from fontTools.ttLib import TTFont
+
+from ctypes import byref
+from freetype import (
+    FT_Done_Face,
+    FT_Done_FreeType,
+    FT_Exception,
+    FT_Face,
+    FT_Get_First_Char,
+    FT_Get_Next_Char,
+    FT_Init_FreeType,
+    FT_Library,
+    FT_New_Memory_Face,
+    FT_UInt,
+    FT_Get_Char_Index,
+    FT_Face_GetVariantSelectors,
+    FT_Face_GetCharsOfVariant,
+    FT_Face_GetCharVariantIndex,
+    FT_Face_GetCharVariantIsDefault
+)
+
+_ALL_IVS_NUMBERS  = [x for x in range( 0xe0100, 0xe01f0)] # U+E0100..U+E01EF: Variation Selectors Supplement (VS17..VS256)
+_ALL_IVS_NUMBERS += [x for x in range( 0xfe00,  0xfe10)]  # U+FE00..U+FE0F: Variation Selectors (VS1..VS16)
+
+def _read_cmap_uvs(uvsDict):
+    """Split a format 14 ``uvsDict`` into explicit and default sequences.
+
+    Returns ``(explicit, defaults)``: ``explicit`` maps each two-character
+    string (base character followed by its selector) to a glyph name, while
+    ``defaults`` lists the sequences whose entry carries no glyph name.
+    Selectors outside ``_ALL_IVS_NUMBERS`` and '.notdef' entries are skipped.
+    """
+    explicit = {}
+    defaults = []
+    for selector_key, entries in uvsDict.items():
+        selector_code = int(selector_key)
+        if selector_code not in _ALL_IVS_NUMBERS:
+            continue
+
+        selector = chr(selector_code)
+        for entry in entries:
+            base_code, glyph_name = entry[0], entry[1]
+            if not glyph_name:
+                defaults.append(chr(int(base_code)) + selector)
+            elif glyph_name != '.notdef':
+                sequence = chr(int(base_code)) + selector
+                assert sequence not in explicit
+                explicit[sequence] = glyph_name
+
+    return explicit, defaults
+
+def _read_character(character_value, encoding):
+    """Return ``chr(character_value)`` for 'utf_16_be' tables, else ``None``."""
+    # Only the Unicode encoding is decoded; any other name yields None.
+    if encoding != 'utf_16_be':
+        return None
+
+    return chr(character_value)
+
+_CMAP_PRIORITY_LIST = [  # (platformID, platEncID) pairs; a lower index is preferred by read_fonttools_cmap
+    (3, 10),  # Windows Unicode full repertoire
+    (0,  6),  # Unicode full repertoire (format 13 subtable)
+    (0,  4),  # Unicode 2.0 full repertoire
+    (3,  1),  # Windows Unicode BMP
+    (0,  3),  # Unicode 2.0 BMP
+    (0,  2),  # Unicode ISO/IEC 10646
+    (0,  1),  # Unicode 1.1
+    (0,  0)   # Unicode 1.0
+]
+
+def read_fonttools_cmap(font) -> Dict[str, str]:
+    """Build a character/sequence -> glyph name map from a fontTools cmap.
+
+    Only subtables whose ``getEncoding()`` is 'utf_16_be' are read.  Format
+    14 subtables contribute variation sequences through ``_read_cmap_uvs``;
+    other subtables contribute single characters, and a subtable listed in
+    ``_CMAP_PRIORITY_LIST`` is skipped unless it ranks strictly better than
+    the best listed subtable read so far.  Default variation sequences are
+    resolved at the end to the glyph of their base character.
+
+    Returns an empty dict when the cmap table exposes no ``tables``.
+    """
+    assert isinstance(font, TTFont)
+    if not hasattr(font["cmap"], 'tables'):
+        return {}
+
+    all_characters         = {}
+    all_default_characters = []
+    best_read_index        = None
+    for table in font["cmap"].tables:
+        encoding = table.getEncoding()
+        if encoding != 'utf_16_be':
+            continue
+
+        try:
+            if table.format == 14:
+                if not hasattr(table, 'uvsDict'):
+                    print('Unknown CMAP Format 14: {}:'.format(vars(table)))
+                    continue
+
+                all_uvs_data, default_characters = _read_cmap_uvs(table.uvsDict)
+                all_default_characters          += default_characters
+                for character, glyphname in all_uvs_data.items():
+                    if character in all_characters:
+                        assert all_characters[character] == glyphname
+                    else:
+                        all_characters[character] = glyphname
+
+            elif hasattr(table, 'cmap'):
+                tuple_value = (table.platformID, table.platEncID)
+                if tuple_value in _CMAP_PRIORITY_LIST:
+                    index_value = _CMAP_PRIORITY_LIST.index(tuple_value)
+                    # Compare against None explicitly: index 0 is the best
+                    # rank and is falsy, so a truthiness test would forget
+                    # it and let a lower-priority subtable overwrite it.
+                    if best_read_index is not None and index_value >= best_read_index:
+                        continue
+                    best_read_index = index_value
+
+                all_items = table.cmap.items()
+                if len(all_items) == 0 and table.format != 6:
+                    print('Unknown CMAP Format {}: {}:'.format(table.format, vars(table)))
+
+                for character_value, glyphname in all_items:
+                    character = _read_character(character_value, encoding)
+                    if glyphname == '.notdef':
+                        continue
+
+                    if character is not None:
+                        all_characters[character] = glyphname
+
+        except Exception:
+            # Best effort: report the broken subtable and keep reading.
+            traceback.print_exc()
+            continue
+
+    # A default sequence renders with the glyph of its bare base character.
+    for ivs_character in all_default_characters:
+        first_character = ivs_character[0]
+        if first_character in all_characters:
+            all_characters[ivs_character] = all_characters[first_character]
+
+    return all_characters
+
+def read_freetype_cmap(face: FT_Face) -> Dict[str, int]:
+    """Map every character and variation sequence of *face* to a glyph index.
+
+    The active charmap must be a Unicode one (platform 0, or platform 3 with
+    encoding 1 or 10); for any other charmap an empty dict is returned.
+
+    Args:
+        face: FreeType face handle whose current charmap is enumerated.
+
+    Returns:
+        Dict keyed by a single character, or by a base character followed
+        by a variation selector.  Entries that resolve to glyph index 0
+        are left out.
+    """
+    def _zero_terminated(values):
+        # Yield the items of a ctypes array pointer up to its 0 sentinel.
+        position = 0
+        value    = values[0]
+        while value != 0:
+            yield value
+            position += 1
+            value     = values[position]
+
+    platform = face.contents.charmap.contents.platform_id
+    encoding = face.contents.charmap.contents.encoding_id
+    if not (platform == 0 or (platform == 3 and encoding in (1, 10))):
+        return {}
+
+    # Pass 1a: plain characters, in charmap order.
+    # FT_Get_First_Char / FT_Get_Next_Char report the glyph index through
+    # the out-parameter; a value of 0 ends the enumeration.
+    sequences   = []
+    glyph_index = FT_UInt()
+    code        = FT_Get_First_Char(face, byref(glyph_index))
+    while glyph_index.value != 0:
+        sequences.append(chr(code))
+        code = FT_Get_Next_Char(face, code, byref(glyph_index))
+
+    # Pass 1b: base character + selector pairs, selector by selector.
+    selectors = FT_Face_GetVariantSelectors(face)
+    if bool(selectors):
+        # The selectors are copied into a list first: FreeType documents
+        # that the returned array may be overwritten by a later call.
+        for selector in list(_zero_terminated(selectors)):
+            bases = FT_Face_GetCharsOfVariant(face, selector)
+            assert(bool(bases))
+            for base in _zero_terminated(bases):
+                sequences.append(chr(base) + chr(selector))
+
+    # Pass 2: resolve each key; a glyph index of 0 means "not mapped".
+    glyph_ids = {}
+    for sequence in sequences:
+        if len(sequence) == 2:
+            glyph_id = FT_Face_GetCharVariantIndex(
+                face, ord(sequence[0]), ord(sequence[1]))
+        else:
+            assert len(sequence) == 1
+            glyph_id = FT_Get_Char_Index(face, ord(sequence))
+
+        if glyph_id != 0:
+            assert sequence not in glyph_ids
+            glyph_ids[sequence] = glyph_id
+
+    return glyph_ids
+
+def _convert_character_to_hex(text: str):
+    """Return the code point of a single character as lowercase hex.
+
+    The result is zero-padded to at least four digits ('0041', 'e0100',
+    '10ffff'), matching the usual U+XXXX spelling without the prefix.
+
+    Args:
+        text: A string of exactly one character.
+
+    Returns:
+        The hexadecimal digits of ``ord(text)``, four to six characters long.
+
+    Raises:
+        AssertionError: If *text* is not exactly one character long.
+    """
+    assert len(text) == 1
+    # ord() never exceeds 0x10FFFF, so the former 7- and 8-digit branches
+    # (one of which carried an assertion that could never hold) were dead;
+    # a minimum width of four reproduces every reachable branch.
+    return '{0:04x}'.format(ord(text))
+
+def convert_string_to_hex(text: str):
+    """Spell *text* as the hex code points of its characters joined by '-'.
+
+    Each character is formatted by ``_convert_character_to_hex``; an empty
+    string yields an empty result.
+    """
+    assert isinstance(text, str)
+    hex_codes = [_convert_character_to_hex(character) for character in text]
+
+    return '-'.join(hex_codes)
+
+if __name__ == "__main__":  # cross-check the FreeType and fontTools character maps of one font
+    directory   = os.path.dirname(__file__)
+    font_path   = os.path.join(directory, 'SourceHanSans-Regular.otf')  # looked up next to this script
+    memory_file = io.BytesIO()
+    with open(font_path, 'rb') as fontfile:
+        memory_file.write(fontfile.read())
+        memory_file.seek(0)
+    # Both libraries below read the font from this single in-memory copy.
+    fonttools_font = TTFont(memory_file, 0, allowVID=0,
+                            ignoreDecompileErrors=True,
+                            fontNumber=-1)
+    # Open the same bytes with FreeType through the raw ctypes bindings.
+    library = FT_Library()
+    error   = FT_Init_FreeType(byref(library))
+    if error: raise FT_Exception(error)
+
+    freetype_face = FT_Face()
+    data          = memory_file.getvalue()
+    error         = FT_New_Memory_Face(library, data, len(data), 0, byref(freetype_face))
+    if error: raise FT_Exception(error)
+    # Collect the character/sequence -> glyph mapping seen by each library.
+    all_freetype_characters  = read_freetype_cmap(freetype_face)    # values are glyph indices
+    all_fonttools_characters = read_fonttools_cmap(fonttools_font)  # values are glyph names
+
+    print('Read {} Free Type Characters'.format(len(all_freetype_characters)))
+    print('Read {} Font Tools Characters'.format(len(all_fonttools_characters)))
+
+    print('Checking Mapping')
+    # Direction 1: every FreeType entry should exist in fontTools with the same glyph name.
+    for character, glyphID in all_freetype_characters.items():
+        glyphname = fonttools_font.getGlyphName(glyphID)  # glyph index -> name, so both sides compare names
+        if character in all_fonttools_characters:
+            ft_glyphname = all_fonttools_characters[character]
+            if ft_glyphname != glyphname:
+                character_hex = convert_string_to_hex(character)
+                print('Glyph Mismatch: {} Free Type: {} Font Tools: {}'.format(character_hex, glyphname, ft_glyphname))
+
+        else:
+            character_hex = convert_string_to_hex(character)
+            print('Glyph Missing in Font Tools: {}'.format(character_hex))
+    # Direction 2: every fontTools entry should exist in FreeType with the same glyph name.
+    for character, glyphname in all_fonttools_characters.items():
+        if character in all_freetype_characters:
+            ft_glyphID   = all_freetype_characters[character]
+            ft_glyphname = fonttools_font.getGlyphName(ft_glyphID)
+            if ft_glyphname != glyphname:
+                character_hex = convert_string_to_hex(character)
+                print('Glyph Mismatch: {} Font Tools: {} Free Type: {}'.format(character_hex, glyphname, ft_glyphname))
+
+        else:
+            character_hex = convert_string_to_hex(character)
+            print('Glyph Missing in Free Type: {}'.format(character_hex))
+
+    print('Finished Checking Mapping')
+    # Release the FreeType handles created above.
+    FT_Done_Face(freetype_face)
+    FT_Done_FreeType(library)
diff --git a/examples/uvs-harness.py b/examples/uvs-harness.py
@@ -0,0 +1,41 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# shorter Unicode Variation Sequences example from #195, with comparison to hb-shape.
+
+# The test file used and known to work is v2.004 of https://github.com/adobe-fonts/source-han-sans/blob/release/OTF/Japanese/SourceHanSans-Regular.otf
+
+import importlib
+uvs = importlib.import_module("unicode-variation-sequences")  # hyphenated file name: a plain "import" statement cannot spell it
+
+read_freetype_cmap = uvs.read_freetype_cmap
+
+if __name__ == "__main__":  # print FreeType's glyph indices for one character and its variation sequences
+    import os, io
+    directory   = os.path.dirname(__file__)
+    font_path   = os.path.join(directory, 'SourceHanSans-Regular.otf')  # looked up next to this script
+    memory_file = io.BytesIO()
+    with open(font_path, 'rb') as fontfile:
+        memory_file.write(fontfile.read())
+        memory_file.seek(0)
+    # Open the in-memory font bytes with FreeType through the raw bindings.
+    from freetype import *  # NOTE(review): byref below presumably arrives through this star import - confirm
+    library = FT_Library()
+    error   = FT_Init_FreeType(byref(library))
+    if error: raise FT_Exception(error)
+
+    freetype_face = FT_Face()
+    data          = memory_file.getvalue()
+    error         = FT_New_Memory_Face(library, data, len(data), 0, byref(freetype_face))
+    if error: raise FT_Exception(error)
+
+    all_freetype_characters  = read_freetype_cmap(freetype_face)
+    # {'邉' : ['邉󠄁', '邉󠄂', '邉󠄃', '邉󠄄', '邉󠄅', '邉󠄆', '邉󠄇', '邉󠄈', '邉󠄉', '邉󠄊', '邉󠄋', '邉󠄌', '邉󠄍', '邉󠄎', '邉󠄀']}
+    # Glyph indices of the bare character first, then of each sequence, in the order passed to hb-shape below.
+    print(all_freetype_characters['邉'], all_freetype_characters['邉󠄁'], all_freetype_characters['邉󠄂'], all_freetype_characters['邉󠄃'], 
+          all_freetype_characters['邉󠄄'], all_freetype_characters['邉󠄅'], all_freetype_characters['邉󠄆'], all_freetype_characters['邉󠄇'],
+          all_freetype_characters['邉󠄈'], all_freetype_characters['邉󠄉'], all_freetype_characters['邉󠄊'], all_freetype_characters['邉󠄋'],
+          all_freetype_characters['邉󠄌'], all_freetype_characters['邉󠄍'], all_freetype_characters['邉󠄎'], all_freetype_characters['邉󠄀'],
+          sep='|')
+    print('The above should be identical to the output of this hb-shape command:')
+    print('    hb-shape --no-glyph-names --no-positions --no-clusters --no-advances SourceHanSans-Regular.otf "邉邉󠄁邉󠄂邉󠄃邉󠄄邉󠄅邉󠄆邉󠄇邉󠄈邉󠄉邉󠄊邉󠄋邉󠄌邉󠄍邉󠄎邉󠄀"')
diff --git a/freetype/raw.py b/freetype/raw.py
@@ -247,6 +247,28 @@ def FT_Library_SetLcdFilter (*args, **kwargs):
                                           POINTER(FT_OpaquePaint)]
 except AttributeError:
     pass
+try:  # Unicode Variation Sequences API; from the first symbol _lib lacks onward, the names stay undefined
+    FT_Face_GetCharVariantIndex          = _lib.FT_Face_GetCharVariantIndex
+    FT_Face_GetCharVariantIndex.argtypes = [FT_Face, FT_ULong, FT_ULong]
+    FT_Face_GetCharVariantIndex.restype  = FT_UInt  # glyph index for (charcode, variantSelector)
+    # Per the FreeType docs: 1 = default variation, 0 = non-default, -1 = not a variation.
+    FT_Face_GetCharVariantIsDefault          = _lib.FT_Face_GetCharVariantIsDefault
+    FT_Face_GetCharVariantIsDefault.argtypes = [FT_Face, FT_ULong, FT_ULong]
+    FT_Face_GetCharVariantIsDefault.restype  = FT_Int
+    # Zero-terminated array of every variation selector the face supports.
+    FT_Face_GetVariantSelectors          = _lib.FT_Face_GetVariantSelectors
+    FT_Face_GetVariantSelectors.argtypes = [FT_Face]
+    FT_Face_GetVariantSelectors.restype  = POINTER(FT_UInt32)
+    # Zero-terminated array of the selectors defined for one character code.
+    FT_Face_GetVariantsOfChar          = _lib.FT_Face_GetVariantsOfChar
+    FT_Face_GetVariantsOfChar.argtypes = [FT_Face, FT_ULong]
+    FT_Face_GetVariantsOfChar.restype  = POINTER(FT_UInt32)
+    # Zero-terminated array of the character codes covered by one selector.
+    FT_Face_GetCharsOfVariant          = _lib.FT_Face_GetCharsOfVariant
+    FT_Face_GetCharsOfVariant.argtypes = [FT_Face, FT_ULong]  # was assigned to FT_Face_GetVariantsOfChar by mistake
+    FT_Face_GetCharsOfVariant.restype  = POINTER(FT_UInt32)
+except AttributeError:
+    pass
 FT_Get_Module                  = _lib.FT_Get_Module
 FT_Get_Multi_Master            = _lib.FT_Get_Multi_Master
 FT_Get_PFR_Advance             = _lib.FT_Get_PFR_Advance