-
Notifications
You must be signed in to change notification settings - Fork 467
/
add_glyphs.py
442 lines (358 loc) · 14.8 KB
/
add_glyphs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
#!/usr/bin/env python3
"""Extend a ttx file with additional data.
Takes a ttx file and one or more directories containing image files named
after sequences of codepoints, extends the cmap, hmtx, GSUB, and GlyphOrder
tables in the source ttx file based on these sequences, and writes out a new
ttx file.
This can also apply aliases from an alias file."""
import argparse
import collections
import os
from os import path
import re
import sys
from fontTools import ttx
from fontTools.ttLib.tables import otTables
from fontTools.pens.ttGlyphPen import TTGlyphPen
from fontTools.ttLib.tables._c_m_a_p import CmapSubtable
from fontTools.ttLib import newTable
import add_emoji_gsub
import add_aliases
sys.path.append(
path.join(os.path.dirname(__file__), 'third_party', 'color_emoji'))
from png import PNG
def get_seq_to_file(image_dir, prefix, suffix):
"""Return a mapping from codepoint sequences to files in the given directory,
for files that match the prefix and suffix. File names with this prefix and
suffix should consist of codepoints in hex separated by underscore. 'fe0f'
(the codepoint of the emoji presentation variation selector) is stripped from
the sequence.
"""
start = len(prefix)
limit = -len(suffix)
seq_to_file = {}
for name in os.listdir(image_dir):
if not (name.startswith(prefix) and name.endswith(suffix)):
continue
try:
cps = [int(s, 16) for s in name[start:limit].split('_')]
seq = tuple(cp for cp in cps if cp != 0xfe0f)
except:
raise Exception('could not parse "%s"' % name)
for cp in cps:
if not (0 <= cp <= 0x10ffff):
raise Exception('bad codepoint(s) in "%s"' % name)
if seq in seq_to_file:
raise Exception('duplicate sequence for "%s" in %s' % (name, image_dir))
seq_to_file[seq] = path.join(image_dir, name)
return seq_to_file
def collect_seq_to_file(image_dirs, prefix, suffix):
"""Return a sequence to file mapping by calling get_seq_to_file on a list
of directories. When sequences for files in later directories match those
from earlier directories, the later file replaces the earlier one.
"""
seq_to_file = {}
for image_dir in image_dirs:
seq_to_file.update(get_seq_to_file(image_dir, prefix, suffix))
return seq_to_file
def remap_values(seq_to_file, map_fn):
return {k: map_fn(v) for k, v in seq_to_file.items()}
def get_png_file_to_advance_mapper(lineheight):
def map_fn(filename):
wid, ht = PNG(filename).get_size()
return int(round(float(lineheight) * wid / ht))
return map_fn
def cp_name(cp):
"""return uniXXXX or uXXXXX(X) as a name for the glyph mapped to this cp."""
return '%s%04X' % ('u' if cp > 0xffff else 'uni', cp)
def seq_name(seq):
"""Sequences of length one get the cp_name. Others start with 'u' followed by
two or more 4-to-6-digit hex strings separated by underscore."""
if len(seq) == 1:
return cp_name(seq[0])
return 'u' + '_'.join('%04X' % cp for cp in seq)
def collect_cps(seqs):
cps = set()
for seq in seqs:
cps.update(seq)
return cps
def get_glyphorder_cps_and_truncate(glyphOrder):
"""This scans glyphOrder for names that correspond to a single codepoint
using the 'u(ni)XXXXXX' syntax. All names that don't match are moved
to the front the glyphOrder list in their original order, and the
list is truncated. The ones that do match are returned as a set of
codepoints."""
glyph_name_re = re.compile(r'^u(?:ni)?([0-9a-fA-F]{4,6})$')
cps = set()
write_ix = 0
for ix, name in enumerate(glyphOrder):
m = glyph_name_re.match(name)
if m:
cps.add(int(m.group(1), 16))
else:
glyphOrder[write_ix] = name
write_ix += 1
del glyphOrder[write_ix:]
return cps
def get_all_seqs(font, seq_to_advance):
"""Copies the sequences from seq_to_advance and extends it with single-
codepoint sequences from the GlyphOrder table as well as those internal
to sequences in seq_to_advance. Reduces the GlyphOrder table. """
all_seqs = set(seq_to_advance.keys())
# using collect_cps includes cps internal to a seq
cps = collect_cps(all_seqs)
glyphOrder = font.getGlyphOrder()
# extract cps in glyphOrder and reduce glyphOrder to only those that remain
glyphOrder_cps = get_glyphorder_cps_and_truncate(glyphOrder)
cps.update(glyphOrder_cps)
# add new single codepoint sequences from glyphOrder and sequences
all_seqs.update((cp,) for cp in cps)
return all_seqs
def get_font_cmap(font):
"""Return the first cmap in the font, we assume it exists and is a unicode
cmap."""
return font['cmap'].tables[0].cmap
def add_glyph_data(font, seqs, seq_to_advance, vadvance, add_glyf):
"""Add hmtx and GlyphOrder data for all sequences in seqs, and ensures there's
a cmap entry for each single-codepoint sequence. Seqs not in seq_to_advance
will get a zero advance."""
# We allow the template cmap to omit mappings for single-codepoint glyphs
# defined in the template's GlyphOrder table. Similarly, the hmtx table can
# omit advances. We assume glyphs named 'uniXXXX' or 'uXXXXX(X)' in the
# GlyphOrder table correspond to codepoints based on the name; we don't
# attempt to handle other types of names and these must occur in the cmap and
# hmtx tables in the template.
#
# seq_to_advance maps sequences (including single codepoints) to advances.
# All codepoints in these sequences will be added to the cmap. Some cps
# in these sequences have no corresponding single-codepoint sequence, they
# will also get added.
#
# The added codepoints have no advance information, so will get a zero
# advance.
cmap = get_font_cmap(font)
hmtx = font['hmtx'].metrics
vmtx = font['vmtx'].metrics
# Add glyf table so empty glyphs will be added to ensure compatibility
# with systems requiring a glyf table, like Windows 10.
if add_glyf:
pen = TTGlyphPen(None)
empty_glyph = pen.glyph()
font['loca'] = newTable("loca")
font['glyf'] = glyf_table = newTable("glyf")
glyf_table.glyphOrder = font.getGlyphOrder()
glyf_table.glyphs = {g: empty_glyph for g in glyf_table.glyphOrder}
# We don't expect sequences to be in the glyphOrder, since we removed all the
# single-cp sequences from it and don't expect it to already contain names
# corresponding to multiple-cp sequencess. But just in case, we use
# reverseGlyphMap to avoid duplicating names accidentally.
updatedGlyphOrder = False
reverseGlyphMap = font.getReverseGlyphMap()
# Order the glyphs by grouping all the single-codepoint sequences first,
# then order by sequence so that related sequences are together. We group
# by single-codepoint sequence first in order to keep these glyphs together--
# they're used in the coverage tables for some of the substitutions, and
# those tables can be more compact this way.
for seq in sorted(seqs, key=lambda s: (0 if len(s) == 1 else 1, s)):
name = seq_name(seq)
if len(seq) == 1:
cmap[seq[0]] = name
advance = seq_to_advance.get(seq, 0)
hmtx[name] = [advance, 0]
vmtx[name] = [vadvance, 0]
if name not in reverseGlyphMap:
font.glyphOrder.append(name)
updatedGlyphOrder=True
if add_glyf:
glyf_table[name] = empty_glyph
if updatedGlyphOrder:
delattr(font, '_reverseGlyphOrderDict')
def add_aliases_to_cmap(font, aliases):
"""Some aliases might map a single codepoint to some other sequence. These
should map directly to the glyph for that sequence in the cmap. (Others will
map via GSUB).
"""
if not aliases:
return
cp_aliases = [seq for seq in aliases if len(seq) == 1]
if not cp_aliases:
return
cmap = get_font_cmap(font)
for src_seq in cp_aliases:
cp = src_seq[0]
name = seq_name(aliases[src_seq])
cmap[cp] = name
def get_rtl_seq(seq):
"""Return the rtl variant of the sequence, if it has one, else the empty
sequence.
"""
# Sequences with ZWJ in them will reflect. Fitzpatrick modifiers
# however do not, so if we reflect we make a pass to swap them back into their
# logical order.
# Used to check for TAG_END 0xe007f as well but Android fontchain_lint
# dislikes the resulting mangling of flags for England, Scotland, Wales.
ZWJ = 0x200d
def is_fitzpatrick(cp):
return 0x1f3fb <= cp <= 0x1f3ff
if ZWJ not in seq:
return ()
rev_seq = list(seq)
rev_seq.reverse()
for i in range(1, len(rev_seq)):
if is_fitzpatrick(rev_seq[i-1]):
tmp = rev_seq[i]
rev_seq[i] = rev_seq[i-1]
rev_seq[i-1] = tmp
return tuple(rev_seq)
def get_gsub_ligature_lookup(font):
"""If the font does not have a GSUB table, create one with a ligature
substitution lookup. If it does, ensure the first lookup is a properly
initialized ligature substitution lookup. Return the lookup."""
# The template might include more lookups after lookup 0, if it has a
# GSUB table.
if 'GSUB' not in font:
ligature_subst = otTables.LigatureSubst()
ligature_subst.ligatures = {}
lookup = otTables.Lookup()
lookup.LookupType = 4
lookup.LookupFlag = 0
lookup.SubTableCount = 1
lookup.SubTable = [ligature_subst]
font['GSUB'] = add_emoji_gsub.create_simple_gsub([lookup])
else:
lookup = font['GSUB'].table.LookupList.Lookup[0]
assert lookup.LookupFlag == 0
# importXML doesn't fully init GSUB structures, so help it out
st = lookup.SubTable[0]
if not hasattr(lookup, 'LookupType'):
assert st.LookupType == 4
setattr(lookup, 'LookupType', 4)
if not hasattr(st, 'ligatures'):
setattr(st, 'ligatures', {})
return lookup
def add_ligature_sequences(font, seqs, aliases):
"""Add ligature sequences."""
seq_to_target_name = {
seq: seq_name(seq) for seq in seqs if len(seq) > 1}
if aliases:
seq_to_target_name.update({
seq: seq_name(aliases[seq]) for seq in aliases if len(seq) > 1})
if not seq_to_target_name:
return
rtl_seq_to_target_name = {
get_rtl_seq(seq): name for seq, name in seq_to_target_name.items()}
seq_to_target_name.update(rtl_seq_to_target_name)
# sequences that don't have rtl variants get mapped to the empty sequence,
# delete it.
if () in seq_to_target_name:
del seq_to_target_name[()]
# organize by first codepoint in sequence
keyed_ligatures = collections.defaultdict(list)
for t in seq_to_target_name.items():
first_cp = t[0][0]
keyed_ligatures[first_cp].append(t)
def add_ligature(lookup, cmap, seq, name):
# The sequences consist of codepoints, but the entries in the ligature table
# are glyph names. Aliasing can give single codepoints names based on
# sequences (e.g. 'guardsman' with 'male guardsman') so we map the
# codepoints through the cmap to get the glyph names.
glyph_names = [cmap[cp] for cp in seq]
lig = otTables.Ligature()
lig.CompCount = len(seq)
lig.Component = glyph_names[1:]
lig.LigGlyph = name
ligatures = lookup.SubTable[0].ligatures
first_name = glyph_names[0]
try:
ligatures[first_name].append(lig)
except KeyError:
ligatures[first_name] = [lig]
lookup = get_gsub_ligature_lookup(font)
cmap = get_font_cmap(font)
for first_cp in sorted(keyed_ligatures):
pairs = keyed_ligatures[first_cp]
# Sort longest first, this ensures longer sequences with common prefixes
# are handled before shorter ones. The secondary sort is a standard
# sort on the codepoints in the sequence.
pairs.sort(key = lambda pair: (-len(pair[0]), pair[0]))
for seq, name in pairs:
add_ligature(lookup, cmap, seq, name)
def add_cmap_format_4(font):
"""Add cmap format 4 table for Windows support, based on the
format 12 cmap."""
cmap = get_font_cmap(font)
newtable = CmapSubtable.newSubtable(4)
newtable.platformID = 3
newtable.platEncID = 1
newtable.language = 0
# Format 4 only has unicode values 0x0000 to 0xFFFF
newtable.cmap = {cp: name for cp, name in cmap.items() if cp <= 0xFFFF}
font['cmap'].tables.append(newtable)
def update_font_data(font, seq_to_advance, vadvance, aliases, add_cmap4, add_glyf):
"""Update the font's cmap, hmtx, GSUB, and GlyphOrder tables."""
seqs = get_all_seqs(font, seq_to_advance)
add_glyph_data(font, seqs, seq_to_advance, vadvance, add_glyf)
add_aliases_to_cmap(font, aliases)
add_ligature_sequences(font, seqs, aliases)
if add_cmap4:
add_cmap_format_4(font)
def apply_aliases(seq_dict, aliases):
"""Aliases is a mapping from sequence to replacement sequence. We can use
an alias if the target is a key in the dictionary. Furthermore, if the
source is a key in the dictionary, we can delete it. This updates the
dictionary and returns the usable aliases."""
usable_aliases = {}
for k, v in aliases.items():
if v in seq_dict:
usable_aliases[k] = v
if k in seq_dict:
del seq_dict[k]
return usable_aliases
def update_ttx(in_file, out_file, image_dirs, prefix, ext, aliases_file, add_cmap4, add_glyf):
if ext != '.png':
raise Exception('extension "%s" not supported' % ext)
seq_to_file = collect_seq_to_file(image_dirs, prefix, ext)
if not seq_to_file:
raise ValueError(
'no sequences with prefix "%s" and extension "%s" in %s' % (
prefix, ext, ', '.join(image_dirs)))
aliases = None
if aliases_file:
aliases = add_aliases.read_emoji_aliases(aliases_file)
aliases = apply_aliases(seq_to_file, aliases)
font = ttx.TTFont()
font.importXML(in_file)
lineheight = font['hhea'].ascent - font['hhea'].descent
map_fn = get_png_file_to_advance_mapper(lineheight)
seq_to_advance = remap_values(seq_to_file, map_fn)
vadvance = font['vhea'].advanceHeightMax if 'vhea' in font else lineheight
update_font_data(font, seq_to_advance, vadvance, aliases, add_cmap4, add_glyf)
font.saveXML(out_file)
def main():
parser = argparse.ArgumentParser()
parser.add_argument(
'-f', '--in_file', help='ttx input file', metavar='file', required=True)
parser.add_argument(
'-o', '--out_file', help='ttx output file', metavar='file', required=True)
parser.add_argument(
'-d', '--image_dirs', help='directories containing image files',
nargs='+', metavar='dir', required=True)
parser.add_argument(
'-p', '--prefix', help='file prefix (default "emoji_u")',
metavar='pfx', default='emoji_u')
parser.add_argument(
'-e', '--ext', help='file extension (default ".png", currently only '
'".png" is supported', metavar='ext', default='.png')
parser.add_argument(
'-a', '--aliases', help='process alias table', const='emoji_aliases.txt',
nargs='?', metavar='file')
parser.add_argument(
'--add_cmap4', help='add cmap format 4 table', dest='add_cmap4', action='store_true')
parser.add_argument(
'--add_glyf', help='add glyf and loca tables', dest='add_glyf', action='store_true')
args = parser.parse_args()
update_ttx(
args.in_file, args.out_file, args.image_dirs, args.prefix, args.ext,
args.aliases, args.add_cmap4, args.add_glyf)
if __name__ == '__main__':
main()