This repository has been archived by the owner on Aug 30, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 38
/
Copy pathconvert_gptneox.py
216 lines (188 loc) · 8.19 KB
/
convert_gptneox.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
# Copyright (c) 2023 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Convert Hugging Face fine-tuned gpt-neox-like models to ne format
#
# Usage:
#
#   python3 convert_gptneox.py [--outtype {f32,f16}] [--outfile OUTFILE] [--model_hub {huggingface,modelscope}] model
#
# This script is similar to "convert-pt-to-ne.py"
#
import io
import os
import sys
import struct
import json
import code
import torch
import numpy as np
from pathlib import Path
import argparse
from typing import (IO, TYPE_CHECKING, Any, Callable, Dict, Iterable, List, Literal, Optional, Sequence, Tuple, TypeVar,
Union)
from transformers import AutoModelForCausalLM, AutoTokenizer
# ref: https://github.com/openai/gpt-2/blob/master/src/encoder.py
def bytes_to_unicode():
    """Return a dict mapping every byte value (0-255) to a one-character string.

    The reversible BPE codes work on unicode strings, so every possible byte
    needs a stand-in character that is neither whitespace nor a control
    character (the BPE code chokes on those).

    Bytes that are already printable ("!".."~", "¡".."¬", "®".."ÿ") map to
    the character with the same code point. Every remaining byte is assigned,
    in increasing byte order, the next unused code point starting at 256.

    The mapping is one-to-one, so inverting the returned dict yields the
    decoder table.
    """
    printable = (
        list(range(ord("!"), ord("~") + 1))
        + list(range(ord("¡"), ord("¬") + 1))
        + list(range(ord("®"), ord("ÿ") + 1))
    )
    # Set for O(1) membership tests; the lists keep the insertion order that
    # determines the order of the returned dict.
    already_mapped = set(printable)
    byte_values = printable[:]
    code_points = printable[:]
    shift = 0
    for b in range(2**8):
        if b not in already_mapped:
            byte_values.append(b)
            code_points.append(2**8 + shift)
            shift += 1
    return dict(zip(byte_values, (chr(cp) for cp in code_points)))
def _write_header(fout: IO[bytes], hparams: Dict[str, Any], tokenizer: Any, ftype: int) -> None:
    """Write the file magic and the fixed-order hyperparameter header.

    Every field is packed as a native 4-byte int ("i") or float ("f"). The
    order of the writes defines the file layout, so it must not be changed.
    Fields written as a literal 0 / 0.0 without a label are placeholders whose
    meaning is not visible in this script.
    """
    # 0x67676d6c is unversioned ne
    # 0x67676d66 is versioned ggmf (requires token scores)
    ne_file_magic = 0x67676d6c

    fout.write(struct.pack("i", ne_file_magic))  # magic: ne in hex
    fout.write(struct.pack("i", hparams["vocab_size"]))
    fout.write(struct.pack("i", hparams["hidden_size"]))
    fout.write(struct.pack("i", 0))  # dummy data
    fout.write(struct.pack("i", hparams["num_attention_heads"]))
    fout.write(struct.pack("i", hparams.get("n_head_kv", 0)))  # multi-query attention
    fout.write(struct.pack("i", hparams["num_hidden_layers"]))
    # Per-head dimension scaled by rotary_pct, truncated to an int.
    fout.write(struct.pack("i", int((hparams["hidden_size"] / hparams["num_attention_heads"]) * hparams["rotary_pct"])))
    fout.write(struct.pack("i", ftype))
    fout.write(struct.pack("i", 0))
    fout.write(struct.pack("f", 0.0))
    fout.write(struct.pack("f", 0.0))
    fout.write(struct.pack("i", int(hparams["use_parallel_residual"])))
    fout.write(struct.pack("i", 0))  # word_embed_proj_dim (for opt)
    fout.write(struct.pack("i", 0))  # do_layer_norm_before (for opt)

    fout.write(struct.pack("i", 0))
    fout.write(struct.pack("i", 0))
    fout.write(struct.pack("i", 0))
    fout.write(struct.pack("i", 0))  # n_experts
    fout.write(struct.pack("i", 0))  # n_expert_used
    fout.write(struct.pack("i", 0))  # n_embd_head_k for gemma
    fout.write(struct.pack("f", hparams.get("layer_norm_eps", 1e-5)))  # rms_norm_eps or layer_norm_eps
    fout.write(struct.pack("f", 10000.0))  # freq_base
    fout.write(struct.pack("f", 1.0))  # rope_factor

    fout.write(struct.pack("f", 0.0))  # config.json "rope_scaling.factor", not enabled
    fout.write(struct.pack("i", 0))  # rope_scaling.original_max_position_embeddings
    fout.write(struct.pack("i", 0))  # params["rope_scaling"]["type"] =="yarn" else 0))

    # Special token ids, with fixed fallbacks when the tokenizer defines none.
    fout.write(struct.pack("i", tokenizer.bos_token_id if tokenizer.bos_token_id is not None else 1))
    fout.write(struct.pack("i", tokenizer.eos_token_id if tokenizer.eos_token_id is not None else 2))
    fout.write(struct.pack("i", tokenizer.pad_token_id if tokenizer.pad_token_id is not None else -1))
    fout.write(struct.pack("i", tokenizer.sep_token_id if tokenizer.sep_token_id is not None else -1))


def _write_vocab(fout: IO[bytes], tokenizer: Any, vocab_size: int) -> None:
    """Write the vocabulary as (int32 byte length, UTF-8 bytes) records.

    Tokens are written in increasing token-id order. If the tokenizer has
    fewer entries than ``vocab_size``, the last written token is repeated
    until ``vocab_size`` records have been emitted.
    """
    encoder = tokenizer.vocab
    # Add added_tokens (special tokens) to the encoder
    encoder.update(tokenizer.get_added_vocab())
    byte_encoder = bytes_to_unicode()
    byte_decoder = {v: k for k, v in byte_encoder.items()}

    # Starts empty so the padding loop below is well defined even when the
    # tokenizer reports no tokens at all.
    last_token = bytearray()
    counter = 0
    # sort by value
    for key in sorted(encoder, key=encoder.get):
        # workaround for key error when c not found: characters without a
        # byte mapping are passed through unchanged.
        decoded = "".join(chr(byte_decoder[c]) if c in byte_decoder else c for c in key)
        last_token = bytearray(decoded, encoding="utf-8")
        fout.write(struct.pack("i", len(last_token)))
        fout.write(last_token)
        counter += 1

    # Repeat last token until vocab_size
    while counter < vocab_size:
        fout.write(struct.pack("i", len(last_token)))
        fout.write(last_token)
        counter += 1


def _write_tensors(fout: IO[bytes], list_vars: Dict[str, Any], ftype: int) -> None:
    """Write every tensor of the state dict as a header followed by raw data.

    Per tensor: (n_dims, name length, dtype flag) as three int32 values, the
    dimensions in reversed order, the UTF-8 name, then the array bytes.
    Tensors with more than one dimension are stored as float16 when
    ``ftype == 1``; everything else is stored as float32.
    """
    for name in list_vars.keys():
        # Skip per-layer attention entries that are not exported.
        if name.startswith('gpt_neox.layers.'):
            if 'attention.masked_bias' in name or \
               'attention.rotary_emb.inv_freq' in name or \
               'attention.bias' in name:
                continue
        # No gradients for these
        list_vars[name].requires_grad = False
        print(name, ' -> ', name)
        data = list_vars[name].squeeze().numpy()
        data = data.astype(np.float32)

        n_dims = len(data.shape)
        print(name, n_dims, data.shape)

        # default type is fp32
        ftype_cur = 0
        if ftype == 1 and n_dims > 1:
            print("  Converting to float16", data.shape, data[:3, :3].tolist())
            data = data.astype(np.float16)
            ftype_cur = 1
        else:
            print("  Converting to float32", data.shape, data[:3, :3].tolist() if n_dims > 1 else data[:3].tolist())
            data = data.astype(np.float32)

        # header
        name_bytes = name.encode('utf-8')
        fout.write(struct.pack("iii", n_dims, len(name_bytes), ftype_cur))
        # Dimensions are written innermost-first (reverse of numpy's shape).
        for i in range(n_dims):
            fout.write(struct.pack("i", data.shape[n_dims - 1 - i]))
        print(name_bytes)
        fout.write(name_bytes)

        # data
        data.tofile(fout)


def main(args_in: Optional[List[str]] = None) -> None:
    """Convert a GPT-NeoX style causal LM checkpoint to a single NE binary file.

    Args:
        args_in: Command-line arguments to parse; ``None`` makes argparse
            read ``sys.argv``.

    The model and tokenizer are loaded with ``trust_remote_code=True`` from
    either transformers or modelscope (``--model_hub``). The output file
    contains, in order: the hyperparameter header, the vocabulary, and all
    exported tensors.

    When ``--outfile`` is omitted the file is named ``ne-<outtype>.bin`` and
    placed in the model directory if it exists locally, otherwise in the
    current working directory.
    """
    parser = argparse.ArgumentParser(description="Convert a model to a NE compatible file")
    parser.add_argument("--outtype", choices=["f32", "f16"], help="output format (default: based on input)")
    parser.add_argument("--outfile", type=Path, help="path to write to; default: based on input")
    parser.add_argument("--model_hub", choices=["huggingface","modelscope"],
                        default="huggingface", help="hub to load model")
    parser.add_argument("model", type=Path, help="directory containing model file")
    args = parser.parse_args(args_in)

    dir_model = args.model.as_posix()

    # possible data types
    #   ftype == 0 -> float32
    #   ftype == 1 -> float16
    ftype = 1 if args.outtype == "f16" else 0

    if args.outfile is not None:
        fname_out = args.outfile.as_posix()
    else:
        # --outfile is optional: derive a path instead of failing on None.
        # `model` may be a hub id rather than a local directory, in which
        # case the file goes to the current working directory.
        out_dir = args.model if args.model.is_dir() else Path.cwd()
        fname_out = (out_dir / f"ne-{'f16' if ftype == 1 else 'f32'}.bin").as_posix()

    if args.model_hub == "modelscope":
        from modelscope import AutoModelForCausalLM, AutoTokenizer
    else:
        from transformers import AutoModelForCausalLM, AutoTokenizer
    print("Loading model: ", dir_model)
    model = AutoModelForCausalLM.from_pretrained(dir_model, torch_dtype=torch.float16 if ftype == 1 else torch.float32,
                                                 trust_remote_code=True)
    tokenizer = AutoTokenizer.from_pretrained(dir_model, trust_remote_code=True)
    model.eval()
    for p in model.parameters():
        p.requires_grad = False
    hparams = model.config.to_dict()
    print("Model loaded: ", dir_model)

    hparams["multiple_of"] = 1

    # The context manager guarantees the file is closed even if the
    # conversion fails part-way through.
    with open(fname_out, "wb") as fout:
        _write_header(fout, hparams, tokenizer, ftype)
        _write_vocab(fout, tokenizer, hparams["vocab_size"])

        list_vars = model.state_dict()
        print(hparams)
        _write_tensors(fout, list_vars, ftype)

    print("Done. Output file: " + fname_out)
    print("")
# Run the converter only when this file is executed as a script, not when it
# is imported; main() then parses its arguments from sys.argv.
if __name__ == '__main__':
    main()