forked from yuvalpinter/m3gm
-
Notifications
You must be signed in to change notification settings - Fork 0
/
io_utils.py
64 lines (54 loc) · 1.94 KB
/
io_utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import numpy as np
import pickle as pickle
import codecs
from collections import namedtuple
from datetime import datetime
from consts import DEFAULT_EMBEDDING_DIM
__author__ = "Yuval Pinter, 2018"

# Container with one slot per data split: train, dev and test.
WordnetPredictionDataset = namedtuple(
    'WordnetPredictionDataset',
    field_names=('train', 'dev', 'test'),
)

# Container pairing a `matrices` slot with the `index` slot that accompanies it.
SparseMatrixDataset = namedtuple(
    'SparseMatrixDataset',
    field_names=('matrices', 'index'),
)
def timeprint(str):
    """
    Minimal logging helper: writes one line to stdout consisting of the
    current `datetime.now()` value, a tab, and the given message.
    :param str: value to print after the timestamp (the name shadows the
                builtin inside this function; kept for keyword callers)
    :return: None
    """
    template = '{}\t{}'
    stamp = datetime.now()
    print(template.format(stamp, str))
def load_prediction_dataset(filename):
    """
    Unpickles a prediction dataset and also hands back the index of its train split.
    :param filename: file containing WordnetPredictionDataset with WordNet graphs in train, dev and test
    :return: tuple of (the unpickled dataset, its `train.index` attribute)
    """
    # NOTE: unpickling can execute arbitrary code -- only pass files from a trusted source.
    # The context manager guarantees the handle is closed, even if unpickling raises.
    with open(filename, 'rb') as pickled:
        ds = pickle.load(pickled)
    return ds, ds.train.index
def load_graphs(filename):
    """
    loads WordNet graphs from pre-pickled resource
    :param filename: .pkl file with graph in sparse matrices format
    :return: tuple of (the unpickled object's `matrices` attribute, its `index` attribute)
    """
    # NOTE: unpickling can execute arbitrary code -- only pass files from a trusted source.
    # The context manager guarantees the handle is closed, even if unpickling raises.
    with open(filename, 'rb') as pickled:
        ds = pickle.load(pickled)
    return ds.matrices, ds.index
def load_embeddings(filename, a2i, emb_size=DEFAULT_EMBEDDING_DIM):
    """
    loads embeddings for synsets ("atoms") from existing file,
    or initializes them to uniform random
    :param filename: None for purely random initialization; a path ending in 'npy' whose
                     array is returned as loaded; otherwise a UTF-8 text file with one
                     whitespace-separated row per atom (atom name, then vector components)
    :param a2i: mapping from atom name to its row number in the returned matrix
    :param emb_size: embedding width used when no vectors are read from a text file
    :return: the array stored in the .npy file, or a (len(a2i), dim) array drawn from
             uniform(-0.8, 0.8) in which rows of atoms found in the text file are
             overwritten by the file's vectors
    """
    if filename is not None and filename.endswith('npy'):
        # pre-built matrix: returned as stored, a2i and emb_size are not consulted
        return np.load(filename)

    atom_to_embed = {}
    if filename is not None:
        with codecs.open(filename, "r", "utf-8") as f:
            for line in f:
                split = line.split()
                # rows need an atom plus at least two components; this also drops
                # blank lines and any two-token line
                if len(split) > 2:
                    # np.asfarray was removed in NumPy 2.0; this is its float64 equivalent
                    atom_to_embed[split[0]] = np.asarray(split[1:], dtype=np.float64)

    if atom_to_embed:
        # width is taken from the first atom read from the file
        embedding_dim = len(next(iter(atom_to_embed.values())))
    else:
        # no file, or a file without usable rows: fall back to the requested size
        embedding_dim = emb_size

    out = np.random.uniform(-0.8, 0.8, (len(a2i), embedding_dim))
    # atoms absent from a2i are ignored; atoms absent from the file keep their random row
    for atom, embed in atom_to_embed.items():
        if atom in a2i:
            out[a2i[atom]] = embed
    return out