Skip to content
This repository has been archived by the owner on Jan 15, 2024. It is now read-only.

Commit

Permalink
fastText crawl-300d-2M-subword (#336)
Browse files (browse the repository at this point in the history)
* Add crawl-300d-2M-subword fastText vectors

* Update _constants.py

* Update test
  • Loading branch information
leezu authored Sep 17, 2018
1 parent a946474 commit fb27033
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 2 deletions.
4 changes: 4 additions & 0 deletions gluonnlp/_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,8 @@

FAST_TEXT_NPZ_SHA1 = {
'crawl-300d-2M': ('crawl-300d-2M.npz', '9dd611a1fe280c63050cd546d3595400fc0eede4'),
+ 'crawl-300d-2M-subword': ('crawl-300d-2M-subword-927782c8e.npz',
+ '927782c8ec8c2c1deb6a8a550217478e183ca25a'),
'wiki.aa': ('wiki.aa.npz', '48f163b80eb37f1806142169d3d4c05cf75b7339'),
'wiki.ab': ('wiki.ab.npz', '860ceff119dd27e5b701b605879037c1310cbc3e'),
'wiki.ace': ('wiki.ace.npz', '62938287464040491719f56a6f521f8f808beee8'),
Expand Down Expand Up @@ -561,6 +563,8 @@


FAST_TEXT_BIN_SHA1 = {
+ 'crawl-300d-2M-subword': ('crawl-300d-2M-subword-e6b07293f.bin',
+ 'e6b07293f7b0095e3c72c2a12bc09464b69444b0'),
'cc.af.300': ('cc.af.300-33115ff8.bin', '33115ff8e4c8f439757c819399177f1f58f07f12'),
'cc.als.300': ('cc.als.300-d6579933.bin', 'd65799331a03895d68a3fbe7611b181d7e7cc916'),
'cc.am.300': ('cc.am.300-999b3e95.bin', '999b3e95a2c490d7fcab2a6e08074746303d3c17'),
Expand Down
4 changes: 2 additions & 2 deletions tests/unittest/test_vocab_embed.py
Original file line number Diff line number Diff line change
Expand Up @@ -462,14 +462,14 @@ def test_token_embedding_from_file(tmpdir, allow_extend):


def test_embedding_get_and_pretrain_file_names():
- assert len(nlp.embedding.list_sources(embedding_name='fasttext')) == 484
+ assert len(nlp.embedding.list_sources(embedding_name='fasttext')) == 485
assert len(nlp.embedding.list_sources(embedding_name='glove')) == 10
assert len(nlp.embedding.list_sources(embedding_name='word2vec')) == 3

reg = nlp.embedding.list_sources(embedding_name=None)

assert len(reg['glove']) == 10
- assert len(reg['fasttext']) == 484
+ assert len(reg['fasttext']) == 485
assert len(reg['word2vec']) == 3

with pytest.raises(KeyError):
Expand Down

0 comments on commit fb27033

Please sign in to comment.