# This configuration file follows the INI File Structure
# https://docs.python.org/3/library/configparser.html#supported-ini-file-structure
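#
# For reference, a minimal sketch of reading this file with Python's
# configparser (the variable names here are illustrative only):
#
#   import configparser
#   config = configparser.ConfigParser()
#   config.read("config.ini")
#   kb_dir = config["KNOWLEDGE BASE"]["Wikipedia2vec Directory"]
#   epochs = config["TRAINING"].getint("Epochs")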
[KNOWLEDGE BASE]
# Directory containing the full Wikipedia2vec knowledge base (the active KB when Candidate Generation is used):
Wikipedia2vec Directory = /ex_data/knowledgebases/wikipedia2vec
# Knowledge base directory for evaluation without candidate generation.
# The directory should contain a smaller version of Wikipedia2vec:
Compact Wikipedia KB = /ex_data/knowledgebases/wikipedia_score_15
# Wikidata KB; must also contain a mapping from Wikipedia to Wikidata entities:
Compact Wikidata KB = /ex_data/knowledgebases/wikidata/score_15_with_conll
# Name of Wikipedia2vec file in the above Knowledge Base directories:
Wikipedia2Vec File = enwiki_20180420_100d.pkl
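#
# A minimal sketch of loading such a file with the wikipedia2vec package
# (assuming its standard Wikipedia2Vec.load API):
#
#   from wikipedia2vec import Wikipedia2Vec
#   wiki2vec = Wikipedia2Vec.load(
#       "/ex_data/knowledgebases/wikipedia2vec/enwiki_20180420_100d.pkl")
#   entity_vector = wiki2vec.get_entity_vector("Barack Obama")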
# Name of the Wikidata-to-Wikipedia mapping file used in the Wikidata KB.
# The file must be in the Compact Wikidata KB directory:
Wikidata To Wikipedia = mapping_enwiki_20180420_100d.tsv
# Alias Mapping TSV file for Candidate Generation
Alias Mapping = /ex_data/prob_yago_crosswikis_wikipedia_p_e_m.txt
# Used instead of Alias Mapping, if available
Alias Dict = /ex_data/alias_dict.json
Entity Dict = /ex_data/entity_dict.json
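#
# A sketch of candidate generation with the alias dict (the JSON layout,
# a mapping from mention strings to candidate entities, is an assumption):
#
#   import json
#   with open("/ex_data/alias_dict.json") as f:
#       alias_dict = json.load(f)
#   candidates = alias_dict.get("new york", [])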
[DATA]
# File with the annotated dataset, with Wikipedia or Wikidata IDs:
Annotated Dataset = /ex_data/annotated_datasets/conll-wikipedia-iob-annotations
; Annotated Dataset = /ex_data/annotated_datasets/conll-wikidata-iob-annotations
; Annotated Dataset = /ex_data/annotated_datasets/train_articles_dataset_score_0_50k.txt
# Directories to read and write generated input vectors.
# The vectors must be generated from the 'Annotated Dataset' file above.
# Use with Wikipedia Knowledge Base:
Uncased Input Vectors Dir = /data/conll_vectors_wikipedia
Cased Input Vectors Dir = /data/conll_vectors_wikipedia_cased
# Use with Wikidata Knowledge Base:
; Uncased Input Vectors Dir = /data/conll_vectors_wikidata
; Cased Input Vectors Dir = /data/conll_vectors_wikidata_cased
# For the Wikipedia articles dataset with the Wikidata KB:
; Uncased Input Vectors Dir = /data/wiki_vectors_50k
# Split the dataset into parts (separate the integers with commas); see the sketch below
# For AIDA-CoNLL:
Data Split = 946, 216, 231
# For Wikipedia Articles dataset:
; Data Split = 97, 1, 2
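#
# A sketch of how such a split might be parsed and applied (illustrative
# only; the actual loader may differ):
#
#   sizes = [int(s) for s in "946, 216, 231".split(",")]  # [946, 216, 231]
#   train = docs[:sizes[0]]
#   val = docs[sizes[0]:sizes[0] + sizes[1]]
#   test = docs[sizes[0] + sizes[1]:]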
[MODEL]
# Name of the model if fetched from Hugging Face:
; Model ID = bert-base-cased
Model ID = bert-base-uncased
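#
# A minimal sketch of fetching this model by ID with the Hugging Face
# transformers library:
#
#   from transformers import BertModel, BertTokenizer
#   tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
#   model = BertModel.from_pretrained("bert-base-uncased")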
# Directory from which to read a BERT model, if not training a new one:
Bert Model Dir = /models/trained/
# Directory to write trained models
Save Model Dir = /models/trained/
# Use hidden layers in model output heads
Hidden Output Layers = True
# Use dropout after BERT-embeddings, before output heads
# (Recommended: False, Chen et al.: True)
Dropout After BERT = False
[TRAINING]
# Number of training epochs, e.g. 512 / (batch size)
Epochs = 180
Batch Size = 4
# Chen et al. default: 2e-5
Initial Learning Rate = 2e-5
# Lambda hyperparameter in the loss function: relative weight of the ED loss to the MD loss
# Recommended: 0.01, Chen et al.: 0.1
Loss Lambda = 0.01
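# I.e. the combined loss is presumably of the form (an assumption based on
# the description above): total_loss = md_loss + lambda * ed_loss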
Early Stopping = False
# Number of encoder transformer layers to freeze before training (max 12)
Freeze N Transformers = 0
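#
# A sketch of what freezing N encoder layers could look like for a Hugging
# Face BertModel (illustrative; the actual training code may differ):
#
#   n = 4  # value of 'Freeze N Transformers'
#   for layer in model.encoder.layer[:n]:
#       for param in layer.parameters():
#           param.requires_grad = False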
[VERBOSITY]
# Frequency of progress updates, in number of steps:
Training Update Frequency = 66
Validation Update Frequency = 64
Test Update Frequency = 40