# bgs.3class.geo.prop
# Training properties for a CRF sequence classifier. The feature flags set in
# this file are described in NERFeatureFactory.
# ---- Training data and model output ----
# location of the training file
# NOTE(review): this and the other "./" paths in this file are relative;
# presumably they resolve against the directory the trainer is started
# from - confirm.
trainFile = ./bgs.3class.geo-all-data.txt
# location where you would like to save (serialize) your
# classifier; adding .gz at the end automatically gzips the file,
# making it smaller and faster to load
serializeTo = ./bgs.3class.geo.crf.ser.gz
# structure of your training file; this tells the classifier that
# the word is in column 0 and the correct answer (the label to learn)
# is in column 1; column numbers start at 0
map = word=0,answer=1
# ---- Model order and core features ----
# This specifies the order of the CRF: order 1 means that features
# apply at most to a class pair of previous class and current class
# or current class and next class.
maxLeft=1
# These are the features we'd like to train with.
# Some are discussed below; the rest can be
# understood by looking at NERFeatureFactory.
# NOTE(review): unusual spellings such as maxNGramLeng and useTypeySequences
# are presumably the exact flag names the classifier expects - check
# NERFeatureFactory before "correcting" them.
useClassFeature=true
useWord=true
# word character n-grams will be included up to length 6 (maxNGramLeng),
# as prefixes and suffixes only (noMidNGrams)
useNGrams=true
noMidNGrams=true
maxNGramLeng=6
# context and label-sequence features; presumably previous/next-word and
# class-sequence features - see NERFeatureFactory for the exact definitions
usePrev=true
useNext=true
useDisjunctive=true
useSequences=true
usePrevSequences=true
# the next 4 properties deal with word shape features
useTypeSeqs=true
useTypeSeqs2=true
useTypeySequences=true
wordShape=chris2useLC
# ---- Experimental features (my additions) ----
normalize=true
usePosition=true
useOccurrencePatterns=true
useLastRealWord=true
useNextRealWord=true
# The remaining settings in this group are commented out, so they are not
# set by this file; delete the leading '#' to try one.
# NOTE(review): useTags/useWordTag presumably need a tag column, which the
# two-column map (word, answer) in this file does not provide - confirm
# before enabling.
#disjunctionWidth=5
#wideDisjunctionWidth=5
#useLongSequences=true
#useTags=true
#useWordTag=true
#useWordPairs=true
# ---- Gazette ----
# Use a gazette, loaded from the file named by 'gazette'.
# cleanGazette: if true, a gazette feature fires when all tokens of a
# gazette entry match
# (sloppyGazette, which is not set in this file, fires for any token that
# matches)
cleanGazette=true
gazette=./vocab.gaz.txt