-
Notifications
You must be signed in to change notification settings - Fork 7
/
corenlp.properties
74 lines (61 loc) · 3.57 KB
/
corenlp.properties
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
# Annotators to run, given as a comma-separated list.
annotators = tokenize, ssplit, pos, lemma, ner, parse
# Tokenizer / sentence-splitter options. Per the CoreNLP documentation these
# make the tokenizer split on whitespace only and the sentence splitter break
# only at line ends, i.e. input is expected pre-tokenized, one sentence per
# line. NOTE(review): confirm against the CoreNLP version in use.
tokenize.whitespace = true
ssplit.eolonly = true
# A true-casing annotator is also available (see below)
#annotators = tokenize, ssplit, pos, lemma, truecase
# A simple regex NER annotator is also available
# annotators = tokenize, ssplit, regexner
#Use these as EOS punctuation and discard them from the actual sentence content
#These are HTML tags that get expanded internally to correct syntax, e.g., from "p" to "<p>", "</p>" etc.
#Will have no effect if the "cleanxml" annotator is used
#ssplit.htmlBoundariesToDiscard = p,text
#
# None of these paths are necessary anymore: we load all models from the JAR file
#
#pos.model = /u/nlp/data/pos-tagger/wsj3t0-18-left3words/left3words-distsim-wsj-0-18.tagger
## slightly better model but much slower:
##pos.model = /u/nlp/data/pos-tagger/wsj3t0-18-bidirectional/bidirectional-distsim-wsj-0-18.tagger
# If you set ner.model, you can name any arbitrary model you want.
# The models named by ner.model.3class, ner.model.7class, and
# ner.model.MISCclass are also added in the order named.
# Any of the ner.model properties can be a comma separated list of names,
# in which case each of the models in the comma separated list is added.
#ner.model = ...
#ner.model.3class = /u/nlp/data/ner/goodClassifiers/all.3class.distsim.crf.ser.gz
#ner.model.7class = /u/nlp/data/ner/goodClassifiers/muc.distsim.crf.ser.gz
#ner.model.MISCclass = /u/nlp/data/ner/goodClassifiers/conll.distsim.crf.ser.gz
#regexner.mapping = /u/nlp/data/TAC-KBP2010/sentence_extraction/type_map_clean
#regexner.ignorecase = false
#nfl.gazetteer = /scr/nlp/data/machine-reading/Machine_Reading_P1_Reading_Task_V2.0/data/SportsDomain/NFLScoring_UseCase/NFLgazetteer.txt
#nfl.relation.model = /scr/nlp/data/ldc/LDC2009E112/Machine_Reading_P1_NFL_Scoring_Training_Data_V1.2/models/nfl_relation_model.ser
#nfl.entity.model = /scr/nlp/data/ldc/LDC2009E112/Machine_Reading_P1_NFL_Scoring_Training_Data_V1.2/models/nfl_entity_model.ser
#printable.relation.beam = 20
#parser.model = /u/nlp/data/lexparser/englishPCFG.ser.gz
#parser.flags = -retainTmpSubcategories
#srl.verb.args=/u/kristina/srl/verbs.core_args
#srl.model.cls=/u/nlp/data/srl/trainedModels/englishPCFG/cls/train.ann
#srl.model.id=/u/nlp/data/srl/trainedModels/englishPCFG/id/train.ann
#coref.model=/u/nlp/rte/resources/anno/coref/corefClassifierAll.March2009.ser.gz
#coref.name.dir=/u/nlp/data/coref/
#wordnet.dir=/u/nlp/data/wordnet/wordnet-3.0-prolog
#dcoref.demonym = /scr/heeyoung/demonyms.txt
#dcoref.animate = /scr/nlp/data/DekangLin-Animacy-Gender/Animacy/animate.unigrams.txt
#dcoref.inanimate = /scr/nlp/data/DekangLin-Animacy-Gender/Animacy/inanimate.unigrams.txt
#dcoref.male = /scr/nlp/data/Bergsma-Gender/male.unigrams.txt
#dcoref.neutral = /scr/nlp/data/Bergsma-Gender/neutral.unigrams.txt
#dcoref.female = /scr/nlp/data/Bergsma-Gender/female.unigrams.txt
#dcoref.plural = /scr/nlp/data/Bergsma-Gender/plural.unigrams.txt
#dcoref.singular = /scr/nlp/data/Bergsma-Gender/singular.unigrams.txt
#whether or not to print singleton entities
#output.printSingletonEntities = false
# This is the regular expression that describes which xml tags to keep
# the text from. In order to turn on the xml removal, add cleanxml
# to the list of annotators above after "tokenize".
#clean.xmltags = .*
# A set of tags which will force the end of a sentence. HTML example:
# you would not want to end on <i>, but you would want to end on <p>.
# Once again, a regular expression.
# (Blank means there are no sentence enders.)
#clean.sentenceendingtags =
# Whether or not to allow malformed xml
#clean.allowflawedxml