// SentenceSplitter.java
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;
import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.ling.TaggedWord;
import edu.stanford.nlp.tagger.maxent.MaxentTagger;
/**
 * Splits raw text into sentences with the Stanford tokenizer and POS tagger.
 *
 * <p>Each detected sentence is returned as a {@code Sentence} built from the
 * matching substring of the input plus the begin/end positions reported by the
 * first and last tagged token of that sentence.
 */
public class SentenceSplitter {

    /** Location of the English POS tagger model loaded by the constructor. */
    private static final String MODEL_PATH =
            "edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger";

    /** Tagger used by {@link #process(String)}; {@code null} when the model failed to load. */
    private final MaxentTagger tagger;

    /**
     * Creates a splitter and tries to load the tagger model.
     *
     * <p>A load failure is reported (stack trace plus a message on standard
     * output) but does not abort construction; the instance is then unusable
     * and {@link #process(String)} will fail with an
     * {@link IllegalStateException} as soon as it has a sentence to tag.
     */
    public SentenceSplitter() {
        MaxentTagger loaded = null;
        try {
            loaded = new MaxentTagger(MODEL_PATH);
        } catch (ClassNotFoundException | IOException e) {
            // Kept non-fatal for backward compatibility with existing callers.
            e.printStackTrace();
            System.out.println("Can not load stanford model!");
        }
        this.tagger = loaded;
    }

    /**
     * Splits {@code text} into sentences.
     *
     * @param text the text to split; {@code null} or empty yields an empty list
     * @return one {@code Sentence} per detected sentence, in document order;
     *         never {@code null}
     * @throws IllegalStateException if there is at least one sentence to tag
     *         but the tagger model could not be loaded at construction time
     */
    public List<Sentence> process(String text) {
        List<Sentence> sents = new ArrayList<>();
        if (text == null || text.isEmpty()) {
            return sents;
        }

        List<List<HasWord>> sentences = MaxentTagger.tokenizeText(new StringReader(text));
        if (sentences.isEmpty()) {
            return sents;
        }
        if (tagger == null) {
            // Fail with a clear cause instead of a bare NullPointerException below.
            throw new IllegalStateException("Stanford POS tagger model is not loaded");
        }

        for (List<HasWord> sentence : sentences) {
            List<TaggedWord> tSentence = tagger.tagSentence(sentence);
            if (tSentence.isEmpty()) {
                // No tokens means no positions to slice the text with; skip it.
                continue;
            }
            // The sentence spans from the first token's begin to the last token's end.
            int begin = tSentence.get(0).beginPosition();
            int end = tSentence.get(tSentence.size() - 1).endPosition();
            sents.add(new Sentence(text.substring(begin, end), begin, end));
        }
        return sents;
    }
}