Skip to content

Commit

Permalink
Merge pull request #29 from zoho/hawking_dev
Browse files Browse the repository at this point in the history
Master Merge Hawking Enhancement v0.1.7
  • Loading branch information
ArulVendhan authored Oct 31, 2022
2 parents f320cf1 + e77b7bf commit e1a1144
Show file tree
Hide file tree
Showing 4 changed files with 14 additions and 26 deletions.
4 changes: 2 additions & 2 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

<groupId>com.zoho</groupId>
<artifactId>hawking</artifactId>
<version>0.1.6</version>
<version>0.1.7</version>
<packaging>jar</packaging>
<name>Hawking</name>
<description>Hawking is a natural language date time parser that extracts date and time from text with context and parse to the required format.</description>
Expand Down Expand Up @@ -54,7 +54,7 @@
<dependency>
<groupId>edu.stanford.nlp</groupId>
<artifactId>stanford-corenlp</artifactId>
<version>4.2.0</version>
<version>4.5.1</version>
<exclusions>
<exclusion>
<groupId>javax.servlet</groupId>
Expand Down
34 changes: 11 additions & 23 deletions src/main/java/com/zoho/hawking/language/english/Parser.java
Original file line number Diff line number Diff line change
@@ -1,45 +1,33 @@
//$Id$
package com.zoho.hawking.language.english;

import com.zoho.hawking.utils.CommonUtils;
import com.zoho.hawking.utils.Constants;
import edu.stanford.nlp.ie.AbstractSequenceClassifier;
import edu.stanford.nlp.ie.crf.CRFClassifier;
import edu.stanford.nlp.io.IOUtils;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.sequences.SeqClassifierFlags;
import edu.stanford.nlp.util.Triple;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.List;
import java.util.Properties;
import java.util.logging.Level;
import java.util.logging.Logger;

public class Parser {

private static final Logger LOGGER = Logger.getLogger(Parser.class.getName());

static CRFClassifier<CoreLabel> crf = getCRFInstance();

/**
 * Builds the CRF classifier from the parser properties and loads the serialized parser model into it.
 *
 * <p>Flags are read from {@code Constants.PARSERPROPSPATH}; the model is read from
 * {@code Constants.PARSERMODELPATH}.
 *
 * @return the loaded classifier, or {@code null} if the properties or the model could not be
 *         read or deserialized (the failure is logged at SEVERE level)
 */
private static CRFClassifier<CoreLabel> getCRFInstance() {
    Properties props = new Properties();
    try {
        props.load(CommonUtils.readIsFromClasspath(Constants.PARSERPROPSPATH));
        SeqClassifierFlags flags = new SeqClassifierFlags(props);
        CRFClassifier<CoreLabel> classifier = new CRFClassifier<CoreLabel>(flags);
        // try-with-resources: the model is fully deserialized by loadClassifier, so the
        // stream can be released as soon as loading finishes (or fails).
        try (InputStream parserModel = IOUtils.getInputStreamFromURLOrClasspathOrFileSystem(Constants.PARSERMODELPATH)) {
            LOGGER.info("Loading Parser Model"); //No I18N
            classifier.loadClassifier(parserModel);
            LOGGER.info("Parser Model Loaded"); //No I18N
        }
        return classifier;
    } catch (ClassCastException | ClassNotFoundException | IOException e) {
        // Pass the exception itself: the (Level, String, Object) overload only substitutes
        // the parameter into "{0}" placeholders, which this message does not have, so
        // passing e.getMessage() silently dropped the failure detail and the stack trace.
        LOGGER.log(Level.SEVERE, "Parser :: Exception in parser class", e);
        return null;
    }
}
static AbstractSequenceClassifier<CoreLabel> crf = getCRFInstance();

/**
 * Loads the serialized CRF sequence classifier from {@code Constants.PARSERMODELPATH}.
 *
 * @return the loaded classifier, or {@code null} if the model could not be read or
 *         deserialized (the failure is logged at SEVERE level)
 */
private static AbstractSequenceClassifier<CoreLabel> getCRFInstance() {
    // try-with-resources: the classifier is fully deserialized before getClassifier returns,
    // so the model stream can be released as soon as loading finishes (or fails).
    try (InputStream parserModel = IOUtils.getInputStreamFromURLOrClasspathOrFileSystem(Constants.PARSERMODELPATH)) {
        return CRFClassifier.getClassifier(parserModel);
    } catch (ClassCastException | ClassNotFoundException | IOException e) {
        // Pass the exception itself: the (Level, String, Object) overload only substitutes
        // the parameter into "{0}" placeholders, which this message does not have, so
        // passing e.getMessage() silently dropped the failure detail and the stack trace.
        LOGGER.log(Level.SEVERE, "Parser :: Exception in parser class", e);
        return null;
    }
}
public static List<Triple<String, Integer, Integer>> parse(String input) {
input = input.replaceAll("http","----"); //No I18N
input = input.replaceAll("www","---"); //No I18N
Expand Down
2 changes: 1 addition & 1 deletion src/main/resources/parser/parser.config.props
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ conjoinShapeNGrams = true
useNeighborNGrams = true

# If true, record the NGram features that correspond to a String (under the current option settings) and reuse rather than recalculating if the String is seen again.
cacheNGrams = true
cacheNGrams = false

# Do not include character n-gram features for n-grams that contain neither the beginning nor the end of the word
noMidNGrams = true
Expand Down
Binary file modified src/main/resources/parser/parser.crf.ser.gz
Binary file not shown.

0 comments on commit e1a1144

Please sign in to comment.