Skip to content

Commit

Permalink
#62 - Implement variable detector disambiguator component
Browse files Browse the repository at this point in the history
  • Loading branch information
maxxkia committed Dec 19, 2017
1 parent 6d01dbb commit b06f9c7
Showing 1 changed file with 62 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
package eu.openminted.uc.socialsciences.variabledetection;

import static org.apache.uima.fit.factory.AnalysisEngineFactory.createEngineDescription;

import java.io.File;
import java.io.IOException;

import org.apache.uima.UIMAException;
import org.apache.uima.collection.CollectionReaderDescription;
import org.apache.uima.fit.factory.CollectionReaderFactory;
import org.apache.uima.fit.pipeline.SimplePipeline;
import org.apache.uima.resource.ResourceInitializationException;

import de.tudarmstadt.ukp.dkpro.core.io.xmi.XmiWriter;
import de.tudarmstadt.ukp.dkpro.core.opennlp.OpenNlpNamedEntityRecognizer;
import de.tudarmstadt.ukp.dkpro.core.opennlp.OpenNlpPosTagger;
import de.tudarmstadt.ukp.dkpro.core.stanfordnlp.StanfordLemmatizer;
import de.tudarmstadt.ukp.dkpro.core.stopwordremover.StopWordRemover;
import de.tudarmstadt.ukp.dkpro.core.tokit.BreakIteratorSegmenter;
import eu.openminted.uc.socialsciences.variabledetection.detection.VariableMentionDetector;
import eu.openminted.uc.socialsciences.variabledetection.disambiguation.VariableMentionDisambiguator;
import eu.openminted.uc.socialsciences.variabledetection.io.XmlCorpusAllDocsReader;

/**
 * Command-line entry point that reads a corpus with {@link XmlCorpusAllDocsReader}, runs the
 * preprocessing components followed by {@link VariableMentionDetector} and
 * {@link VariableMentionDisambiguator}, and writes the result with {@link XmiWriter} to
 * {@link #PREDICTION_PATH}.
 */
public class DetectionDisambiguationPipeline
{
    /**
     * Default corpus location used when no path is supplied. NOTE(review): this is an absolute
     * path on a developer machine; pass the corpus path as the first program argument instead.
     */
    private static final String CORPUS_FILEPATH_TEST = "/home/local/UKP/kiaeeha/workspace/Datasets"
            + "/openminted/uc-ss/variable-detection/detection/Full_ALLDOCS.xml";
    private static final String LANGUAGE_CODE = "en";
    /** Classpath location of the stop word list handed to {@link StopWordRemover}. */
    private static final String STOPWORDS_RESOURCE = "/stopwords/english.txt";
    /** Target location handed to {@link XmiWriter}. */
    public static final File PREDICTION_PATH = new File("target/prediction");

    /**
     * Starts the experiment.
     *
     * @param args
     *            optional; if present, {@code args[0]} is used as the corpus location instead of
     *            the built-in default
     * @throws Exception
     *             if the pipeline cannot be set up or fails while running
     */
    public static void main(String[] args) throws Exception
    {
        DetectionDisambiguationPipeline experiment = new DetectionDisambiguationPipeline();
        if (args != null && args.length > 0) {
            experiment.run(args[0]);
        }
        else {
            experiment.run();
        }
    }

    /**
     * Runs the pipeline on the default corpus location.
     *
     * @throws ResourceInitializationException
     *             if a pipeline component cannot be created
     * @throws UIMAException
     *             if the pipeline fails while processing
     * @throws IOException
     *             if the stop word list is missing from the classpath or I/O fails
     */
    protected void run()
        throws ResourceInitializationException, UIMAException, IOException
    {
        run(CORPUS_FILEPATH_TEST);
    }

    /**
     * Runs the pipeline on the given corpus location.
     *
     * @param corpusPath
     *            value passed to the reader as {@code PARAM_SOURCE_LOCATION}; must not be
     *            {@code null}
     * @throws IllegalArgumentException
     *             if {@code corpusPath} is {@code null}
     * @throws ResourceInitializationException
     *             if a pipeline component cannot be created
     * @throws UIMAException
     *             if the pipeline fails while processing
     * @throws IOException
     *             if the stop word list is missing from the classpath or I/O fails
     */
    protected void run(String corpusPath)
        throws ResourceInitializationException, UIMAException, IOException
    {
        if (corpusPath == null) {
            throw new IllegalArgumentException("corpusPath must not be null");
        }

        // Class.getResource returns null for a missing resource; fail with a descriptive
        // message instead of a bare NullPointerException on toString().
        java.net.URL stopwords = getClass().getResource(STOPWORDS_RESOURCE);
        if (stopwords == null) {
            throw new IOException(
                    "Stop word list not found on classpath: " + STOPWORDS_RESOURCE);
        }

        CollectionReaderDescription reader = CollectionReaderFactory.createReaderDescription(
                XmlCorpusAllDocsReader.class,
                XmlCorpusAllDocsReader.PARAM_SOURCE_LOCATION, corpusPath,
                XmlCorpusAllDocsReader.PARAM_LANGUAGE, LANGUAGE_CODE);

        SimplePipeline.runPipeline(
                reader,
                //Preprocessing should be the same as the one used for model training
                createEngineDescription(BreakIteratorSegmenter.class),
                createEngineDescription(OpenNlpPosTagger.class),
                createEngineDescription(StanfordLemmatizer.class),
                createEngineDescription(OpenNlpNamedEntityRecognizer.class),
                createEngineDescription(StopWordRemover.class,
                        StopWordRemover.PARAM_MODEL_LOCATION, stopwords.toString()),
                createEngineDescription(VariableMentionDetector.class),
                createEngineDescription(VariableMentionDisambiguator.class),
                createEngineDescription(XmiWriter.class,
                        XmiWriter.PARAM_TARGET_LOCATION, PREDICTION_PATH,
                        XmiWriter.PARAM_OVERWRITE, true));
    }
}

0 comments on commit b06f9c7

Please sign in to comment.