Added comments and reorganized java code. Descriptors and tests are pending. #20
ziy committed Aug 19, 2013
1 parent 593efc6 commit 0fd4b1f
Showing 93 changed files with 2,096 additions and 2,136 deletions.
1 change: 0 additions & 1 deletion .settings/org.eclipse.core.resources.prefs
@@ -1,5 +1,4 @@
eclipse.preferences.version=1
encoding//src/main/java=UTF-8
encoding//src/main/resources=UTF-8
encoding//src/test/java=UTF-8
encoding/<project>=UTF-8
@@ -1,4 +1,4 @@
package edu.cmu.lti.oaqa.bio.test.ziy.ie;
package edu.cmu.lti.oaqa.bio.core.ie;

import java.util.ArrayList;
import java.util.Collection;
@@ -13,14 +13,20 @@
import org.apache.uima.resource.ResourceInitializationException;

import util.SimilarityUtils;

import edu.cmu.lti.oaqa.bio.core.keyterm.pos.LingPipeHmmPosTagger;
import edu.cmu.lti.oaqa.bio.framework.data.BioKeyterm;
import edu.cmu.lti.oaqa.bio.framework.retrieval.DocumentRetrieverWrapper;
import edu.cmu.lti.oaqa.bio.test.ziy.keyterm.pos.LingPipeHmmPosTagger;
import edu.cmu.lti.oaqa.cse.basephase.ie.AbstractPassageUpdater;
import edu.cmu.lti.oaqa.framework.UimaContextHelper;
import edu.cmu.lti.oaqa.framework.data.Keyterm;

/**
* A passage extractor that provides static methods to process retrieved texts, with the data
* source of the contents specified in a configuration file.
*
* @author Zi Yang <[email protected]>
*
*/
public abstract class ContentAwarePassageUpdater extends AbstractPassageUpdater {

protected DocumentRetrieverWrapper retriever;
@@ -29,45 +29,39 @@ public abstract class ContentAwarePassageUpdater extends AbstractPassageUpdater
public void initialize(UimaContext c) throws ResourceInitializationException {
super.initialize(c);
boolean zipped = UimaContextHelper.getConfigParameterBooleanValue(c, "Zipped", true);
try {
retriever = new DocumentRetrieverWrapper((String) c.getConfigParameterValue("Prefix"),
zipped, true);
} catch (NullPointerException e) {
retriever = new DocumentRetrieverWrapper(zipped, true);
}
retriever = new DocumentRetrieverWrapper((String) c.getConfigParameterValue("Url"),
(String) c.getConfigParameterValue("Prefix"), zipped);
}

public static Map<String, Double> getLowerCasedKeytermCount(List<Keyterm> keyterms) {
List<String> keytermStrs = new ArrayList<String>();
for (Keyterm keyterm : keyterms) {
//only consider the keyterms whose weights >= 0.4
// only consider the keyterms whose weights >= 0.4
// this is only for TREC 2006
// this can be set as a parameter
// TODO
if(keyterm.getProbability() >= 0.4)

//if (keyterm.getProbability() >= 0.3)
if (keyterm.getProbability() >= 0.4)
keytermStrs.add(keyterm.getText().toLowerCase());
}
return SimilarityUtils.countWord(keytermStrs.toArray(new String[0]));
}

public static Map<String, Double> getLowerCasedKeytermCount(List<Keyterm> keyterms, float threshold) {

public static Map<String, Double> getLowerCasedKeytermCount(List<Keyterm> keyterms,
float threshold) {
List<String> keytermStrs = new ArrayList<String>();
for (Keyterm keyterm : keyterms) {
//only consider the keyterms whose weights >= 0.4
// only consider the keyterms whose weights >= 0.4
// this is only for TREC 2006
// this can be set as a parameter
// TODO
//if(keyterm.getProbability() >= 0.4)

if (keyterm.getProbability() >= threshold)
keytermStrs.add(keyterm.getText().toLowerCase());
}
return SimilarityUtils.countWord(keytermStrs.toArray(new String[0]));
}

public static Map<String, Double> getLowerCasedKeytermTypes(List<Keyterm> keyterms, float threshold) {

public static Map<String, Double> getLowerCasedKeytermTypes(List<Keyterm> keyterms,
float threshold) {
Map<String, Double> keytermCount = getLowerCasedKeytermCount(keyterms, threshold);
for (String keyterm : keytermCount.keySet()) {
keytermCount.put(keyterm, 1.0);
@@ -102,14 +102,14 @@ public static Map<String, Double> getLowerCasedPassageTokenTypes(List<String> to
public static Map<String, String> getLowerCasedSynonymKeytermMapping(List<Keyterm> keyterms) {
Map<String, String> synonym2keyterm = new HashMap<String, String>();
for (Keyterm keyterm : keyterms) {
//only consider keyterms whose weights >= 0.4
if(keyterm.getProbability() >= 0.4) {
String text = keyterm.getText();
BioKeyterm biokeyterm = (BioKeyterm) keyterm;
//use refined synonyms
for (String synonym : biokeyterm.getSynonymsBySource("RefinedSynonyms")) {
synonym2keyterm.put(synonym.toLowerCase(), text);
}
// only consider keyterms whose weights >= 0.4
if (keyterm.getProbability() >= 0.4) {
String text = keyterm.getText();
BioKeyterm biokeyterm = (BioKeyterm) keyterm;
// use refined synonyms
for (String synonym : biokeyterm.getSynonymsBySource("RefinedSynonyms")) {
synonym2keyterm.put(synonym.toLowerCase(), text);
}
}
}
return synonym2keyterm;
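A note on the getLowerCasedKeytermCount methods above: they lower-case the text of every keyterm whose weight clears the threshold and pass the resulting strings to SimilarityUtils.countWord for a term-frequency tally. SimilarityUtils itself is not part of this commit, so the snippet below is only a minimal sketch of the counting step it is assumed to perform; the class name KeytermCountSketch and its countWord method are hypothetical stand-ins.

import java.util.HashMap;
import java.util.Map;

// Hypothetical stand-in for SimilarityUtils.countWord, shown only to clarify what
// getLowerCasedKeytermCount is expected to return: a map from each lower-cased
// keyterm string to the number of times it occurs in the input array.
public class KeytermCountSketch {

  public static Map<String, Double> countWord(String[] words) {
    Map<String, Double> counts = new HashMap<String, Double>();
    for (String word : words) {
      Double current = counts.get(word);
      counts.put(word, current == null ? 1.0 : current + 1.0);
    }
    return counts;
  }

  public static void main(String[] args) {
    // Prints {pes=2.0, cancer=1.0} (iteration order may differ)
    System.out.println(countWord(new String[] { "pes", "cancer", "pes" }));
  }
}

getLowerCasedKeytermTypes then flattens these counts to 1.0 per keyterm, so only the presence of a term matters rather than its frequency.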
@@ -17,47 +17,24 @@
import edu.cmu.lti.oaqa.framework.data.RetrievalResult;

/**
* A passage extractor that retrieves relevant passages (with the extent specified by
* {@link #passageSpan}) from the Indri search engine.
*
* @author Zi Yang <[email protected]>
*
*/
public class DefaultPassageExtractor extends AbstractPassageExtractor {

/**
* @author yanfang
*/
protected static enum PassageSpanType {
/**
* @uml.property name="legalspan"
* @uml.associationEnd
*/
legalspan, /**
* @uml.property name="sentence"
* @uml.associationEnd
*/
sentence
legalspan, sentence
};

/**
* @uml.property name="hitListSize"
*/
protected int hitListSize;

/**
* @uml.property name="batchSize"
*/
protected int batchSize;

/**
* @uml.property name="passageSpan"
* @uml.associationEnd
*/
protected PassageSpanType passageSpan;

/**
* @uml.property name="wrapper"
* @uml.associationEnd
*/
protected static IndriWrapper wrapper;

@Override
@@ -80,9 +57,9 @@ public void initialize(UimaContext aContext) throws ResourceInitializationExcept
.getConfigParameterValue("PassageSpan"));
String serverUrl = (String) aContext.getConfigParameterValue("server");
Integer serverPort = (Integer) aContext.getConfigParameterValue("port");
try {
try {
if (wrapper == null) {
wrapper = new IndriWrapper(serverUrl, serverPort);
wrapper = new IndriWrapper(serverUrl, serverPort);
}
} catch (Exception e) {
throw new ResourceInitializationException(e);
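For context on the PassageSpanType enum above: retrieving passages at the granularity of a document field in Indri is done with extent retrieval, i.e. a query of the form #combine[field]( ... ), which is also where the "[legalspan]" string used by IndriLegalSpanPassageExtractor further down comes from. The sketch below only illustrates that query shape; the real query is produced by QueryGenerator, which is not shown in this commit, and the class and method names here are hypothetical.

// Illustrative only: maps a passage span type onto an Indri extent-retrieval
// query of the form "#combine[extent]( term1 term2 ... )".
public class SpanQuerySketch {

  enum PassageSpanType { legalspan, sentence }

  static String toExtentQuery(PassageSpanType span, String... terms) {
    StringBuilder query = new StringBuilder("#combine[").append(span.name()).append("](");
    for (String term : terms) {
      query.append(' ').append(term);
    }
    return query.append(" )").toString();
  }

  public static void main(String[] args) {
    // Prints: #combine[legalspan]( pes cancer )
    System.out.println(toExtentQuery(PassageSpanType.legalspan, "pes", "cancer"));
  }
}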
@@ -14,21 +14,15 @@
import edu.cmu.lti.oaqa.framework.data.RetrievalResult;

/**
* A base passage extractor that returns the first legal span from a document.
*
* @author Zi Yang <[email protected]>
*
*/
public class FirstLegalSpanPassageExtractor extends AbstractPassageExtractor {

/**
* @uml.property name="hitListSize"
*/
private int hitListSize = 0;

/**
* @uml.property name="retriever"
* @uml.associationEnd
*/
private DocumentRetrieverWrapper retriever;

@Override
@@ -38,7 +32,8 @@ public void initialize(UimaContext aContext) throws ResourceInitializationExcept
if (hlsValue != null) {
this.hitListSize = hlsValue.intValue();
}
retriever = new DocumentRetrieverWrapper(true, false);
retriever = new DocumentRetrieverWrapper((String) aContext.getConfigParameterValue("url"),
(String) aContext.getConfigParameterValue("prefix"), true);
}

@Override
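The initialize override above follows the usual UIMA pattern for optional configuration parameters: read the value through UimaContext.getConfigParameterValue, null-check it, and otherwise keep a field default. The self-contained sketch below restates that pattern; the parameter names "url" and "prefix" are taken from the hunk above, while "HitListSize" and the class name are assumptions, and the XML descriptor that would declare these parameters is still pending per the commit message.

import org.apache.uima.UimaContext;
import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;

// Sketch of the null-checked configuration-parameter pattern used above;
// not the actual FirstLegalSpanPassageExtractor.
public class ConfigParamSketch extends JCasAnnotator_ImplBase {

  private int hitListSize = 0; // default used when the parameter is absent

  private String url;

  private String prefix;

  @Override
  public void initialize(UimaContext aContext) throws ResourceInitializationException {
    super.initialize(aContext);
    Integer hlsValue = (Integer) aContext.getConfigParameterValue("HitListSize");
    if (hlsValue != null) {
      this.hitListSize = hlsValue.intValue();
    }
    url = (String) aContext.getConfigParameterValue("url");
    prefix = (String) aContext.getConfigParameterValue("prefix");
  }

  @Override
  public void process(JCas aJCas) {
    // no-op: this sketch only demonstrates parameter reading in initialize()
  }
}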
@@ -1,4 +1,4 @@
package edu.cmu.lti.oaqa.bio.test.yanfang.ie;
package edu.cmu.lti.oaqa.bio.core.ie;

import java.util.ArrayList;
import java.util.Arrays;
@@ -10,13 +10,14 @@
import org.apache.uima.UimaContext;
import org.apache.uima.resource.ResourceInitializationException;

import edu.cmu.lti.oaqa.bio.core.ie.DefaultPassageExtractor;
import edu.cmu.lti.oaqa.bio.retrieval.query.strategy.QueryGenerator;
import edu.cmu.lti.oaqa.bio.utils.retrieval.query.strategy.QueryGenerator;
import edu.cmu.lti.oaqa.framework.data.Keyterm;
import edu.cmu.lti.oaqa.framework.data.PassageCandidate;

/**
* @author Zi Yang <[email protected]>
* This strategy integrates synonyms, lexical variants, etc.
*
* @author yanfang <[email protected]>
*/
public class IndriLegalSpanPassageExtractor extends DefaultPassageExtractor {

@@ -27,34 +28,27 @@ public class IndriLegalSpanPassageExtractor extends DefaultPassageExtractor {
private String smoothingLambda;

private String backupQuery = "";

private String answerTypeWeight = "";

@Override
public void initialize(UimaContext aContext) throws ResourceInitializationException {
super.initialize(aContext);

this.smoothing = aContext.getConfigParameterValue("smoothing").toString();
this.smoothingMu = aContext.getConfigParameterValue("smoothing-mu").toString();
this.smoothingLambda = aContext.getConfigParameterValue("smoothing-lambda").toString();
this.answerTypeWeight = aContext.getConfigParameterValue("answer-type-weight").toString();

}

@Override
protected String formulateQuery(List<Keyterm> keyterms) {

this.backupQuery = QueryGenerator.generateIndriQuery(keyterms,"[legalspan]",false, answerTypeWeight);

String s2 = QueryGenerator.generateIndriQuery(keyterms,"[legalspan]",true, answerTypeWeight);
System.out.println("Query~~~:" + s2);

return s2;
this.backupQuery = QueryGenerator.generateIndriQuery(keyterms, "[legalspan]", false,
answerTypeWeight);
return QueryGenerator.generateIndriQuery(keyterms, "[legalspan]", true, answerTypeWeight);
}

@Override
protected List<PassageCandidate> extractPassages(String query) {

String rule = "";
if (this.smoothing.startsWith("j"))
rule = "method:" + this.smoothing + "," + "collectionLambda:" + this.smoothingLambda;
@@ -63,40 +57,26 @@ protected List<PassageCandidate> extractPassages(String query) {
if (this.smoothing.startsWith("t"))
rule = "method:" + this.smoothing + "," + "lambda:" + this.smoothingLambda + "," + "mu:"
+ this.smoothingMu;

String[] rules = {rule};

String[] rules = { rule };
List<PassageCandidate> result = new ArrayList<PassageCandidate>();

try {
wrapper.getQueryEnvironment().setScoringRules(rules);

ScoredExtentResult[] sers = wrapper.getQueryEnvironment().runQuery(query,
hitListSize);
ScoredExtentResult[] sers = wrapper.getQueryEnvironment().runQuery(query, hitListSize);
String[] ids = wrapper.getQueryEnvironment().documentMetadata(sers, "docno");

System.out.println("SERS: " + sers.length);

ParsedDocument[] texts = null;

int count = 0;

for (int i = 0; i < ids.length; i++) {

//testAliveness();
// testAliveness();
if (i % batchSize == 0) {
ScoredExtentResult[] subSers = Arrays.copyOfRange(sers, i,
Math.min(i + batchSize, ids.length));
texts = wrapper.getQueryEnvironment().documents(subSers);

}
int begin = texts[i % batchSize].positions[sers[i].begin].begin;
int end = texts[i % batchSize].positions[sers[i].end - 1].end;
int offset = texts[i % batchSize].text.indexOf("<TEXT>") + 6;
assert offset >= 6;

// TODO FIX THIS

PassageCandidate r = new PassageCandidate(ids[i], begin - offset, end - offset,
(float) Math.exp(sers[i].score), query);
result.add(r);
@@ -120,9 +100,7 @@ protected List<PassageCandidate> extractPassages(String query) {
int end = texts[i % batchSize].positions[sers[i].end - 1].end;
int offset = texts[i % batchSize].text.indexOf("<TEXT>") + 6;
assert offset >= 6;

// TODO FIX THIS

PassageCandidate r = new PassageCandidate(ids2[i], begin - offset, end - offset,
(float) Math.exp(sers[i].score) / 10, query);
result.add(r);
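Two details of extractPassages above are worth spelling out. The scoring-rule strings handed to setScoringRules follow Indri's "method:name,parameter:value" syntax; the sketch below restates only the two branches visible in this diff (Jelinek-Mercer-style smoothing keyed on a leading "j" and two-stage smoothing keyed on a leading "t"). The class name and the sample values used in main are assumptions for illustration.

// Minimal restatement of the smoothing-rule construction in extractPassages above;
// the class name and the sample values are illustrative only.
public class SmoothingRuleSketch {

  static String buildRule(String smoothing, String mu, String lambda) {
    if (smoothing.startsWith("j")) {
      // Jelinek-Mercer smoothing: only the collection-level lambda is set
      return "method:" + smoothing + "," + "collectionLambda:" + lambda;
    }
    if (smoothing.startsWith("t")) {
      // two-stage smoothing: needs both lambda and mu
      return "method:" + smoothing + "," + "lambda:" + lambda + "," + "mu:" + mu;
    }
    return "";
  }

  public static void main(String[] args) {
    // Prints: method:jelinek-mercer,collectionLambda:0.4
    System.out.println(buildRule("jelinek-mercer", "2500", "0.4"));
  }
}

Separately, the offset computation indexOf("<TEXT>") + 6 skips past the literal <TEXT> tag (six characters), so the begin and end positions stored on each PassageCandidate are relative to the document body rather than the full record, and (float) Math.exp(sers[i].score) converts Indri's log-space scores back to probabilities before they are used as candidate scores.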
@@ -1,20 +1,18 @@
package edu.cmu.lti.oaqa.bio.test.yanfang.ie;
package edu.cmu.lti.oaqa.bio.core.ie;

import java.io.File;
import java.io.FileOutputStream;
import java.io.ObjectOutputStream;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.UUID;

import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.resource.ResourceInitializationException;

import edu.cmu.lti.oaqa.cse.basephase.ie.AbstractPassageUpdater;
import edu.cmu.lti.oaqa.cse.basephase.retrieval.AbstractRetrievalUpdater;
import edu.cmu.lti.oaqa.framework.data.Keyterm;
import edu.cmu.lti.oaqa.framework.data.PassageCandidate;
import edu.cmu.lti.oaqa.framework.data.RetrievalResult;
@@ -48,7 +46,7 @@ protected List<PassageCandidate> updatePassages(String question, List<Keyterm> k
for (PassageCandidate passage : passages) {
temp.put(passage, passage.getProbability());
}

question2passages.put(question, temp);
return passages;
}