Skip to content

Commit

Permalink
Merge branch 'master' of https://www.github.com/TriNetX/MedTagger
Browse files Browse the repository at this point in the history
# Conflicts:
#	src/main/java/org/ohnlp/medtagger/ae/AhoCorasickLookupAnnotator.java
#	src/main/java/org/ohnlp/medtagger/dict/DictWriter.java
#	src/main/java/org/ohnlp/medtagger/ie/ae/MedTaggerIEAnnotator.java
  • Loading branch information
qqndrew committed Mar 10, 2022
2 parents 2be0f23 + 48c686c commit f01cb79
Show file tree
Hide file tree
Showing 15 changed files with 122 additions and 87 deletions.
13 changes: 9 additions & 4 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@

<properties>
<uimaj.version>2.10.0</uimaj.version>
<uimafit.version>2.4.0</uimafit.version>
<uimafit.version>2.5.0</uimafit.version>
</properties>

<dependencies>
Expand Down Expand Up @@ -170,9 +170,14 @@
<version>1.3.3</version>
</dependency>
<dependency>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
<version>1.2.17</version>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-api</artifactId>
<version>2.17.1</version>
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-core</artifactId>
<version>2.17.1</version>
</dependency>
<dependency>
<groupId>org.apache.opennlp</groupId>
Expand Down
Original file line number Diff line number Diff line change
@@ -1,67 +1,68 @@
/*******************************************************************************
* Copyright: (c) 2013 Mayo Foundation for Medical Education and
* Copyright: (c) 2013 Mayo Foundation for Medical Education and
* Research (MFMER). All rights reserved. MAYO, MAYO CLINIC, and the
* triple-shield Mayo logo are trademarks and service marks of MFMER.
*
* Except as contained in the copyright notice above, or as used to identify
*
* Except as contained in the copyright notice above, or as used to identify
* MFMER as the author of this software, the trade names, trademarks, service
* marks, or product names of the copyright holder shall not be used in
* advertising, promotion or otherwise in connection with this software without
* prior written authorization of the copyright holder.
*
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/

package org.ohnlp.medtagger.ae;

import java.io.*;
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Vector;

import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.apache.logging.log4j.Level;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.core.config.Configurator;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.JFSIndexRepository;
import org.apache.uima.resource.ResourceAccessException;
import org.apache.uima.resource.ResourceInitializationException;
import org.ohnlp.typesystem.type.textspan.Segment;
import org.ohnlp.typesystem.type.textspan.Sentence;
import org.ohnlp.medtagger.dict.AhoCorasickDict;
import org.ohnlp.medtagger.lvg.LvgLookup;
import org.ohnlp.medtagger.type.ConceptMention;
import org.ohnlp.typesystem.type.syntax.BaseToken;
import org.ohnlp.typesystem.type.syntax.NumToken;
import org.ohnlp.typesystem.type.syntax.PunctuationToken;
import org.ohnlp.typesystem.type.syntax.WordToken;
import org.ohnlp.medtagger.dict.AhoCorasickDict;
import org.ohnlp.medtagger.lvg.LvgLookup;
import org.ohnlp.medtagger.type.ConceptMention;
import org.ohnlp.typesystem.type.textspan.Segment;
import org.ohnlp.typesystem.type.textspan.Sentence;

import java.io.*;
import java.net.URI;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Vector;

/**
* @author Hongfang Liu
*/
public class AhoCorasickLookupAnnotator extends JCasAnnotator_ImplBase {

// LOG4J logger based on class name
private Logger logger = Logger.getLogger(getClass().getName());
private boolean LONGEST = true;
// LOG4J logger based on class name
private Logger logger = LogManager.getLogger(getClass().getName());
private boolean LONGEST = true;

// data structure that stores the TRIE
AhoCorasickDict btac;
Expand All @@ -70,11 +71,11 @@ public class AhoCorasickLookupAnnotator extends JCasAnnotator_ImplBase {
// add the path in resources
LvgLookup lvg;

@Override
public void initialize(UimaContext aContext)
throws ResourceInitializationException {
super.initialize(aContext);
logger.setLevel(Level.DEBUG);
@Override
public void initialize(UimaContext aContext)
throws ResourceInitializationException {
super.initialize(aContext);
Configurator.setLevel(logger.getName(), Level.DEBUG);

try {
lvg = new LvgLookup(aContext);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@
import java.util.ArrayList;
import java.util.Iterator;

import org.apache.log4j.Logger;
import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.LogManager;
import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.jcas.JCas;
Expand All @@ -42,7 +43,7 @@

public class LineSentenceDetector extends JCasAnnotator_ImplBase {

private Logger logger = Logger.getLogger(getClass().getName());
private Logger logger = LogManager.getLogger(getClass().getName());

public static final byte CAPS_UNKNOWN = 0;
public static final byte CAPS_NONE = 1;
Expand Down
5 changes: 3 additions & 2 deletions src/main/java/org/ohnlp/medtagger/ae/Open2OHTokenizer.java
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@
import java.util.ArrayList;
import java.util.Iterator;

import org.apache.log4j.Logger;
import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.LogManager;
import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.jcas.JCas;
Expand All @@ -42,7 +43,7 @@

public class Open2OHTokenizer extends JCasAnnotator_ImplBase {

private Logger logger = Logger.getLogger(getClass().getName());
private Logger logger = LogManager.getLogger(getClass().getName());

public static final byte CAPS_UNKNOWN = 0;
public static final byte CAPS_NONE = 1;
Expand Down
5 changes: 3 additions & 2 deletions src/main/java/org/ohnlp/medtagger/dict/AhoCorasickDict.java
Original file line number Diff line number Diff line change
Expand Up @@ -39,13 +39,14 @@
import java.util.Set;
import java.util.Vector;

import org.apache.log4j.Logger;
import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.LogManager;


public class AhoCorasickDict {

// LOG4J logger based on class name
private Logger iv_logger = Logger.getLogger(getClass().getName());
private Logger iv_logger = LogManager.getLogger(getClass().getName());

public static final String FAILLINK="_FAIL_";
public static final String SLDELIM="||";
Expand Down
9 changes: 7 additions & 2 deletions src/main/java/org/ohnlp/medtagger/dict/DictWriter.java
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,14 @@
import java.util.HashSet;
import java.util.Iterator;

import org.apache.log4j.Logger;
import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.LogManager;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.CASException;
import org.apache.uima.collection.CasConsumer_ImplBase;
import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.JFSIndexRepository;
import org.apache.uima.resource.ResourceInitializationException;
Expand Down Expand Up @@ -94,6 +98,7 @@ else if(token instanceof PunctuationToken){
pwr.flush();
}


@Override
public void destroy() {
super.destroy();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@
import java.util.ArrayList;
import java.util.Iterator;

import org.apache.log4j.Logger;
import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.LogManager;
import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.jcas.JCas;
Expand All @@ -42,7 +43,7 @@

public class LineSentenceDetector extends JCasAnnotator_ImplBase {

private Logger logger = Logger.getLogger(getClass().getName());
private Logger logger = LogManager.getLogger(getClass().getName());

public static final byte CAPS_UNKNOWN = 0;
public static final byte CAPS_NONE = 1;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@
package org.ohnlp.medtagger.ie.ae;

import org.apache.commons.lang3.SerializationUtils;
import org.apache.log4j.Logger;
import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.LogManager;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
import org.apache.uima.cas.FSIterator;
Expand Down Expand Up @@ -60,7 +61,7 @@ public class MedTaggerDynamicIEAnnotator extends JCasAnnotator_ImplBase {
//private Boolean hyphen2space = false;
private Boolean punct2space = false;

private Logger iv_logger = Logger.getLogger(getClass().getName());
private Logger iv_logger = LogManager.getLogger(getClass().getName());

public void initialize(UimaContext aContext)
throws ResourceInitializationException {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,11 @@
*******************************************************************************/
package org.ohnlp.medtagger.ie.ae;

import org.apache.log4j.Logger;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
import org.apache.uima.cas.FSIterator;
import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.tcas.Annotation;
import org.apache.uima.resource.ResourceInitializationException;
Expand Down Expand Up @@ -67,7 +68,7 @@ public class MedTaggerIEAnnotator extends JCasAnnotator_ImplBase {
//private Boolean hyphen2space = false;
private Boolean punct2space = false;

private Logger iv_logger = Logger.getLogger(getClass().getName());
private Logger iv_logger = LogManager.getLogger(getClass().getName());

public ResourceUtilManager rum;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,8 @@
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.log4j.Logger;
import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.LogManager;


/**
Expand All @@ -53,7 +54,7 @@ public class ResourceUtilManager implements Serializable {
public transient static String RESOURCEDIR;
private transient static ResourceUtilManager INSTANCE = null;

private transient Logger iv_logger = Logger.getLogger(getClass().getName());
private transient Logger iv_logger = LogManager.getLogger(getClass().getName());

private Pattern regexpPattern = Pattern.compile("(.*)");
private Pattern normPattern = Pattern.compile("^(.*?)\t(.*?)$");
Expand Down
5 changes: 3 additions & 2 deletions src/main/java/org/ohnlp/medtagger/lvg/LvgLookup.java
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,8 @@
import java.util.Iterator;
import java.util.Scanner;

import org.apache.log4j.Logger;
import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.LogManager;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
Expand All @@ -46,7 +47,7 @@
public class LvgLookup extends JCasAnnotator_ImplBase {

// LOG4J logger based on class name
private Logger logger = Logger.getLogger("LvgLookup");
private Logger logger = LogManager.getLogger("LvgLookup");
HashMap<String, String> lvgMap;
HashSet<String> openclass;
//private static OpenClassWords pds = new OpenClassWords();
Expand Down
19 changes: 15 additions & 4 deletions src/main/java/org/ohnlp/medtagger/ml/cr/transShareAnnotation.java
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@
import org.ohnlp.medtagger.ml.type.shareAnnotation;
import org.ohnlp.medtagger.ml.type.shareSlot;

import javax.xml.XMLConstants;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParserFactory;
import javax.xml.parsers.SAXParser;

Expand All @@ -52,27 +54,36 @@ public transShareAnnotation(JCas jcas) {
};

public transShareAnnotation(String str, JCas jcas) {
SAXParserFactory factory = SAXParserFactory.newInstance();
mjcas = jcas;
try {
SAXParser saxParser = factory.newSAXParser();
SAXParser saxParser = createSaxParser();
saxParser.parse(new ByteArrayInputStream(str.getBytes()), this);
} catch (Throwable t) {
t.printStackTrace();
}
}

public transShareAnnotation(File xmlfile, JCas jcas) {
SAXParserFactory factory = SAXParserFactory.newInstance();
mjcas=jcas;
try {
SAXParser saxParser = factory.newSAXParser();
SAXParser saxParser = createSaxParser();
saxParser.parse(xmlfile, this);
} catch (Throwable t) {
t.printStackTrace();
}
}

/**
 * Builds a SAX parser configured to resist XML External Entity (XXE) attacks.
 *
 * <p>DOCTYPE declarations are rejected, external general and parameter entities
 * are switched off, and access to external DTDs and schemas is denied on the
 * resulting parser.
 *
 * @return a parser that refuses DOCTYPE declarations and external resources
 * @throws ParserConfigurationException if the factory cannot create a parser
 *         with the requested features
 * @throws SAXException if the underlying implementation does not recognise or
 *         support one of the features or properties set here
 */
private SAXParser createSaxParser() throws ParserConfigurationException, SAXException {
    SAXParserFactory factory = SAXParserFactory.newInstance();
    // Ask the implementation to apply its secure-processing limits explicitly
    // rather than relying on whatever default the provider ships with.
    factory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
    // Rejecting DOCTYPE outright is the primary XXE defence; the two entity
    // switches below are kept as defence in depth.
    factory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
    factory.setFeature("http://xml.org/sax/features/external-general-entities", false);
    factory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
    SAXParser saxParser = factory.newSAXParser();
    // An empty protocol list denies every external DTD / schema fetch.
    saxParser.setProperty(XMLConstants.ACCESS_EXTERNAL_DTD, "");
    saxParser.setProperty(XMLConstants.ACCESS_EXTERNAL_SCHEMA, "");
    return saxParser;
}

// ===========================================================
// SAX DocumentHandler methods
// ===========================================================
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,14 +39,15 @@
import java.util.List;
import java.util.Set;

import org.apache.log4j.Logger;
import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.LogManager;
import org.ohnlp.medtagger.ml.feature.Feature;
import org.ohnlp.medtagger.ml.util.Executer;
import org.ohnlp.medtagger.ml.util.PlatformDetection;


public class CRFSuiteWrapper {
private Logger iv_logger = Logger.getLogger(getClass().getName());
private Logger iv_logger = LogManager.getLogger(getClass().getName());
private File executable;

public CRFSuiteWrapper() throws Exception {
Expand Down
Loading

0 comments on commit f01cb79

Please sign in to comment.