-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #3 from andrewresearch/develop
develop
- Loading branch information
Showing
378 changed files
with
4,680 additions
and
28,168 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -51,3 +51,4 @@ crashlytics-build.properties | |
fabric.properties | ||
|
||
/.idea/* | ||
/src/main/scala/worksheet.sc |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
package cc.factorie.app.nlp | ||
|
||
/** | ||
* Created by [email protected] on 27/10/17. | ||
*/ | ||
|
||
/** A simple concrete implementation of Section.
  *
  * It only stores its three constructor arguments as public vals.
  *
  * @param document    the Document this section is attached to
  * @param stringStart the start offset of this section
  * @param stringEnd   the end offset of this section
  */
class BasicSection(
  val document: Document,
  val stringStart: Int,
  val stringEnd: Int
) extends Section
Large diffs are not rendered by default.
Oops, something went wrong.
36 changes: 36 additions & 0 deletions
36
src/main/scala/cc/factorie/app/nlp/DocumentAnnotationPipeline.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
package cc.factorie.app.nlp | ||
|
||
/**User: apassos | ||
* Date: 8/7/13 | ||
* Time: 2:48 PM | ||
*/ | ||
|
||
/** A sequence of DocumentAnnotators packaged as a single DocumentAnnotator.
  * This class also populates Document.annotators with a record of which
  * DocumentAnnotator classes provided which annotation classes.
  *
  * @param annotators  the annotators to run, in the given order
  * @param prereqAttrs exposed as this annotator's `prereqAttrs`; empty by default
  */
class DocumentAnnotationPipeline(val annotators: Seq[DocumentAnnotator], val prereqAttrs: Seq[Class[_]] = Seq()) extends DocumentAnnotator {
  /** When true, `process` accumulates the timing and token counters used by `profileReport`. */
  var profile = false
  /** Running total of `tokenCount` over documents processed while `profile` was on. */
  var tokensProcessed = 0
  /** Running total of wall-clock milliseconds spent in `process` while `profile` was on. */
  var msProcessed = 0L
  /** Accumulated milliseconds per annotator, kept in first-seen order. */
  val timePerAnnotator = collection.mutable.LinkedHashMap[DocumentAnnotator,Long]()

  /** The distinct union of the `postAttrs` of all contained annotators. */
  def postAttrs = annotators.flatMap(_.postAttrs).distinct

  /** Runs each annotator in order, skipping any annotator for which the current document
    * already has one of its `postAttrs`, and returns the resulting document.
    *
    * @param document the document to annotate
    * @return the document returned by the last annotator that ran (or `document` itself if none ran)
    */
  def process(document: Document) = {
    var doc = document
    val t00 = System.currentTimeMillis()
    // The guard is evaluated per annotator against the current `doc`, so records written
    // by earlier annotators in this same run are taken into account.
    for (annotator <- annotators; if annotator.postAttrs.forall(!doc.hasAnnotation(_))) {
      val t0 = System.currentTimeMillis()
      doc = annotator.process(doc)
      if (profile) timePerAnnotator(annotator) = timePerAnnotator.getOrElse(annotator, 0L) + System.currentTimeMillis() - t0
      // Record provenance on the document that is carried forward and returned (`doc`),
      // not on the original argument: the guard above reads `doc`, so writing to the
      // input would be lost whenever an annotator returns a different Document instance.
      annotator.postAttrs.foreach(a => doc.annotators(a) = annotator.getClass)
    }
    if (profile) {
      msProcessed += System.currentTimeMillis() - t00
      tokensProcessed += doc.tokenCount
    }
    doc
  }

  /** Formats the accumulated profiling counters: overall throughput followed by one line per annotator.
    *
    * NOTE(review): the divisions are on Doubles, so a zero elapsed time yields Infinity/NaN in the
    * text rather than an exception. There is also no newline between the summary sentence and the
    * "Speeds of individual components:" header. Both are left as-is to keep the output unchanged.
    */
  def profileReport: String = {
    s"Processed $tokensProcessed tokens in ${msProcessed/1000.0} seconds, at ${tokensProcessed.toDouble*1000.0/msProcessed} tokens / second " +
    "Speeds of individual components:\n" + timePerAnnotator.map(i => f"   ${i._1.getClass.getSimpleName}%30s: ${tokensProcessed.toDouble*1000.0/i._2}%4.4f tokens/sec ").mkString("\n")
  }

  /** Joins every contained annotator's `tokenAnnotationString` for `token` with tab characters. */
  def tokenAnnotationString(token: Token): String = annotators.map(_.tokenAnnotationString(token)).mkString("\t")
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
package cc.factorie.app.nlp | ||
|
||
/** Attribute placed on a Document to carry the document's name.
  *
  * @param string the name itself
  */
case class DocumentName(string: String) {
  /** Prints as the bare name rather than the default `DocumentName(...)` form. */
  override def toString: String = this.string
}
16 changes: 16 additions & 0 deletions
16
src/main/scala/cc/factorie/app/nlp/DocumentSubstring.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
package cc.factorie.app.nlp | ||
|
||
/** A portion of the string contents of a Document.
  *
  * @author Andrew McCallum
  */
trait DocumentSubstring {

  /** The Document that this DocumentSubstring is a part of. */
  def document: Document

  /** Character offset into Document.string where this DocumentSubstring begins. */
  def stringStart: Int

  /** Character offset into Document.string where this DocumentSubstring is over.
    * That is, the last character of the DocumentSubstring is
    * Document.string(this.stringEnd-1).
    */
  def stringEnd: Int

  /** The substring of the Document that this DocumentSubstring encompasses. */
  def string: String
}
7 changes: 7 additions & 0 deletions
7
src/main/scala/cc/factorie/app/nlp/MutableDocumentAnnotatorMap.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
package cc.factorie.app.nlp | ||
|
||
/** A Map from annotation class to a function producing the DocumentAnnotator that provides that annotation.
  * Used to store default ways of getting certain prerequisite annotations.
  */
class MutableDocumentAnnotatorMap extends collection.mutable.LinkedHashMap[Class[_], () => DocumentAnnotator] {

  /** Registers `annotator` under every class listed in its `postAttrs`.
    * An existing entry for the same class is overwritten.
    */
  def +=(annotator: DocumentAnnotator) =
    for (attr <- annotator.postAttrs) update(attr, () => annotator)
}
Oops, something went wrong.