Skip to content

Commit

Permalink
added AllenNLP prediction batching for Coref Predictor
Browse files (browse the repository at this point in the history)
  • Loading branch information
DavidFromPandora committed May 5, 2022
1 parent 172ee43 commit 7463efb
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 9 deletions.
34 changes: 26 additions & 8 deletions crosslingual_coreference/CrossLingualPredictor.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,17 +124,35 @@ def predict(self, text: str, advanced_resolve: bool = True) -> dict:

return prediction

def pipe(self, texts: List[str], advanced_resolve: bool = True) -> List[dict]:
    """
    Run coreference prediction over a batch of texts with a single
    `predict_batch_json` call instead of one `predict` call per text.

    # Parameters
    texts : List[`str`]
        The documents to process, one string per document.
    advanced_resolve : `bool`, optional (default = `True`)
        Kept so the signature stays parallel to `predict`.
        NOTE(review): this batched path never reads the flag; the resolved
        text always comes from `self.resolver.replace_corefs` -- confirm
        that this is intended.

    # Returns
    A list with one dictionary per input text, in input order. Each
    dictionary has the keys `"clusters"` (the `"clusters"` entry of the
    predictor output for that text, or `None` when the entry is absent)
    and `"resolved_text"` (the value returned by
    `self.resolver.replace_corefs` for that text and its clusters).
    """
    # The input is traversed twice below (spaCy pipe and JSON batch), so
    # materialise it once in case a one-shot iterable was passed.
    texts = list(texts)
    if not texts:
        # The earlier per-text implementation returned an empty list for an
        # empty batch; keep that result without calling the model at all.
        return []

    spacy_docs = list(self.predictor._spacy.pipe(texts))
    json_predictions = self.predictor.predict_batch_json(
        [{"document": text} for text in texts]
    )
    clusters_per_text = [prediction.get("clusters") for prediction in json_predictions]

    return [
        {
            "clusters": clusters,
            "resolved_text": self.resolver.replace_corefs(spacy_doc, clusters),
        }
        for spacy_doc, clusters in zip(spacy_docs, clusters_per_text)
    ]

def chunk_sentencized_doc(self, doc: Doc) -> List[str]:
"""Split spacy doc object into chunks of maximum size 'chunk_size' with
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "crosslingual-coreference"
version = "0.2.2"
version = "0.2.3"
description = "A multi-lingual approach to AllenNLP CoReference Resolution, along with a wrapper for spaCy."
authors = ["David Berenstein <[email protected]>"]
license = "MIT"
Expand Down

0 comments on commit 7463efb

Please sign in to comment.