diff --git a/crosslingual_coreference/CrossLingualPredictor.py b/crosslingual_coreference/CrossLingualPredictor.py index 1c21ebf..a77c6e0 100644 --- a/crosslingual_coreference/CrossLingualPredictor.py +++ b/crosslingual_coreference/CrossLingualPredictor.py @@ -124,17 +124,35 @@ def predict(self, text: str, advanced_resolve: bool = True) -> dict: return prediction - def pipe(self, texts: List[str], advanced_resolve: bool = True) -> List[dict]: + def pipe(self, texts: List[str], advanced_resolve: bool = True): """ - > The function takes a list of strings and returns a list of dictionaries + Produce, for each text, a version where each coreference is replaced by its main mention - :param texts: List[str] - :type texts: List[str] - :param advanced_resolve: If True, the model will try to resolve the ambiguity of the entities, defaults to True - :type advanced_resolve: bool (optional) - :return: A list of dictionaries. + # Parameters + + texts : List[`str`] + The texts to resolve, one string per document. + + # Returns + + A list of dicts, one per text, with keys `clusters` and `resolved_text` """ - return [self.predict(text, advanced_resolve) for text in texts] + + spacy_document_list = list(self.predictor._spacy.pipe(texts)) + json_batch = [{"document": document} for document in texts] + json_predictions = self.predictor.predict_batch_json(json_batch) + clusters_predictions = [prediction.get("clusters") for prediction in json_predictions] + + predictions = [] + for spacy_doc, cluster in zip(spacy_document_list, clusters_predictions): + predictions.append( + { + "clusters": cluster, + "resolved_text": self.resolver.replace_corefs(spacy_doc, cluster), + } + ) + + return predictions def chunk_sentencized_doc(self, doc: Doc) -> List[str]: """Split spacy doc object into chunks of maximum size 'chunk_size' with diff --git a/pyproject.toml b/pyproject.toml index 3e6047f..be591f4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "crosslingual-coreference" -version = 
"0.2.2" +version = "0.2.3" description = "A multi-lingual approach to AllenNLP CoReference Resolution, along with a wrapper for spaCy." authors = ["David Berenstein "] license = "MIT"