Skip to content

Commit

Permalink
added faster minilm model to overcome slow
Browse files Browse the repository at this point in the history
  • Loading branch information
DavidFromPandora committed May 5, 2022
1 parent 7230d5f commit 172ee43
Show file tree
Hide file tree
Showing 6 changed files with 16 additions and 8 deletions.
7 changes: 4 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,9 @@ text = (
" noodles, but they don't even know him."
)

# choose minilm for speed/memory and info_xlm for accuracy
predictor = Predictor(
language="en_core_web_sm", device=-1, model_name="info_xlm"
language="en_core_web_sm", device=-1, model_name="minilm"
)

print(predictor.predict(text)["resolved_text"])
Expand All @@ -44,7 +45,7 @@ from crosslingual_coreference import Predictor
predictor = Predictor(
language="en_core_web_sm",
device=0,
model_name="info_xlm",
model_name="minilm",
chunk_size=2500,
chunk_overlap=2,
)
Expand Down Expand Up @@ -86,6 +87,6 @@ print(doc._.resolved_text)
# but Many students don't even know Momofuku Ando.
```
## Available models
As of now, there are two models available "info_xlm", "xlm_roberta", which scored 77 and 74 on OntoNotes Release 5.0 English data, respectively.
As of now, there are three models available: "info_xlm", "xlm_roberta", and "minilm", which scored 77, 74 and 74 on OntoNotes Release 5.0 English data, respectively.
## More Examples
![](https://raw.githubusercontent.com/Pandora-Intelligence/crosslingual-coreference/master/img/example_total.png)
9 changes: 8 additions & 1 deletion crosslingual_coreference/CrossLingualPredictor.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,13 @@
"f1_score_ontonotes": 77,
"file_extension": ".tar.gz",
},
"minilm": {
"url": (
"https://storage.googleapis.com/pandora-intelligence/models/crosslingual-coreference/minilm/model.tar.gz"
),
"f1_score_ontonotes": 74,
"file_extension": ".tar.gz",
},
}


Expand All @@ -28,7 +35,7 @@ def __init__(
self,
language: str,
device: int = -1,
model_name: str = "info_xlm",
model_name: str = "minilm",
chunk_size: Union[int, None] = None, # determines the # sentences per batch
chunk_overlap: int = 2, # determines the # of overlapping sentences per chunk
) -> None:
Expand Down
2 changes: 1 addition & 1 deletion crosslingual_coreference/CrossLingualPredictorSpacy.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ def __init__(
self,
language: str,
device: int = -1,
model_name: str = "info_xlm",
model_name: str = "minilm",
chunk_size: Union[int, None] = None,
chunk_overlap: int = 2,
) -> None:
Expand Down
2 changes: 1 addition & 1 deletion crosslingual_coreference/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
"xx_coref",
default_config={
"device": -1,
"model_name": "info_xlm",
"model_name": "minilm",
"chunk_size": None,
"chunk_overlap": 2,
},
Expand Down
2 changes: 1 addition & 1 deletion crosslingual_coreference/examples/test_spacy.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

nlp = spacy.load("nl_core_news_sm")

nlp.add_pipe("xx_coref", config={"model_name": "xlm_roberta"})
nlp.add_pipe("xx_coref", config={"model_name": "minilm"})

for doc in nlp.pipe(texts):
print(doc._.coref_clusters)
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "crosslingual-coreference"
version = "0.2.1"
version = "0.2.2"
description = "A multi-lingual approach to AllenNLP CoReference Resolution, along with a wrapper for spaCy."
authors = ["David Berenstein <[email protected]>"]
license = "MIT"
Expand Down

0 comments on commit 172ee43

Please sign in to comment.