Skip to content

Commit

Permalink
simplified exception handling code
Browse files (browse the repository at this point in the history)
  • Loading branch information
0x4f53 committed Nov 23, 2023
1 parent a8ed80c commit 0ec2088
Showing 1 changed file with 4 additions and 5 deletions.
9 changes: 4 additions & 5 deletions text_utils.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -94,16 +94,15 @@ def regional_pii(text):
from nltk import word_tokenize, pos_tag, ne_chunk
from nltk.corpus import stopwords

resources = ["punkt", "maxent_ne_chunker", "stopwords", "words", "averaged_perceptron_tagger"]

try:
nltk_resources = ["tokenizers/punkt", "chunkers/maxent_ne_chunker", "corpora/words.zip"]
for resource in nltk_resources:
if not nltk.data.find(resource): raise LookupError()
except LookupError:
nltk.download('punkt')
nltk.download('maxent_ne_chunker')
nltk.download('stopwords')
nltk.download('words')
nltk.download('averaged_perceptron_tagger')
for resource in resources:
nltk.download(resource)

stop_words = set(stopwords.words('english'))

Expand Down

0 comments on commit 0ec2088

Please sign in to comment.