Skip to content

Commit

Permalink
Switch to MeCab and fix asyncio usage.
Browse files Browse the repository at this point in the history
Nagisa fails to install because its DyNet dependency fails to build; see
clab/dynet#1662
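asyncio.wait() no longer accepts bare coroutine objects (deprecated in Python 3.8, removed in 3.11), so they are now wrapped in tasks with asyncio.create_task()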
  • Loading branch information
tlaufkoetter committed May 7, 2023
1 parent 4f68de1 commit 6226bc4
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 6 deletions.
1 change: 1 addition & 0 deletions src/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
venv/
11 changes: 5 additions & 6 deletions src/segmentize_and_lookup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import sys
import regex as re
import nagisa
import MeCab
import requests
import asyncio as aio

Expand Down Expand Up @@ -50,18 +50,17 @@ async def get_meaning(dictionary, word, progress_bar):

async def main(meanings):
words = set()

wakati = MeCab.Tagger('-Owakati')
with open(sys.argv[1], 'r') as file:
content = file.read()
for word in filter(
words = {word for word in filter(
validate_word,
nagisa.wakati(content)):
words.add(word)
wakati.parse(content).split())}

print("Extracted {} words".format(len(words)))

progress_bar = ProgressBar(len(words))
coroutines = [get_meaning(meanings, word, progress_bar) for word in words]
coroutines = [aio.create_task(get_meaning(meanings, word, progress_bar)) for word in words]
await aio.wait(coroutines)
print("\nDone.")

Expand Down

0 comments on commit 6226bc4

Please sign in to comment.