diff --git a/examples/batch_process.py b/examples/batch_process.py index 865ad0f..858d0ac 100644 --- a/examples/batch_process.py +++ b/examples/batch_process.py @@ -1,31 +1,22 @@ -from collections import Counter -from pathlib import Path +import logging import time +from pathlib import Path -import typer -import pandas as pd -import spacy -from spacy.language import Language -from spacy.tokens import Token, Doc -from spacy.pipeline import Pipe -import numpy as np import jsonlines -from tqdm import tqdm -import re -import torch +import numpy as np import pandas as pd +import spacy +import torch +import typer +from torch.utils.data import DataLoader +from tqdm import tqdm +from utilities import spacy_doc_setup -from torch.utils.data import Dataset, DataLoader import mordecai3.elastic_utilities as es_util from mordecai3.geoparse import doc_to_ex_expanded +from mordecai3.roberta_qa import setup_qa from mordecai3.torch_model import ProductionData, geoparse_model -from mordecai3.roberta_qa import setup_qa -from utilities import spacy_doc_setup -from elasticsearch import Elasticsearch, helpers -from elasticsearch_dsl import Search, Q - -import logging logger = logging.getLogger() handler = logging.StreamHandler() formatter = logging.Formatter( diff --git a/examples/batch_process_production.py b/examples/batch_process_production.py index ddc5e12..c2d5ff2 100644 --- a/examples/batch_process_production.py +++ b/examples/batch_process_production.py @@ -1,27 +1,23 @@ -from collections import Counter -from pathlib import Path +import logging +import re import time from configparser import ConfigParser +from pathlib import Path -import typer -import pandas as pd -import spacy -import numpy as np import jsonlines -from tqdm import tqdm -import re -import torch +import numpy as np import pandas as pd - +import spacy +import torch +import typer from torch.utils.data import DataLoader +from tqdm import tqdm + import mordecai3.elastic_utilities as es_util from mordecai3.geoparse import Geoparser, doc_to_ex_expanded from mordecai3.torch_model import ProductionData -from mordecai3.roberta_qa import setup_qa from mordecai3.utilities import spacy_doc_setup - -import logging logger = logging.getLogger(__name__) handler = logging.StreamHandler() formatter = logging.Formatter( diff --git a/examples/production_event.py b/examples/production_event.py index 7c3307e..c9fcfaf 100644 --- a/examples/production_event.py +++ b/examples/production_event.py @@ -1,22 +1,19 @@ -import json - +import numpy as np +import pandas as pd +import spacy import streamlit as st import torch -import pandas as pd from elasticsearch import Elasticsearch -from elasticsearch_dsl import Search, Q -import spacy -from spacy.tokens import Token, Doc -from spacy.pipeline import Pipe -import numpy as np +from elasticsearch_dsl import Search from torch.utils.data import DataLoader import mordecai3.elastic_utilities as es_util from mordecai3.geoparse import doc_to_ex_expanded +from mordecai3.roberta_qa import add_event_loc, setup_qa from mordecai3.torch_model import ProductionData, geoparse_model -from mordecai3.roberta_qa import setup_qa, add_event_loc from mordecai3.utilities import spacy_doc_setup + # for dumping raw output to JSON # https://stackoverflow.com/a/52604722 def default(obj): diff --git a/mordecai3/elastic_utilities.py b/mordecai3/elastic_utilities.py index 082ccc8..17fd578 100644 --- a/mordecai3/elastic_utilities.py +++ b/mordecai3/elastic_utilities.py @@ -1,12 +1,13 @@ -from elasticsearch import Elasticsearch, helpers -from elasticsearch_dsl import Search, Q -import numpy as np -import jellyfish -from collections import Counter -import warnings +import logging import re +import warnings +from collections import Counter + +import jellyfish +import numpy as np +from elasticsearch import Elasticsearch +from elasticsearch_dsl import Q, Search -import logging logger = logging.getLogger(__name__) logger.addHandler(logging.NullHandler()) diff --git a/mordecai3/error_analysis.py b/mordecai3/error_analysis.py index acd3b43..2d1b8ac 100644 --- a/mordecai3/error_analysis.py +++ b/mordecai3/error_analysis.py @@ -1,17 +1,17 @@ -from torch_model import TrainData, geoparse_model -from train import load_data -from torch.utils.data import DataLoader -import torch +import logging from collections import Counter -import numpy as np -from rich.console import Console -from rich.table import Table -import typer from pathlib import Path +import numpy as np +import torch +import typer from error_utils import evaluate_results, make_wandb_dict +from rich.console import Console +from rich.table import Table +from torch.utils.data import DataLoader +from torch_model import TrainData, geoparse_model +from train import load_data -import logging logger = logging.getLogger() handler = logging.StreamHandler() formatter = logging.Formatter( @@ -40,7 +40,6 @@ import wandb - def make_missing_table(cutoff, names, datasets): table = Table(show_header=True, header_style="bold magenta") table.add_column("Dataset") diff --git a/mordecai3/error_utils.py b/mordecai3/error_utils.py index 8926d21..260f7c1 100644 --- a/mordecai3/error_utils.py +++ b/mordecai3/error_utils.py @@ -1,6 +1,6 @@ -import torch -import numpy as np import haversine as hs +import numpy as np +import torch #es_data = datasets[2] #loader = data_loaders[2] diff --git a/mordecai3/geoparse.py b/mordecai3/geoparse.py index af9165d..e889d27 100644 --- a/mordecai3/geoparse.py +++ b/mordecai3/geoparse.py @@ -1,24 +1,24 @@ -import jsonlines -from tqdm import tqdm -import re +import logging import os +import re -import torch -import pandas as pd -import spacy -from spacy.language import Language -from spacy.tokens import Token, Span, Doc -from spacy.pipeline import Pipe import numpy as np -from torch.utils.data import Dataset, DataLoader import pkg_resources - -from mordecai3.elastic_utilities import make_conn, get_entry_by_id, get_adm1_country_entry, get_country_entry, add_es_data_doc -from mordecai3.torch_model import ProductionData, geoparse_model -from mordecai3.roberta_qa import setup_qa, add_event_loc +import spacy +import torch +from torch.utils.data import DataLoader + +from mordecai3.elastic_utilities import ( + add_es_data_doc, + get_adm1_country_entry, + get_country_entry, + get_entry_by_id, + make_conn, +) from mordecai3.mordecai_utilities import spacy_doc_setup +from mordecai3.roberta_qa import add_event_loc, setup_qa +from mordecai3.torch_model import ProductionData, geoparse_model -import logging logger = logging.getLogger() handler = logging.StreamHandler() formatter = logging.Formatter( diff --git a/mordecai3/mordecai_streamlit.py b/mordecai3/mordecai_streamlit.py index 13c93f0..b29af0a 100644 --- a/mordecai3/mordecai_streamlit.py +++ b/mordecai3/mordecai_streamlit.py @@ -1,21 +1,12 @@ -import spacy -from spacy.language import Language -from spacy.tokens import Token, Doc -from spacy.pipeline import Pipe import numpy as np -import jsonlines -from tqdm import tqdm -import re +import spacy import streamlit as st import torch -from torch.utils.data import Dataset, DataLoader - -from elastic_utilities import res_formatter, add_es_data -from torch_model import ProductionData, geoparse_model -from geoparse import doc_to_ex_expanded, Geoparser - -from elasticsearch import Elasticsearch, helpers -from elasticsearch_dsl import Search, Q +from elasticsearch import Elasticsearch +from elasticsearch_dsl import Search +from spacy.language import Language +from spacy.tokens import Token +from torch_model import geoparse_model HTML_WRAPPER = """