From 25caa50392c6003677e8a5f3f315d44e524c790d Mon Sep 17 00:00:00 2001 From: DanShatford Date: Fri, 1 Sep 2023 19:34:39 +0100 Subject: [PATCH 1/2] Sort imports `isort --profile black .` --- examples/batch_process.py | 30 ++++++++++----------- examples/batch_process_production.py | 24 ++++++++--------- examples/production_event.py | 13 ++++----- mordecai3/elastic_utilities.py | 15 ++++++----- mordecai3/error_analysis.py | 19 +++++++------ mordecai3/error_utils.py | 4 +-- mordecai3/geoparse.py | 30 ++++++++++++--------- mordecai3/mordecai_streamlit.py | 27 +++++++++---------- mordecai3/mordecai_utilities.py | 4 +-- mordecai3/roberta_qa.py | 2 +- mordecai3/tests/conftest.py | 5 ++-- mordecai3/tests/test_mordecai3.py | 2 ++ mordecai3/torch_model.py | 6 ++--- mordecai3/train.py | 40 +++++++++++++++------------- setup.py | 3 ++- 15 files changed, 116 insertions(+), 108 deletions(-) diff --git a/examples/batch_process.py b/examples/batch_process.py index 865ad0f..ac869da 100644 --- a/examples/batch_process.py +++ b/examples/batch_process.py @@ -1,31 +1,29 @@ +import logging +import re +import time from collections import Counter from pathlib import Path -import time -import typer -import pandas as pd +import jsonlines +import numpy as np +import pandas as pd import spacy +import torch +import typer +from elasticsearch import Elasticsearch, helpers +from elasticsearch_dsl import Q, Search from spacy.language import Language -from spacy.tokens import Token, Doc from spacy.pipeline import Pipe -import numpy as np -import jsonlines +from spacy.tokens import Doc, Token +from torch.utils.data import DataLoader, Dataset from tqdm import tqdm -import re -import torch -import pandas as pd +from utilities import spacy_doc_setup -from torch.utils.data import Dataset, DataLoader import mordecai3.elastic_utilities as es_util from mordecai3.geoparse import doc_to_ex_expanded +from mordecai3.roberta_qa import setup_qa from mordecai3.torch_model import ProductionData, geoparse_model -from mordecai3.roberta_qa import setup_qa -from utilities import spacy_doc_setup -from elasticsearch import Elasticsearch, helpers -from elasticsearch_dsl import Search, Q - -import logging logger = logging.getLogger() handler = logging.StreamHandler() formatter = logging.Formatter( diff --git a/examples/batch_process_production.py b/examples/batch_process_production.py index ddc5e12..fa004c8 100644 --- a/examples/batch_process_production.py +++ b/examples/batch_process_production.py @@ -1,27 +1,25 @@ -from collections import Counter -from pathlib import Path +import logging +import re import time +from collections import Counter from configparser import ConfigParser +from pathlib import Path -import typer -import pandas as pd -import spacy -import numpy as np import jsonlines -from tqdm import tqdm -import re -import torch +import numpy as np import pandas as pd - +import spacy +import torch +import typer from torch.utils.data import DataLoader +from tqdm import tqdm + import mordecai3.elastic_utilities as es_util from mordecai3.geoparse import Geoparser, doc_to_ex_expanded -from mordecai3.torch_model import ProductionData from mordecai3.roberta_qa import setup_qa +from mordecai3.torch_model import ProductionData from mordecai3.utilities import spacy_doc_setup - -import logging logger = logging.getLogger(__name__) handler = logging.StreamHandler() formatter = logging.Formatter( diff --git a/examples/production_event.py b/examples/production_event.py index 7c3307e..792c9d0 100644 --- a/examples/production_event.py +++ b/examples/production_event.py @@ -1,22 +1,23 @@ import json +import numpy as np +import pandas as pd +import spacy import streamlit as st import torch -import pandas as pd from elasticsearch import Elasticsearch -from elasticsearch_dsl import Search, Q -import spacy -from spacy.tokens import Token, Doc +from elasticsearch_dsl import Q, Search from spacy.pipeline import Pipe -import numpy as np +from spacy.tokens import Doc, Token from torch.utils.data import DataLoader import mordecai3.elastic_utilities as es_util from mordecai3.geoparse import doc_to_ex_expanded +from mordecai3.roberta_qa import add_event_loc, setup_qa from mordecai3.torch_model import ProductionData, geoparse_model -from mordecai3.roberta_qa import setup_qa, add_event_loc from mordecai3.utilities import spacy_doc_setup + # for dumping raw output to JSON # https://stackoverflow.com/a/52604722 def default(obj): diff --git a/mordecai3/elastic_utilities.py b/mordecai3/elastic_utilities.py index 082ccc8..1537c9b 100644 --- a/mordecai3/elastic_utilities.py +++ b/mordecai3/elastic_utilities.py @@ -1,12 +1,13 @@ -from elasticsearch import Elasticsearch, helpers -from elasticsearch_dsl import Search, Q -import numpy as np -import jellyfish -from collections import Counter -import warnings +import logging import re +import warnings +from collections import Counter + +import jellyfish +import numpy as np +from elasticsearch import Elasticsearch, helpers +from elasticsearch_dsl import Q, Search -import logging logger = logging.getLogger(__name__) logger.addHandler(logging.NullHandler()) diff --git a/mordecai3/error_analysis.py b/mordecai3/error_analysis.py index acd3b43..2d1b8ac 100644 --- a/mordecai3/error_analysis.py +++ b/mordecai3/error_analysis.py @@ -1,17 +1,17 @@ -from torch_model import TrainData, geoparse_model -from train import load_data -from torch.utils.data import DataLoader -import torch +import logging from collections import Counter -import numpy as np -from rich.console import Console -from rich.table import Table -import typer from pathlib import Path +import numpy as np +import torch +import typer from error_utils import evaluate_results, make_wandb_dict +from rich.console import Console +from rich.table import Table +from torch.utils.data import DataLoader +from torch_model import TrainData, geoparse_model +from train import load_data -import logging logger = logging.getLogger() handler = logging.StreamHandler() formatter = logging.Formatter( @@ -40,7 +40,6 @@ import wandb - def make_missing_table(cutoff, names, datasets): table = Table(show_header=True, header_style="bold magenta") table.add_column("Dataset") diff --git a/mordecai3/error_utils.py b/mordecai3/error_utils.py index 8926d21..260f7c1 100644 --- a/mordecai3/error_utils.py +++ b/mordecai3/error_utils.py @@ -1,6 +1,6 @@ -import torch -import numpy as np import haversine as hs +import numpy as np +import torch #es_data = datasets[2] #loader = data_loaders[2] diff --git a/mordecai3/geoparse.py b/mordecai3/geoparse.py index af9165d..84be93e 100644 --- a/mordecai3/geoparse.py +++ b/mordecai3/geoparse.py @@ -1,24 +1,30 @@ -import jsonlines -from tqdm import tqdm -import re +import logging import os +import re -import torch +import jsonlines +import numpy as np import pandas as pd +import pkg_resources import spacy +import torch from spacy.language import Language -from spacy.tokens import Token, Span, Doc from spacy.pipeline import Pipe -import numpy as np -from torch.utils.data import Dataset, DataLoader -import pkg_resources +from spacy.tokens import Doc, Span, Token +from torch.utils.data import DataLoader, Dataset +from tqdm import tqdm -from mordecai3.elastic_utilities import make_conn, get_entry_by_id, get_adm1_country_entry, get_country_entry, add_es_data_doc -from mordecai3.torch_model import ProductionData, geoparse_model -from mordecai3.roberta_qa import setup_qa, add_event_loc +from mordecai3.elastic_utilities import ( + add_es_data_doc, + get_adm1_country_entry, + get_country_entry, + get_entry_by_id, + make_conn, +) from mordecai3.mordecai_utilities import spacy_doc_setup +from mordecai3.roberta_qa import add_event_loc, setup_qa +from mordecai3.torch_model import ProductionData, geoparse_model -import logging logger = logging.getLogger() handler = logging.StreamHandler() formatter = logging.Formatter( diff --git a/mordecai3/mordecai_streamlit.py b/mordecai3/mordecai_streamlit.py index 13c93f0..81bd78e 100644 --- a/mordecai3/mordecai_streamlit.py +++ b/mordecai3/mordecai_streamlit.py @@ -1,21 +1,20 @@ -import spacy -from spacy.language import Language -from spacy.tokens import Token, Doc -from spacy.pipeline import Pipe -import numpy as np -import jsonlines -from tqdm import tqdm import re + +import jsonlines +import numpy as np +import spacy import streamlit as st import torch -from torch.utils.data import Dataset, DataLoader - -from elastic_utilities import res_formatter, add_es_data -from torch_model import ProductionData, geoparse_model -from geoparse import doc_to_ex_expanded, Geoparser - +from elastic_utilities import add_es_data, res_formatter from elasticsearch import Elasticsearch, helpers -from elasticsearch_dsl import Search, Q +from elasticsearch_dsl import Q, Search +from geoparse import Geoparser, doc_to_ex_expanded +from spacy.language import Language +from spacy.pipeline import Pipe +from spacy.tokens import Doc, Token +from torch.utils.data import DataLoader, Dataset +from torch_model import ProductionData, geoparse_model +from tqdm import tqdm HTML_WRAPPER = """
{}
""" diff --git a/mordecai3/mordecai_utilities.py b/mordecai3/mordecai_utilities.py index d12328f..0578667 100644 --- a/mordecai3/mordecai_utilities.py +++ b/mordecai3/mordecai_utilities.py @@ -1,6 +1,6 @@ -from spacy.tokens import Token -from spacy.language import Language import numpy as np +from spacy.language import Language +from spacy.tokens import Token #def make_country_dict(): # country = pd.read_csv("assets/wikipedia-iso-country-codes.txt") diff --git a/mordecai3/roberta_qa.py b/mordecai3/roberta_qa.py index 0ab0613..521f1d0 100644 --- a/mordecai3/roberta_qa.py +++ b/mordecai3/roberta_qa.py @@ -1,5 +1,5 @@ -from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline from spacy.tokens import Span +from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline model_name = "deepset/roberta-base-squad2" diff --git a/mordecai3/tests/conftest.py b/mordecai3/tests/conftest.py index b288824..40302dc 100644 --- a/mordecai3/tests/conftest.py +++ b/mordecai3/tests/conftest.py @@ -1,8 +1,9 @@ -from ..geoparse import Geoparser import pytest - import spacy +from ..geoparse import Geoparser + + @pytest.fixture(scope='session', autouse=True) def geo(): return Geoparser() diff --git a/mordecai3/tests/test_mordecai3.py b/mordecai3/tests/test_mordecai3.py index 81afb93..dedf35d 100644 --- a/mordecai3/tests/test_mordecai3.py +++ b/mordecai3/tests/test_mordecai3.py @@ -1,7 +1,9 @@ import pytest + from .. import elastic_utilities as es_utils from .. import geoparse + def test_statement_event_loc(geo): text = "Speaking from Berlin, President Obama expressed his hope for a peaceful resolution to the fighting in Homs and Aleppo." #text = "President Obama expressed his hope for a peaceful resolution to the fighting." diff --git a/mordecai3/torch_model.py b/mordecai3/torch_model.py index 85f65d1..f8b5368 100644 --- a/mordecai3/torch_model.py +++ b/mordecai3/torch_model.py @@ -1,15 +1,15 @@ ## Read in the BERT embedding for each place name ## and predict the country using pytorch -import numpy as np import json +import logging import os +import numpy as np import torch import torch.nn as nn -from torch.utils.data import Dataset from pandas import read_csv +from torch.utils.data import Dataset -import logging logger = logging.getLogger(__name__) handler = logging.StreamHandler() formatter = logging.Formatter( diff --git a/mordecai3/train.py b/mordecai3/train.py index fd087c7..c4e87bb 100644 --- a/mordecai3/train.py +++ b/mordecai3/train.py @@ -1,36 +1,38 @@ -import random +import os import pickle +import random import re -import os + import jsonlines + os.environ['KMP_DUPLICATE_LIB_OK']='True' +import datetime +import logging +import multiprocessing + +import elastic_utilities as es_util +import elasticsearch import numpy as np -from tqdm import tqdm +import spacy import torch -import torch.optim as optim import torch.nn as nn -from torch.utils.data import DataLoader -import xmltodict -import wandb +import torch.optim as optim import typer -import spacy -from spacy.tokens import DocBin -import datetime -import multiprocessing +import wandb +import xmltodict +from error_utils import make_wandb_dict +from geoparse import guess_in_rel -from torch_model import geoparse_model -import elastic_utilities as es_util # Currently getting this error: ImportError: attempted relative import with no known parent package # when I run the line below. # from .mordecai_utilities import spacy_doc_setup from mordecai_utilities import spacy_doc_setup -from torch_model import TrainData -from error_utils import make_wandb_dict -from geoparse import guess_in_rel -import elasticsearch +from spacy.tokens import DocBin +from torch.utils.data import DataLoader +from torch_model import TrainData, geoparse_model +from tqdm import tqdm -import logging logger = logging.getLogger() handler = logging.StreamHandler() formatter = logging.Formatter( @@ -625,8 +627,8 @@ def train(batch_size: int = typer.Option(32, "--batch_size"), # input ba loss_func=nn.CrossEntropyLoss() # single label, multi-class optimizer = optim.Adam(model.parameters(), lr=config.lr) if config.avg_params: - from torch.optim.swa_utils import AveragedModel, SWALR from torch.optim.lr_scheduler import CosineAnnealingLR + from torch.optim.swa_utils import SWALR, AveragedModel swa_model = AveragedModel(model) scheduler = CosineAnnealingLR(optimizer, T_max=config.epochs+1) diff --git a/setup.py b/setup.py index 70ce4a9..40289bc 100644 --- a/setup.py +++ b/setup.py @@ -1,4 +1,5 @@ -from setuptools import setup, find_packages +from setuptools import find_packages, setup + setup( name = 'mordecai3', version='3.0.0a', From 41fbfaed408f98b35aeb9ce8d75f9db87d4ddcc2 Mon Sep 17 00:00:00 2001 From: DanShatford Date: Fri, 1 Sep 2023 19:45:00 +0100 Subject: [PATCH 2/2] Remove unused imports --- examples/batch_process.py | 9 +-------- examples/batch_process_production.py | 2 -- examples/production_event.py | 6 +----- mordecai3/elastic_utilities.py | 2 +- mordecai3/geoparse.py | 8 +------- mordecai3/mordecai_streamlit.py | 16 ++++------------ mordecai3/roberta_qa.py | 2 +- mordecai3/tests/conftest.py | 1 - mordecai3/train.py | 2 -- 9 files changed, 9 insertions(+), 39 deletions(-) diff --git a/examples/batch_process.py b/examples/batch_process.py index ac869da..858d0ac 100644 --- a/examples/batch_process.py +++ b/examples/batch_process.py @@ -1,7 +1,5 @@ import logging -import re import time -from collections import Counter from pathlib import Path import jsonlines @@ -10,12 +8,7 @@ import spacy import torch import typer -from elasticsearch import Elasticsearch, helpers -from elasticsearch_dsl import Q, Search -from spacy.language import Language -from spacy.pipeline import Pipe -from spacy.tokens import Doc, Token -from torch.utils.data import DataLoader, Dataset +from torch.utils.data import DataLoader from tqdm import tqdm from utilities import spacy_doc_setup diff --git a/examples/batch_process_production.py b/examples/batch_process_production.py index fa004c8..c2d5ff2 100644 --- a/examples/batch_process_production.py +++ b/examples/batch_process_production.py @@ -1,7 +1,6 @@ import logging import re import time -from collections import Counter from configparser import ConfigParser from pathlib import Path @@ -16,7 +15,6 @@ import mordecai3.elastic_utilities as es_util from mordecai3.geoparse import Geoparser, doc_to_ex_expanded -from mordecai3.roberta_qa import setup_qa from mordecai3.torch_model import ProductionData from mordecai3.utilities import spacy_doc_setup diff --git a/examples/production_event.py b/examples/production_event.py index 792c9d0..c9fcfaf 100644 --- a/examples/production_event.py +++ b/examples/production_event.py @@ -1,14 +1,10 @@ -import json - import numpy as np import pandas as pd import spacy import streamlit as st import torch from elasticsearch import Elasticsearch -from elasticsearch_dsl import Q, Search -from spacy.pipeline import Pipe -from spacy.tokens import Doc, Token +from elasticsearch_dsl import Search from torch.utils.data import DataLoader import mordecai3.elastic_utilities as es_util diff --git a/mordecai3/elastic_utilities.py b/mordecai3/elastic_utilities.py index 1537c9b..17fd578 100644 --- a/mordecai3/elastic_utilities.py +++ b/mordecai3/elastic_utilities.py @@ -5,7 +5,7 @@ import jellyfish import numpy as np -from elasticsearch import Elasticsearch, helpers +from elasticsearch import Elasticsearch from elasticsearch_dsl import Q, Search logger = logging.getLogger(__name__) diff --git a/mordecai3/geoparse.py b/mordecai3/geoparse.py index 84be93e..e889d27 100644 --- a/mordecai3/geoparse.py +++ b/mordecai3/geoparse.py @@ -2,17 +2,11 @@ import os import re -import jsonlines import numpy as np -import pandas as pd import pkg_resources import spacy import torch -from spacy.language import Language -from spacy.pipeline import Pipe -from spacy.tokens import Doc, Span, Token -from torch.utils.data import DataLoader, Dataset -from tqdm import tqdm +from torch.utils.data import DataLoader from mordecai3.elastic_utilities import ( add_es_data_doc, diff --git a/mordecai3/mordecai_streamlit.py b/mordecai3/mordecai_streamlit.py index 81bd78e..b29af0a 100644 --- a/mordecai3/mordecai_streamlit.py +++ b/mordecai3/mordecai_streamlit.py @@ -1,20 +1,12 @@ -import re - -import jsonlines import numpy as np import spacy import streamlit as st import torch -from elastic_utilities import add_es_data, res_formatter -from elasticsearch import Elasticsearch, helpers -from elasticsearch_dsl import Q, Search -from geoparse import Geoparser, doc_to_ex_expanded +from elasticsearch import Elasticsearch +from elasticsearch_dsl import Search from spacy.language import Language -from spacy.pipeline import Pipe -from spacy.tokens import Doc, Token -from torch.utils.data import DataLoader, Dataset -from torch_model import ProductionData, geoparse_model -from tqdm import tqdm +from spacy.tokens import Token +from torch_model import geoparse_model HTML_WRAPPER = """
{}
""" diff --git a/mordecai3/roberta_qa.py b/mordecai3/roberta_qa.py index 521f1d0..c210f1b 100644 --- a/mordecai3/roberta_qa.py +++ b/mordecai3/roberta_qa.py @@ -1,5 +1,5 @@ from spacy.tokens import Span -from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline +from transformers import pipeline model_name = "deepset/roberta-base-squad2" diff --git a/mordecai3/tests/conftest.py b/mordecai3/tests/conftest.py index 40302dc..e3ba52b 100644 --- a/mordecai3/tests/conftest.py +++ b/mordecai3/tests/conftest.py @@ -1,5 +1,4 @@ import pytest -import spacy from ..geoparse import Geoparser diff --git a/mordecai3/train.py b/mordecai3/train.py index c4e87bb..91358d5 100644 --- a/mordecai3/train.py +++ b/mordecai3/train.py @@ -9,10 +9,8 @@ import datetime import logging -import multiprocessing import elastic_utilities as es_util -import elasticsearch import numpy as np import spacy import torch