More mypy settings, typings #337

Open: wants to merge 5 commits into base: main
2 changes: 1 addition & 1 deletion indexer/elastic.py
@@ -75,7 +75,7 @@ def process_args(self) -> None:
logger.fatal("need --elasticsearch_config_dir or ELASTICSEARCH_CONFIG_DIR")
sys.exit(1)

- def _load_template(self, name: str) -> dict | Any:
+ def _load_template(self, name: str) -> dict[str, Any] | Any:
"""
Load a JSON file from the Elasticsearch configuration directory.
Args:
Expand Down
24 changes: 12 additions & 12 deletions indexer/pipeline.py
@@ -14,7 +14,7 @@
import argparse
import logging
import sys
- from typing import Callable, Dict, List, Union, cast
+ from typing import Any, Callable, Dict, List, Union, cast

# PyPI
from pika.exchange_type import ExchangeType
@@ -121,8 +121,7 @@ def __init__(self, pipeline: "Pipeline", name: str):

Outputs = List[Union[Worker, Consumer]]

- # CommandMethod = Callable[["Pipeline"],None]
- CommandMethod = Callable
+ CommandMethod = Callable[["Pipeline"], None]


def command(func: CommandMethod) -> CommandMethod:
@@ -333,9 +332,10 @@ def qlen(self) -> None:
@command
def show(self) -> None:
"""show queues, exchanges, bindings"""
- defns = self.get_definitions()
- for what in ("queues", "exchanges", "bindings"):
- things = defns[what]
+ api = self.admin_api()
+ defns = api.get_definitions()
+
+ def dump_things(what: str, things: list[dict[str, Any]]) -> None:
print("")
if things:
print(what)
@@ -344,6 +344,12 @@ def show(self) -> None:
else:
print(f"no {what}")

# stubs/rabbitmq_admin.pyi defines get_definitions()
# as returning a TypedDict, so need to index with constant:
dump_things("queues", defns["queues"])
dump_things("exchanges", defns["exchanges"])
dump_things("bindings", defns["bindings"])

@command
def test(self) -> None:
"""test if queues configured"""
@@ -367,12 +373,6 @@ def get_command_func(self, cmd: str) -> Callable[[], None]:
assert callable(meth)
return cast(Callable[[], None], meth)

- def get_definitions(self) -> Dict:
- api = self.admin_api()
- defns = api.get_definitions()
- assert isinstance(defns, dict)
- return defns


class MyPipeline(Pipeline):
PIPE_TYPES = ["batch-fetcher", "queue-fetcher", "historical", "archive", "csv"]
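The TypedDict-returning stub (added below in stubs/rabbitmq_admin.pyi) is why show() now calls dump_things() with string constants instead of looping over a variable key, as the in-diff comment notes. A minimal sketch of the mypy behaviour, with hypothetical names:

from typing import Any, TypedDict

class Defns(TypedDict):
    queues: list[dict[str, Any]]
    exchanges: list[dict[str, Any]]
    bindings: list[dict[str, Any]]

def show(defns: Defns) -> None:
    for what in ("queues", "exchanges", "bindings"):
        things = defns[what]  # flagged by mypy: the key is not a string literal
    things = defns["queues"]  # OK: a literal key selects a known value type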
1 change: 1 addition & 0 deletions indexer/scripts/elastic-conf.py
@@ -152,6 +152,7 @@ def register_repository(self, es: Elasticsearch) -> bool:
if self.es_snapshot_s3_endpoint:
settings["endpoint"] = self.es_snapshot_s3_endpoint
else: # repo-type=fs
assert self.es_snapshot_fs_location
settings = {"location": self.es_snapshot_fs_location}

try:
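The added assert here (and the one in fetch_worker.py below) leans on mypy's type narrowing: asserting on an Optional value removes None from its type for the code that follows. An illustrative sketch with a hypothetical helper:

from typing import Optional

def location_settings(fs_location: Optional[str]) -> dict[str, str]:
    # narrows Optional[str] to str for mypy (and also rejects an empty value at runtime)
    assert fs_location
    return {"location": fs_location}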
9 changes: 6 additions & 3 deletions indexer/scripts/qutil.py
@@ -29,8 +29,7 @@

COMMANDS: List[str] = []

- # CommandMethod = Callable[["Pipeline"],None]
- CommandMethod = Callable
+ CommandMethod = Callable[["QUtil"], None]


def command(func: CommandMethod) -> CommandMethod:
@@ -222,7 +221,11 @@ def get_queue(self) -> str:
sys.exit(1)
return str(queue)

- def dump_msgs(self, writer: Callable, flush: Callable) -> None:
+ def dump_msgs(
+     self,
+     writer: Callable[[bytes, int, BasicProperties], None],
+     flush: Callable[[], None],
+ ) -> None:
"""
utility to read messages from queue & call writer
"""
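With the tightened dump_msgs() signature, callbacks passed in have to match the declared shapes. A hypothetical writer/flush pair that would satisfy them (the meaning of the int argument is not shown in the diff, so it is just passed through here):

from pika import BasicProperties

def writer(body: bytes, num: int, properties: BasicProperties) -> None:
    # hypothetical example: print a one-line summary per message
    print(num, properties.content_type, len(body))

def flush() -> None:
    pass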
2 changes: 1 addition & 1 deletion indexer/scripts/rabbitmq-stats.py
@@ -79,7 +79,7 @@ def main_loop(self) -> None:
nodes = api.list_nodes()
for node in nodes: # List
# leading part is cluster name, split and give second part??
- name = node.get("name").replace("@", "-")
+ name = node.get("name", "").replace("@", "-")
Contributor comment: Does this mean some gauges will not have label values?

self.g(node, "fd_used", "nodes", "fds", "name", name)
self.g(node, "mem_used", "nodes", "memory", "name", name)
self.g(node, "sockets_used", "nodes", "sockets", "name", name)
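The added default in node.get("name", "") is presumably what the stricter typing requires: once list_nodes() is typed via the stub, dict.get() without a default may return None, so chaining .replace() is flagged. A sketch of the trade-off the contributor comment is asking about:

from typing import Any

node: dict[str, Any] = {"name": "rabbit@host1"}  # hypothetical node record
name = node.get("name").replace("@", "-")        # flagged by mypy: the .get() result may be None
name = node.get("name", "").replace("@", "-")    # accepted, but a missing name becomes "" in the label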
18 changes: 10 additions & 8 deletions indexer/story.py
@@ -24,7 +24,9 @@ def uuid_by_link(link: str) -> str:

# enforces a specific naming pattern within this object, for concision and extensibility in the exit cb
# https://stackoverflow.com/questions/1175208/elegant-python-function-to-convert-camelcase-to-snake-case
- def class_to_member_name(original_class: Callable, private: bool = True) -> str:
+ def class_to_member_name(
+     original_class: type["StoryData"], private: bool = True
+ ) -> str:
name = original_class.__name__
name = re.sub("(.)([A-Z][a-z]+)", r"\1_\2", name)
if private:
@@ -41,11 +43,11 @@ class StoryData:
"""

dirty: bool = False
- exit_cb: Callable = field(repr=False)
+ exit_cb: Callable[["StoryData"], None] = field(repr=False)
frozen: bool = field(default=False, repr=False)
CONTENT_TYPE: str = field(default="json", repr=False)
MEMBER_NAME: str = field(default="", repr=False)
- internals: tuple = field(
+ internals: tuple[str, ...] = field(
default=(
"internals",
"dirty",
@@ -87,16 +89,16 @@ def __exit__(self, type: Any, value: Any, traceback: Any) -> None:
self.frozen = True
self.exit_cb(self)

- def as_dict(self) -> dict:
- output: dict = {}
+ def as_dict(self) -> dict[str, Any]:
+ output: dict[str, Any] = {}
for key in fields(self):
if key.name not in self.internals:
output[key.name] = getattr(self, key.name)

return output

# As a convenience for loading in values from a storage interface.
- def load_dict(self, load_dict: dict) -> None:
+ def load_dict(self, load_dict: dict[str, Any]) -> None:
field_names: list[str] = [f.name for f in fields(self)]
for key, value in load_dict.items():
if key in field_names:
@@ -319,7 +321,7 @@ def __init__(self, directory: Optional[str] = None):

# Using the dict interface to story_data so as to avoid typing issues.
def init_storage(self, story_data: StoryData) -> None:
- data_dict: dict = story_data.as_dict()
+ data_dict: dict[str, Any] = story_data.as_dict()
fetch_date = data_dict["fetch_date"]

if fetch_date is None:
@@ -421,7 +423,7 @@ def __init__(self) -> None:
"DiskStory": DiskStory,
}

- def __call__(self, *args: List, **kwargs: Dict[Any, Any]) -> BaseStory:
+ def __call__(self, *args: List[Any], **kwargs: Dict[Any, Any]) -> BaseStory:
instance = self.classes[self.iface](*args, **kwargs)
assert isinstance(instance, BaseStory)
return instance
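The exit_cb field is now typed Callable[["StoryData"], None], matching how __exit__ freezes the object and then calls the callback with it (visible in the hunk above). A hedged sketch of a callback that would satisfy the annotation:

from indexer.story import StoryData

def my_exit_cb(data: StoryData) -> None:
    # matches Callable[["StoryData"], None]; invoked by __exit__ with the (now frozen) StoryData
    if data.dirty:
        print("changed:", data.as_dict())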
2 changes: 1 addition & 1 deletion indexer/tests/test_story.py
@@ -128,7 +128,7 @@ def set_env(self) -> None:

# We want this to be cmdline toggleable probably.
@pytest.fixture(scope="class", autouse=True)
- def teardown_test_datadir(self, request: Any) -> Generator:
+ def teardown_test_datadir(self, request: Any) -> Generator[None, None, None]:
yield
shutil.rmtree(TEST_DATA_DIR)

4 changes: 2 additions & 2 deletions indexer/worker.py
@@ -523,7 +523,7 @@ def sender() -> None:
dest = routing_key # using default exchange
self.incr("sent-msgs", labels=[("dest", dest)])

- def admin_api(self) -> rabbitmq_admin.AdminAPI:  # type: ignore[no-any-unimported]
+ def admin_api(self) -> rabbitmq_admin.AdminAPI:
args = self.args
assert args

@@ -751,7 +751,7 @@ def _pika_ack_and_commit(self, im: InputMessage, multiple: bool = False) -> None:
# AFTER basic_ack!
chan.tx_commit() # commit sent messages and ack atomically!

- def _exc_headers(self, e: Exception) -> Dict:
+ def _exc_headers(self, e: Exception) -> Dict[str, str]:
"""
return dict of headers to add to a message
after an exception was caught
2 changes: 1 addition & 1 deletion indexer/workers/fetcher/BlacklistRedirectMiddleware.py
@@ -11,7 +11,7 @@
from w3lib.url import safe_url_string


- class BlacklistRedirectMiddleware(BaseRedirectMiddleware):  # type: ignore[no-any-unimported]
+ class BlacklistRedirectMiddleware(BaseRedirectMiddleware):  # type: ignore[no-any-unimported,misc]
"""
Handle redirection of requests based on response status
and meta-refresh html tag. Very Minor Edit of scrapy's builtin RedirectMiddleware
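The extra "misc" code in these ignore comments (here and on BatchSpider below) is presumably what disallow_subclassing_any demands: when the base class comes from an untyped import its type is Any, and mypy reports the subclassing error under the misc code. A hedged sketch:

from scrapy.downloadermiddlewares.redirect import BaseRedirectMiddleware  # untyped import, so type Any

class MyRedirectMiddleware(BaseRedirectMiddleware):  # type: ignore[no-any-unimported,misc]
    """hypothetical subclass; without the ignore, mypy flags subclassing a base whose type is Any"""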
10 changes: 7 additions & 3 deletions indexer/workers/fetcher/batch_spider.py
@@ -11,7 +11,7 @@
logger = logging.getLogger(__name__)


- class BatchSpider(scrapy.Spider):  # type: ignore[no-any-unimported]
+ class BatchSpider(scrapy.Spider):  # type: ignore[no-any-unimported,misc]
"""
This spider is given a batch_index, loads the corresponding batchfile from the disk,
then fetches the urls in that batch's stories, and then saves the corresponding html and http_metadata
@@ -40,14 +40,18 @@ class BatchSpider(scrapy.Spider): # type: ignore[no-any-unimported]
}

def __init__(
- self, batch: List[BaseStory], cb: Callable, *args: List, **kwargs: Dict
+ self,
+ batch: List[BaseStory],
+ cb: Callable[[BaseStory], None],
+ *args: List[Any],
+ **kwargs: Dict[str, Any]
) -> None:
super(BatchSpider, self).__init__(*args, **kwargs)

self.batch = batch
self.cb = cb

- def start_requests(self) -> Generator:
+ def start_requests(self) -> Generator[scrapy.Request, None, None]:  # type: ignore[no-any-unimported]
for story in self.batch:
url = story.rss_entry().link

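The Generator annotations added here and in test_story.py follow typing's three-parameter form: yield type, send type, return type. A small generic sketch (plain ints rather than scrapy objects, to keep it self-contained):

from typing import Generator

def counter(n: int) -> Generator[int, None, None]:
    # yields int, accepts nothing via send(), returns nothing
    for i in range(n):
        yield i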
1 change: 1 addition & 0 deletions indexer/workers/fetcher/fetch_worker.py
@@ -145,6 +145,7 @@ def scrapy_cb(self, story: BaseStory) -> None:
if not hasattr(self, "sender"):
self.qconnect()
self.sender = self.story_sender()
assert self.sender
self.sender.send_story(story)
status_label = "success"

6 changes: 3 additions & 3 deletions indexer/workers/fetcher/rss_utils.py
@@ -109,7 +109,7 @@ def src_attr_int(attr_name: str) -> Optional[int]:

def batch_rss(
source_list: List[RSSEntry], num_batches: int = 20, max_domain_size: int = 10000
- ) -> tuple[List, Dict]:
+ ) -> tuple[List[List[RSSEntry]], Dict[str, int]]:
"""
Greedily pack source_list into num_batches batches, keeping each domain together.
"""
@@ -127,8 +127,8 @@
[(k, v) for k, v in agg.items()], key=lambda x: x[1]
)

- batches: List = [[] for i in range(num_batches)]
- batch_map: Dict = {}
+ batches: List[List[RSSEntry]] = [[] for i in range(num_batches)]
+ batch_map: Dict[str, int] = {}

while len(domains_sorted) > 0:
domain = domains_sorted.pop()
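The batch_rss() changes only tighten types, but the greedy packing its docstring describes is roughly: sort domains by story count, then repeatedly place the largest remaining domain into the least-loaded batch. A generic sketch of that idea (not the project's exact code; max_domain_size handling is omitted):

from typing import Dict, List, Tuple

def pack_domains(domain_counts: Dict[str, int], num_batches: int = 20) -> Tuple[List[int], Dict[str, int]]:
    batch_sizes: List[int] = [0] * num_batches
    batch_map: Dict[str, int] = {}
    # biggest domains first, while the batches are still evenly loaded
    for domain, count in sorted(domain_counts.items(), key=lambda kv: kv[1], reverse=True):
        smallest = min(range(num_batches), key=lambda i: batch_sizes[i])
        batch_sizes[smallest] += count
        batch_map[domain] = smallest
    return batch_sizes, batch_map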
9 changes: 9 additions & 0 deletions pyproject.toml
@@ -54,6 +54,15 @@ warn_return_any = true
warn_unused_ignores = true
show_error_codes = true

# from https://quantlane.com/blog/type-checking-large-codebase/
# in the order they appear:
disallow_untyped_calls = true
disallow_untyped_decorators = true
disallow_any_generics = true
disallow_subclassing_any = true
warn_redundant_casts = true
warn_unused_configs = true

# look for local stubs to PyPI packages in top level stubs directory
mypy_path = "stubs"

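Most of the per-file edits above are driven by disallow_any_generics: a bare Dict, List, tuple, Callable, or Generator is an implicit Any and becomes an error. A small illustration with hypothetical functions:

from typing import Callable, Dict

def before(cb: Callable, headers: Dict) -> None:  # error: missing type parameters for generic type
    ...

def after(cb: Callable[[str], None], headers: Dict[str, str]) -> None:
    ...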
18 changes: 18 additions & 0 deletions stubs/rabbitmq_admin.pyi
@@ -0,0 +1,18 @@
# only what we use...

import typing as _t

_JSON = dict[str, _t.Any]
_LIST_JSON = list[_JSON]

class _GetDefinitionsReturn(_t.TypedDict):
bindings: _LIST_JSON
exchanges: _LIST_JSON
queues: _LIST_JSON

class AdminAPI:
def __init__(self, url: str, auth: tuple[str, str]): ...
def _api_get(self, url: str) -> _t.Any: ...
def get_definitions(self) -> _GetDefinitionsReturn: ...
def list_exchanges(self) -> _LIST_JSON: ...
def list_nodes(self) -> _LIST_JSON: ...
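A hedged sketch of what this stub buys at type-check time (connection details are placeholder values; reveal_type() is a mypy-only helper, not runtime code):

from rabbitmq_admin import AdminAPI

api = AdminAPI(url="http://localhost:15672", auth=("guest", "guest"))  # placeholder credentials
reveal_type(api.get_definitions())            # _GetDefinitionsReturn (the TypedDict above)
reveal_type(api.get_definitions()["queues"])  # list[dict[str, Any]]
reveal_type(api.list_nodes())                 # list[dict[str, Any]]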