Skip to content

Commit

Permalink
Placate mypy (redux)
Browse files Browse the repository at this point in the history
Signed-off-by: Elsie Hupp <[email protected]>
  • Loading branch information
elsiehupp committed Sep 8, 2023
1 parent 3a50bb7 commit 1cc3eb9
Show file tree
Hide file tree
Showing 43 changed files with 889 additions and 629 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ repos:
rev: 1.6.0
hooks:
- id: poetry-check
# - id: poetry-lock
- id: poetry-lock
- id: poetry-export
args: ["-f", "requirements.txt", "-o", "requirements.txt"]
- repo: https://github.com/pre-commit/pre-commit-hooks
Expand Down
303 changes: 231 additions & 72 deletions poetry.lock

Large diffs are not rendered by default.

8 changes: 8 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -77,10 +77,18 @@ requests = "^2.31.0"
flake8 = "^3.9.2"
pre-commit = "^2.17.0"
pymarkdown = "^0.1.4"
mypy = "^1.5.1"
types-requests = "^2.31.0.2"
# flake8-black may be unnecessary?
flake8-black = "^0.3.6"

[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"

[tool.pymarkdown]
disable-rules = "line-length,no-inline-html"

[tool.mypy]
check_untyped_defs = true
ignore_missing_imports = true
6 changes: 3 additions & 3 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -217,9 +217,9 @@ requests==2.31.0 ; python_version >= "3.8" and python_version < "4.0" \
schema==0.7.5 ; python_version >= "3.8" and python_version < "4.0" \
--hash=sha256:f06717112c61895cabc4707752b88716e8420a8819d71404501e114f91043197 \
--hash=sha256:f3ffdeeada09ec34bf40d7d79996d9f7175db93b7a5065de0faa7f41083c1e6c
setuptools==68.1.2 ; python_version >= "3.8" and python_version < "4.0" \
--hash=sha256:3d4dfa6d95f1b101d695a6160a7626e15583af71a5f52176efa5d39a054d475d \
--hash=sha256:3d8083eed2d13afc9426f227b24fd1659489ec107c0e86cec2ffdde5c92e790b
setuptools==68.2.0 ; python_version >= "3.8" and python_version < "4.0" \
--hash=sha256:00478ca80aeebeecb2f288d3206b0de568df5cd2b8fada1209843cc9a8d88a48 \
--hash=sha256:af3d5949030c3f493f550876b2fd1dd5ec66689c4ee5d5344f009746f71fd5a8
six==1.16.0 ; python_version >= "3.8" and python_version < "4.0" \
--hash=sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926 \
--hash=sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254
Expand Down
26 changes: 0 additions & 26 deletions wikiteam3/dumpgenerator/__init__.py
100755 → 100644
Original file line number Diff line number Diff line change
@@ -1,26 +0,0 @@
#!/usr/bin/env python3

# DumpGenerator A generator of dumps for wikis
# Copyright (C) 2011-2018 WikiTeam developers
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

# To learn more, read the documentation:
# https://github.com/WikiTeam/wikiteam/wiki


from wikiteam3.dumpgenerator.dump import DumpGenerator


def main():
DumpGenerator()
30 changes: 28 additions & 2 deletions wikiteam3/dumpgenerator/__main__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,32 @@
#!/usr/bin/env python3

# DumpGenerator A generator of dumps for wikis
# Copyright (C) 2011-2018 WikiTeam developers
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

# To learn more, read the documentation:
# https://github.com/WikiTeam/wikiteam/wiki


from wikiteam3.dumpgenerator.dump import DumpGenerator


def main():
DumpGenerator()


if __name__ == "__main__":
import sys

from .__init__ import main

sys.exit(main())
2 changes: 2 additions & 0 deletions wikiteam3/dumpgenerator/api/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,5 @@
from .get_json import getJSON
from .handle_status_code import handleStatusCode
from .wiki_check import getWikiEngine

# Public API re-exported by this package.
# Entries must be the *names* as strings: ``from <package> import *`` raises
# TypeError when ``__all__`` contains non-string items, and type checkers
# reject them (which is what the former ``# type: ignore`` was hiding).
__all__ = [
    "checkAPI",
    "checkRetryAPI",
    "mwGetAPIAndIndex",
    "getJSON",
    "handleStatusCode",
    "getWikiEngine",
]
66 changes: 36 additions & 30 deletions wikiteam3/dumpgenerator/api/api.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import re
import time
from typing import *
from urllib.parse import urljoin, urlparse, urlunparse
from typing import Any, Literal, Optional
from urllib.parse import urljoin, urlparse

import mwclient
import requests
Expand All @@ -11,7 +10,8 @@
from .get_json import getJSON


def checkAPI(api="", session: requests.Session = None):
# Former defaults, now required arguments: api="", session=None
def checkAPI(api: str, session: requests.Session):
"""Checking API availability"""
global cj
# handle redirects
Expand All @@ -34,29 +34,31 @@ def checkAPI(api="", session: requests.Session = None):
"MediaWiki API URL not found or giving error: HTTP %d" % r.status_code
)
return None
if "MediaWiki API is not enabled for this site." in r.text:
return None
try:
result = getJSON(r)
index = None
if result:
try:
index = (
result["query"]["general"]["server"]
+ result["query"]["general"]["script"]
)
return (True, index, api)
except KeyError:
print("MediaWiki API seems to work but returned no index URL")
return (True, None, api)
except ValueError:
print(repr(r.text))
print("MediaWiki API returned data we could not parse")
return None
if r is not None:
if "MediaWiki API is not enabled for this site." in r.text:
return None
try:
result = getJSON(r)
index = None
if result:
try:
index = (
result["query"]["general"]["server"]
+ result["query"]["general"]["script"]
)
return (True, index, api)
except KeyError:
print("MediaWiki API seems to work but returned no index URL")
return (True, None, api)
except ValueError:
print(repr(r.text))
print("MediaWiki API returned data we could not parse")
return None
return None


def mwGetAPIAndIndex(url="", session: requests.Session = None):
# Former defaults, now required arguments: url="", session=None
def mwGetAPIAndIndex(url: str, session: requests.Session):
"""Returns the MediaWiki API and Index.php"""

api = ""
Expand Down Expand Up @@ -108,18 +110,21 @@ def mwGetAPIAndIndex(url="", session: requests.Session = None):
return api, index


def checkRetryAPI(api="", apiclient=False, session: requests.Session = None):
# Former defaults, now required arguments: api="", apiclient=False, session=None
def checkRetryAPI(api: str, apiclient: bool, session: requests.Session):
"""Call checkAPI and mwclient if necessary"""
check = None
check: (tuple[Literal[True], Any, str] | tuple[Literal[True], None, str] | None)
try:
check = checkAPI(api, session=session)
except requests.exceptions.ConnectionError as e:
print(f"Connection error: {str(e)}")
check = None

if check and apiclient:
apiurl = urlparse(api)
try:
site = mwclient.Site(
# Returns a value, but we're just checking for an error here
mwclient.Site(
apiurl.netloc,
apiurl.path.replace("api.php", ""),
scheme=apiurl.scheme,
Expand All @@ -138,13 +143,14 @@ def checkRetryAPI(api="", apiclient=False, session: requests.Session = None):
)

try:
site = mwclient.Site(
# Returns a value, but we're just checking for an error here
mwclient.Site(
apiurl.netloc,
apiurl.path.replace("api.php", ""),
scheme=newscheme,
pool=session,
)
except KeyError:
check = False
check = False # type: ignore

return check, api
return check, api # type: ignore
2 changes: 1 addition & 1 deletion wikiteam3/dumpgenerator/api/get_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,6 @@ def getJSON(request: requests.Response):
# request.encoding = request.apparent_encoding
try:
return request.json()
except:
except Exception:
# Maybe an older API version which did not return correct JSON
return {}
5 changes: 3 additions & 2 deletions wikiteam3/dumpgenerator/api/index_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,10 @@
import requests


def checkIndex(index="", cookies="", session: requests.Session = None):
# Former defaults, now required arguments: index="", cookies="", session=None
def checkIndex(index: str, cookies: str, session: requests.Session):
"""Checking index.php availability"""
r = session.post(url=index, data={"title": "Special:Version"}, timeout=30)
r = session.post(url=index, data={"title": "Special:Version"}, timeout=30) # type: ignore
if r.status_code >= 400:
print(f"ERROR: The wiki returned status code HTTP {r.status_code}")
return False
Expand Down
56 changes: 25 additions & 31 deletions wikiteam3/dumpgenerator/api/namespaces.py
Original file line number Diff line number Diff line change
@@ -1,53 +1,50 @@
import re

import requests

from wikiteam3.dumpgenerator.api import getJSON
from wikiteam3.dumpgenerator.cli import Delay
from wikiteam3.dumpgenerator.config import Config


def getNamespacesScraper(config: Config = None, session=None):
def getNamespacesScraper(config: Config, session: requests.Session):
"""Hackishly gets the list of namespaces names and ids from the dropdown in the HTML of Special:AllPages"""
"""Function called if no API is available"""
namespaces = config.namespaces
namespacenames = {0: ""} # main is 0, no prefix
# namespacenames = {0: ""} # main is 0, no prefix
if namespaces:
r = session.post(
url=config.index, params={"title": "Special:Allpages"}, timeout=30
url=config.index, params={"title": "Special:Allpages"}, timeout=30 # type: ignore
)
raw = r.text
Delay(config=config, session=session)
Delay(config=config)

# [^>]*? to include selected="selected"
m = re.compile(
r'<option [^>]*?value=[\'"](?P<namespaceid>\d+)[\'"][^>]*?>(?P<namespacename>[^<]+)</option>'
).finditer(raw)
if "all" in namespaces:
namespaces = []
for i in m:
namespaces.append(int(i.group("namespaceid")))
namespacenames[int(i.group("namespaceid"))] = i.group("namespacename")
namespaces = [int(i.group("namespaceid")) for i in m]
# namespacenames[int(i.group("namespaceid"))] = i.group("namespacename")
else:
# check if those namespaces really exist in this wiki
namespaces2 = []
for i in m:
if int(i.group("namespaceid")) in namespaces:
namespaces2.append(int(i.group("namespaceid")))
namespacenames[int(i.group("namespaceid"))] = i.group(
"namespacename"
)
namespaces2 = [
int(i.group("namespaceid"))
for i in m
if int(i.group("namespaceid")) in namespaces
]
namespaces = namespaces2
else:
namespaces = [0]

namespaces = list(set(namespaces)) # uniques
print("%d namespaces found" % (len(namespaces)))
return namespaces, namespacenames
return namespaces


def getNamespacesAPI(config: Config = None, session=None):
def getNamespacesAPI(config: Config, session: requests.Session):
"""Uses the API to get the list of namespaces names and ids"""
namespaces = config.namespaces
namespacenames = {0: ""} # main is 0, no prefix
# namespacenames = {0: ""} # main is 0, no prefix
if namespaces:
r = session.get(
url=config.api,
Expand All @@ -60,37 +57,34 @@ def getNamespacesAPI(config: Config = None, session=None):
timeout=30,
)
result = getJSON(r)
Delay(config=config, session=session)
Delay(config=config)
try:
nsquery = result["query"]["namespaces"]
except KeyError:
except KeyError as ke:
print("Error: could not get namespaces from the API request.")
print("HTTP %d" % r.status_code)
print(r.text)
return None
raise ke

if "all" in namespaces:
namespaces = []
for i in nsquery.keys():
if int(i) < 0: # -1: Special, -2: Media, excluding
continue
namespaces.append(int(i))
namespacenames[int(i)] = nsquery[i]["*"]
namespaces = [int(i) for i in nsquery.keys() if int(i) >= 0]
# -1: Special, -2: Media, excluding
# namespacenames[int(i)] = nsquery[i]["*"]
else:
# check if those namespaces really exist in this wiki
namespaces2 = []
for i in nsquery.keys():
bi = i
# bi = i
i = int(i)
if i < 0: # -1: Special, -2: Media, excluding
continue
if i in namespaces:
namespaces2.append(i)
namespacenames[i] = nsquery[bi]["*"]
# namespacenames[i] = nsquery[bi]["*"]
namespaces = namespaces2
else:
namespaces = [0]

namespaces = list(set(namespaces)) # uniques
print("%d namespaces found" % (len(namespaces)))
return namespaces, namespacenames
return namespaces
Loading

0 comments on commit 1cc3eb9

Please sign in to comment.