diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 7ef5649c..571cf10f 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -9,7 +9,7 @@ repos:
rev: 1.6.0
hooks:
- id: poetry-check
- # - id: poetry-lock
+ - id: poetry-lock
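+ # keeps poetry.lock in sync with pyproject.toml on every commit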
- id: poetry-export
args: ["-f", "requirements.txt", "-o", "requirements.txt"]
- repo: https://github.com/pre-commit/pre-commit-hooks
diff --git a/poetry.lock b/poetry.lock
index 0ee98d70..23b1d1e0 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1,10 +1,9 @@
-# This file is automatically @generated by Poetry and should not be changed by hand.
+# This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand.
[[package]]
name = "atomicwrites"
version = "1.4.1"
description = "Atomic file writes."
-category = "dev"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
files = [
@@ -15,7 +14,6 @@ files = [
name = "attrs"
version = "23.1.0"
description = "Classes Without Boilerplate"
-category = "dev"
optional = false
python-versions = ">=3.7"
files = [
@@ -30,11 +28,56 @@ docs = ["furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib-
tests = ["attrs[tests-no-zope]", "zope-interface"]
tests-no-zope = ["cloudpickle", "hypothesis", "mypy (>=1.1.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"]
+[[package]]
+name = "black"
+version = "23.7.0"
+description = "The uncompromising code formatter."
+optional = false
+python-versions = ">=3.8"
+files = [
+ {file = "black-23.7.0-cp310-cp310-macosx_10_16_arm64.whl", hash = "sha256:5c4bc552ab52f6c1c506ccae05681fab58c3f72d59ae6e6639e8885e94fe2587"},
+ {file = "black-23.7.0-cp310-cp310-macosx_10_16_universal2.whl", hash = "sha256:552513d5cd5694590d7ef6f46e1767a4df9af168d449ff767b13b084c020e63f"},
+ {file = "black-23.7.0-cp310-cp310-macosx_10_16_x86_64.whl", hash = "sha256:86cee259349b4448adb4ef9b204bb4467aae74a386bce85d56ba4f5dc0da27be"},
+ {file = "black-23.7.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:501387a9edcb75d7ae8a4412bb8749900386eaef258f1aefab18adddea1936bc"},
+ {file = "black-23.7.0-cp310-cp310-win_amd64.whl", hash = "sha256:fb074d8b213749fa1d077d630db0d5f8cc3b2ae63587ad4116e8a436e9bbe995"},
+ {file = "black-23.7.0-cp311-cp311-macosx_10_16_arm64.whl", hash = "sha256:b5b0ee6d96b345a8b420100b7d71ebfdd19fab5e8301aff48ec270042cd40ac2"},
+ {file = "black-23.7.0-cp311-cp311-macosx_10_16_universal2.whl", hash = "sha256:893695a76b140881531062d48476ebe4a48f5d1e9388177e175d76234ca247cd"},
+ {file = "black-23.7.0-cp311-cp311-macosx_10_16_x86_64.whl", hash = "sha256:c333286dc3ddca6fdff74670b911cccedacb4ef0a60b34e491b8a67c833b343a"},
+ {file = "black-23.7.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:831d8f54c3a8c8cf55f64d0422ee875eecac26f5f649fb6c1df65316b67c8926"},
+ {file = "black-23.7.0-cp311-cp311-win_amd64.whl", hash = "sha256:7f3bf2dec7d541b4619b8ce526bda74a6b0bffc480a163fed32eb8b3c9aed8ad"},
+ {file = "black-23.7.0-cp38-cp38-macosx_10_16_arm64.whl", hash = "sha256:f9062af71c59c004cd519e2fb8f5d25d39e46d3af011b41ab43b9c74e27e236f"},
+ {file = "black-23.7.0-cp38-cp38-macosx_10_16_universal2.whl", hash = "sha256:01ede61aac8c154b55f35301fac3e730baf0c9cf8120f65a9cd61a81cfb4a0c3"},
+ {file = "black-23.7.0-cp38-cp38-macosx_10_16_x86_64.whl", hash = "sha256:327a8c2550ddc573b51e2c352adb88143464bb9d92c10416feb86b0f5aee5ff6"},
+ {file = "black-23.7.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6d1c6022b86f83b632d06f2b02774134def5d4d4f1dac8bef16d90cda18ba28a"},
+ {file = "black-23.7.0-cp38-cp38-win_amd64.whl", hash = "sha256:27eb7a0c71604d5de083757fbdb245b1a4fae60e9596514c6ec497eb63f95320"},
+ {file = "black-23.7.0-cp39-cp39-macosx_10_16_arm64.whl", hash = "sha256:8417dbd2f57b5701492cd46edcecc4f9208dc75529bcf76c514864e48da867d9"},
+ {file = "black-23.7.0-cp39-cp39-macosx_10_16_universal2.whl", hash = "sha256:47e56d83aad53ca140da0af87678fb38e44fd6bc0af71eebab2d1f59b1acf1d3"},
+ {file = "black-23.7.0-cp39-cp39-macosx_10_16_x86_64.whl", hash = "sha256:25cc308838fe71f7065df53aedd20327969d05671bac95b38fdf37ebe70ac087"},
+ {file = "black-23.7.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:642496b675095d423f9b8448243336f8ec71c9d4d57ec17bf795b67f08132a91"},
+ {file = "black-23.7.0-cp39-cp39-win_amd64.whl", hash = "sha256:ad0014efc7acf0bd745792bd0d8857413652979200ab924fbf239062adc12491"},
+ {file = "black-23.7.0-py3-none-any.whl", hash = "sha256:9fd59d418c60c0348505f2ddf9609c1e1de8e7493eab96198fc89d9f865e7a96"},
+ {file = "black-23.7.0.tar.gz", hash = "sha256:022a582720b0d9480ed82576c920a8c1dde97cc38ff11d8d8859b3bd6ca9eedb"},
+]
+
+[package.dependencies]
+click = ">=8.0.0"
+mypy-extensions = ">=0.4.3"
+packaging = ">=22.0"
+pathspec = ">=0.9.0"
+platformdirs = ">=2"
+tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""}
+typing-extensions = {version = ">=3.10.0.0", markers = "python_version < \"3.10\""}
+
+[package.extras]
+colorama = ["colorama (>=0.4.3)"]
+d = ["aiohttp (>=3.7.4)"]
+jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"]
+uvloop = ["uvloop (>=0.15.2)"]
+
[[package]]
name = "certifi"
version = "2023.7.22"
description = "Python package for providing Mozilla's CA Bundle."
-category = "main"
optional = false
python-versions = ">=3.6"
files = [
@@ -46,7 +89,6 @@ files = [
name = "cfgv"
version = "3.4.0"
description = "Validate configuration and produce human readable error messages."
-category = "dev"
optional = false
python-versions = ">=3.8"
files = [
@@ -58,7 +100,6 @@ files = [
name = "charset-normalizer"
version = "3.2.0"
description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet."
-category = "main"
optional = false
python-versions = ">=3.7.0"
files = [
@@ -139,11 +180,24 @@ files = [
{file = "charset_normalizer-3.2.0-py3-none-any.whl", hash = "sha256:8e098148dd37b4ce3baca71fb394c81dc5d9c7728c95df695d2dca218edf40e6"},
]
+[[package]]
+name = "click"
+version = "8.1.7"
+description = "Composable command line interface toolkit"
+optional = false
+python-versions = ">=3.7"
+files = [
+ {file = "click-8.1.7-py3-none-any.whl", hash = "sha256:ae74fb96c20a0277a1d615f1e4d73c8414f5a98db8b799a7931d1582f3390c28"},
+ {file = "click-8.1.7.tar.gz", hash = "sha256:ca9853ad459e787e2192211578cc907e7594e294c7ccc834310722b41b9ca6de"},
+]
+
+[package.dependencies]
+colorama = {version = "*", markers = "platform_system == \"Windows\""}
+
[[package]]
name = "colorama"
version = "0.4.6"
description = "Cross-platform colored terminal text."
-category = "main"
optional = false
python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7"
files = [
@@ -155,7 +209,6 @@ files = [
name = "contextlib2"
version = "21.6.0"
description = "Backports and enhancements for the contextlib module"
-category = "main"
optional = false
python-versions = ">=3.6"
files = [
@@ -167,7 +220,6 @@ files = [
name = "distlib"
version = "0.3.7"
description = "Distribution utilities"
-category = "dev"
optional = false
python-versions = "*"
files = [
@@ -179,7 +231,6 @@ files = [
name = "docopt"
version = "0.6.2"
description = "Pythonic argument parser, that will make you smile"
-category = "main"
optional = false
python-versions = "*"
files = [
@@ -190,7 +241,6 @@ files = [
name = "file-read-backwards"
version = "2.0.0"
description = "Memory efficient way of reading files line-by-line from the end of file"
-category = "main"
optional = false
python-versions = "*"
files = [
@@ -200,25 +250,26 @@ files = [
[[package]]
name = "filelock"
-version = "3.12.2"
+version = "3.12.3"
description = "A platform independent file lock."
-category = "dev"
optional = false
-python-versions = ">=3.7"
+python-versions = ">=3.8"
files = [
- {file = "filelock-3.12.2-py3-none-any.whl", hash = "sha256:cbb791cdea2a72f23da6ac5b5269ab0a0d161e9ef0100e653b69049a7706d1ec"},
- {file = "filelock-3.12.2.tar.gz", hash = "sha256:002740518d8aa59a26b0c76e10fb8c6e15eae825d34b6fdf670333fd7b938d81"},
+ {file = "filelock-3.12.3-py3-none-any.whl", hash = "sha256:f067e40ccc40f2b48395a80fcbd4728262fab54e232e090a4063ab804179efeb"},
+ {file = "filelock-3.12.3.tar.gz", hash = "sha256:0ecc1dd2ec4672a10c8550a8182f1bd0c0a5088470ecd5a125e45f49472fac3d"},
]
+[package.dependencies]
+typing-extensions = {version = ">=4.7.1", markers = "python_version < \"3.11\""}
+
[package.extras]
-docs = ["furo (>=2023.5.20)", "sphinx (>=7.0.1)", "sphinx-autodoc-typehints (>=1.23,!=1.23.4)"]
-testing = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "diff-cover (>=7.5)", "pytest (>=7.3.1)", "pytest-cov (>=4.1)", "pytest-mock (>=3.10)", "pytest-timeout (>=2.1)"]
+docs = ["furo (>=2023.7.26)", "sphinx (>=7.1.2)", "sphinx-autodoc-typehints (>=1.24)"]
+testing = ["covdefaults (>=2.3)", "coverage (>=7.3)", "diff-cover (>=7.7)", "pytest (>=7.4)", "pytest-cov (>=4.1)", "pytest-mock (>=3.11.1)", "pytest-timeout (>=2.1)"]
[[package]]
name = "flake8"
version = "3.9.2"
description = "the modular source code checker: pep8 pyflakes and co"
-category = "dev"
optional = false
python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7"
files = [
@@ -231,16 +282,34 @@ mccabe = ">=0.6.0,<0.7.0"
pycodestyle = ">=2.7.0,<2.8.0"
pyflakes = ">=2.3.0,<2.4.0"
+[[package]]
+name = "flake8-black"
+version = "0.3.6"
+description = "flake8 plugin to call black as a code style validator"
+optional = false
+python-versions = ">=3.7"
+files = [
+ {file = "flake8-black-0.3.6.tar.gz", hash = "sha256:0dfbca3274777792a5bcb2af887a4cad72c72d0e86c94e08e3a3de151bb41c34"},
+ {file = "flake8_black-0.3.6-py3-none-any.whl", hash = "sha256:fe8ea2eca98d8a504f22040d9117347f6b367458366952862ac3586e7d4eeaca"},
+]
+
+[package.dependencies]
+black = ">=22.1.0"
+flake8 = ">=3"
+tomli = {version = "*", markers = "python_version < \"3.11\""}
+
+[package.extras]
+develop = ["build", "twine"]
+
[[package]]
name = "identify"
-version = "2.5.26"
+version = "2.5.27"
description = "File identification library for Python"
-category = "dev"
optional = false
python-versions = ">=3.8"
files = [
- {file = "identify-2.5.26-py2.py3-none-any.whl", hash = "sha256:c22a8ead0d4ca11f1edd6c9418c3220669b3b7533ada0a0ffa6cc0ef85cf9b54"},
- {file = "identify-2.5.26.tar.gz", hash = "sha256:7243800bce2f58404ed41b7c002e53d4d22bcf3ae1b7900c2d7aefd95394bf7f"},
+ {file = "identify-2.5.27-py2.py3-none-any.whl", hash = "sha256:fdb527b2dfe24602809b2201e033c2a113d7bdf716db3ca8e3243f735dcecaba"},
+ {file = "identify-2.5.27.tar.gz", hash = "sha256:287b75b04a0e22d727bc9a41f0d4f3c1bcada97490fa6eabb5b28f0e9097e733"},
]
[package.extras]
@@ -250,7 +319,6 @@ license = ["ukkonen"]
name = "idna"
version = "3.4"
description = "Internationalized Domain Names in Applications (IDNA)"
-category = "main"
optional = false
python-versions = ">=3.5"
files = [
@@ -262,7 +330,6 @@ files = [
name = "iniconfig"
version = "2.0.0"
description = "brain-dead simple config-ini parsing"
-category = "dev"
optional = false
python-versions = ">=3.7"
files = [
@@ -274,7 +341,6 @@ files = [
name = "internetarchive"
version = "3.5.0"
description = "A Python interface to archive.org."
-category = "main"
optional = false
python-versions = ">=3.7"
files = [
@@ -300,7 +366,6 @@ types = ["tqdm-stubs (>=0.2.0)", "types-colorama", "types-docopt (>=0.6.10,<0.7.
name = "jsonpatch"
version = "1.33"
description = "Apply JSON-Patches (RFC 6902)"
-category = "main"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, !=3.6.*"
files = [
@@ -315,7 +380,6 @@ jsonpointer = ">=1.9"
name = "jsonpointer"
version = "2.4"
description = "Identify specific nodes in a JSON document (RFC 6901)"
-category = "main"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, !=3.6.*"
files = [
@@ -327,7 +391,6 @@ files = [
name = "lxml"
version = "4.9.3"
description = "Powerful and Pythonic XML processing library combining libxml2/libxslt with the ElementTree API."
-category = "main"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, != 3.4.*"
files = [
@@ -435,7 +498,6 @@ source = ["Cython (>=0.29.35)"]
name = "mccabe"
version = "0.6.1"
description = "McCabe checker, plugin for flake8"
-category = "dev"
optional = false
python-versions = "*"
files = [
@@ -447,7 +509,6 @@ files = [
name = "mwclient"
version = "0.10.1"
description = "MediaWiki API client"
-category = "main"
optional = false
python-versions = "*"
files = [
@@ -459,11 +520,67 @@ files = [
requests-oauthlib = "*"
six = "*"
+[[package]]
+name = "mypy"
+version = "1.5.1"
+description = "Optional static typing for Python"
+optional = false
+python-versions = ">=3.8"
+files = [
+ {file = "mypy-1.5.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f33592ddf9655a4894aef22d134de7393e95fcbdc2d15c1ab65828eee5c66c70"},
+ {file = "mypy-1.5.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:258b22210a4a258ccd077426c7a181d789d1121aca6db73a83f79372f5569ae0"},
+ {file = "mypy-1.5.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a9ec1f695f0c25986e6f7f8778e5ce61659063268836a38c951200c57479cc12"},
+ {file = "mypy-1.5.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:abed92d9c8f08643c7d831300b739562b0a6c9fcb028d211134fc9ab20ccad5d"},
+ {file = "mypy-1.5.1-cp310-cp310-win_amd64.whl", hash = "sha256:a156e6390944c265eb56afa67c74c0636f10283429171018446b732f1a05af25"},
+ {file = "mypy-1.5.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6ac9c21bfe7bc9f7f1b6fae441746e6a106e48fc9de530dea29e8cd37a2c0cc4"},
+ {file = "mypy-1.5.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:51cb1323064b1099e177098cb939eab2da42fea5d818d40113957ec954fc85f4"},
+ {file = "mypy-1.5.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:596fae69f2bfcb7305808c75c00f81fe2829b6236eadda536f00610ac5ec2243"},
+ {file = "mypy-1.5.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:32cb59609b0534f0bd67faebb6e022fe534bdb0e2ecab4290d683d248be1b275"},
+ {file = "mypy-1.5.1-cp311-cp311-win_amd64.whl", hash = "sha256:159aa9acb16086b79bbb0016145034a1a05360626046a929f84579ce1666b315"},
+ {file = "mypy-1.5.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:f6b0e77db9ff4fda74de7df13f30016a0a663928d669c9f2c057048ba44f09bb"},
+ {file = "mypy-1.5.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:26f71b535dfc158a71264e6dc805a9f8d2e60b67215ca0bfa26e2e1aa4d4d373"},
+ {file = "mypy-1.5.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2fc3a600f749b1008cc75e02b6fb3d4db8dbcca2d733030fe7a3b3502902f161"},
+ {file = "mypy-1.5.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:26fb32e4d4afa205b24bf645eddfbb36a1e17e995c5c99d6d00edb24b693406a"},
+ {file = "mypy-1.5.1-cp312-cp312-win_amd64.whl", hash = "sha256:82cb6193de9bbb3844bab4c7cf80e6227d5225cc7625b068a06d005d861ad5f1"},
+ {file = "mypy-1.5.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:4a465ea2ca12804d5b34bb056be3a29dc47aea5973b892d0417c6a10a40b2d65"},
+ {file = "mypy-1.5.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:9fece120dbb041771a63eb95e4896791386fe287fefb2837258925b8326d6160"},
+ {file = "mypy-1.5.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d28ddc3e3dfeab553e743e532fb95b4e6afad51d4706dd22f28e1e5e664828d2"},
+ {file = "mypy-1.5.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:57b10c56016adce71fba6bc6e9fd45d8083f74361f629390c556738565af8eeb"},
+ {file = "mypy-1.5.1-cp38-cp38-win_amd64.whl", hash = "sha256:ff0cedc84184115202475bbb46dd99f8dcb87fe24d5d0ddfc0fe6b8575c88d2f"},
+ {file = "mypy-1.5.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:8f772942d372c8cbac575be99f9cc9d9fb3bd95c8bc2de6c01411e2c84ebca8a"},
+ {file = "mypy-1.5.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:5d627124700b92b6bbaa99f27cbe615c8ea7b3402960f6372ea7d65faf376c14"},
+ {file = "mypy-1.5.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:361da43c4f5a96173220eb53340ace68cda81845cd88218f8862dfb0adc8cddb"},
+ {file = "mypy-1.5.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:330857f9507c24de5c5724235e66858f8364a0693894342485e543f5b07c8693"},
+ {file = "mypy-1.5.1-cp39-cp39-win_amd64.whl", hash = "sha256:c543214ffdd422623e9fedd0869166c2f16affe4ba37463975043ef7d2ea8770"},
+ {file = "mypy-1.5.1-py3-none-any.whl", hash = "sha256:f757063a83970d67c444f6e01d9550a7402322af3557ce7630d3c957386fa8f5"},
+ {file = "mypy-1.5.1.tar.gz", hash = "sha256:b031b9601f1060bf1281feab89697324726ba0c0bae9d7cd7ab4b690940f0b92"},
+]
+
+[package.dependencies]
+mypy-extensions = ">=1.0.0"
+tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""}
+typing-extensions = ">=4.1.0"
+
+[package.extras]
+dmypy = ["psutil (>=4.0)"]
+install-types = ["pip"]
+reports = ["lxml"]
+
+[[package]]
+name = "mypy-extensions"
+version = "1.0.0"
+description = "Type system extensions for programs checked with the mypy type checker."
+optional = false
+python-versions = ">=3.5"
+files = [
+ {file = "mypy_extensions-1.0.0-py3-none-any.whl", hash = "sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d"},
+ {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"},
+]
+
[[package]]
name = "nodeenv"
version = "1.8.0"
description = "Node.js virtual environment builder"
-category = "dev"
optional = false
python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*"
files = [
@@ -478,7 +595,6 @@ setuptools = "*"
name = "oauthlib"
version = "3.2.2"
description = "A generic, spec-compliant, thorough implementation of the OAuth request-signing logic"
-category = "main"
optional = false
python-versions = ">=3.6"
files = [
@@ -495,7 +611,6 @@ signedtoken = ["cryptography (>=3.0.0)", "pyjwt (>=2.0.0,<3)"]
name = "packaging"
version = "23.1"
description = "Core utilities for Python packages"
-category = "dev"
optional = false
python-versions = ">=3.7"
files = [
@@ -503,11 +618,21 @@ files = [
{file = "packaging-23.1.tar.gz", hash = "sha256:a392980d2b6cffa644431898be54b0045151319d1e7ec34f0cfed48767dd334f"},
]
+[[package]]
+name = "pathspec"
+version = "0.11.2"
+description = "Utility library for gitignore style pattern matching of file paths."
+optional = false
+python-versions = ">=3.7"
+files = [
+ {file = "pathspec-0.11.2-py3-none-any.whl", hash = "sha256:1d6ed233af05e679efb96b1851550ea95bbb64b7c490b0f5aa52996c11e92a20"},
+ {file = "pathspec-0.11.2.tar.gz", hash = "sha256:e0d8d0ac2f12da61956eb2306b69f9469b42f4deb0f3cb6ed47b9cce9996ced3"},
+]
+
[[package]]
name = "platformdirs"
version = "3.10.0"
description = "A small Python package for determining appropriate platform-specific dirs, e.g. a \"user data dir\"."
-category = "dev"
optional = false
python-versions = ">=3.7"
files = [
@@ -521,14 +646,13 @@ test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=7.4)", "pytest-co
[[package]]
name = "pluggy"
-version = "1.2.0"
+version = "1.3.0"
description = "plugin and hook calling mechanisms for python"
-category = "dev"
optional = false
-python-versions = ">=3.7"
+python-versions = ">=3.8"
files = [
- {file = "pluggy-1.2.0-py3-none-any.whl", hash = "sha256:c2fd55a7d7a3863cba1a013e4e2414658b1d07b6bc57b3919e0c63c9abb99849"},
- {file = "pluggy-1.2.0.tar.gz", hash = "sha256:d12f0c4b579b15f5e054301bb226ee85eeeba08ffec228092f8defbaa3a4c4b3"},
+ {file = "pluggy-1.3.0-py3-none-any.whl", hash = "sha256:d89c696a773f8bd377d18e5ecda92b7a3793cbe66c87060a6fb58c7b6e1061f7"},
+ {file = "pluggy-1.3.0.tar.gz", hash = "sha256:cf61ae8f126ac6f7c451172cf30e3e43d3ca77615509771b3a984a0730651e12"},
]
[package.extras]
@@ -539,7 +663,6 @@ testing = ["pytest", "pytest-benchmark"]
name = "poster3"
version = "0.8.1"
description = "Streaming HTTP uploads and multipart/form-data encoding"
-category = "main"
optional = false
python-versions = "*"
files = [
@@ -553,7 +676,6 @@ poster3 = ["buildutils", "sphinx"]
name = "pre-commit"
version = "2.21.0"
description = "A framework for managing and maintaining multi-language pre-commit hooks."
-category = "dev"
optional = false
python-versions = ">=3.7"
files = [
@@ -572,7 +694,6 @@ virtualenv = ">=20.10.0"
name = "pre-commit-poetry-export"
version = "0.1.2"
description = "pre-commit hook to keep requirements.txt updated"
-category = "main"
optional = false
python-versions = ">=3.8,<4.0"
files = [
@@ -584,7 +705,6 @@ files = [
name = "py"
version = "1.11.0"
description = "library with cross-python path, ini-parsing, io, code, log facilities"
-category = "dev"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
files = [
@@ -596,7 +716,6 @@ files = [
name = "pycodestyle"
version = "2.7.0"
description = "Python style guide checker"
-category = "dev"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
files = [
@@ -608,7 +727,6 @@ files = [
name = "pyflakes"
version = "2.3.1"
description = "passive checker of Python programs"
-category = "dev"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
files = [
@@ -620,7 +738,6 @@ files = [
name = "pymarkdown"
version = "0.1.4"
description = "Evaluate code in markdown"
-category = "dev"
optional = false
python-versions = "*"
files = [
@@ -634,7 +751,6 @@ toolz = "*"
name = "pymysql"
version = "1.1.0"
description = "Pure Python MySQL Driver"
-category = "main"
optional = false
python-versions = ">=3.7"
files = [
@@ -650,7 +766,6 @@ rsa = ["cryptography"]
name = "pytest"
version = "6.2.5"
description = "pytest: simple powerful testing with Python"
-category = "dev"
optional = false
python-versions = ">=3.6"
files = [
@@ -675,7 +790,6 @@ testing = ["argcomplete", "hypothesis (>=3.56)", "mock", "nose", "requests", "xm
name = "pywikibot"
version = "6.6.5"
description = "Python MediaWiki Bot Framework"
-category = "main"
optional = false
python-versions = ">=3.5.0"
files = [
@@ -712,7 +826,6 @@ wikitextparser = ["wikitextparser (>=0.47.0)", "wikitextparser (>=0.47.5)"]
name = "pyyaml"
version = "6.0.1"
description = "YAML parser and emitter for Python"
-category = "dev"
optional = false
python-versions = ">=3.6"
files = [
@@ -721,6 +834,7 @@ files = [
{file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"},
{file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"},
{file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"},
+ {file = "PyYAML-6.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290"},
{file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"},
{file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"},
{file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"},
@@ -728,8 +842,15 @@ files = [
{file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"},
{file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"},
{file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"},
+ {file = "PyYAML-6.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b"},
{file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"},
{file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"},
+ {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"},
+ {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"},
+ {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"},
+ {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"},
+ {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"},
+ {file = "PyYAML-6.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df"},
{file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"},
{file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"},
{file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"},
@@ -746,6 +867,7 @@ files = [
{file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"},
{file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"},
{file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"},
+ {file = "PyYAML-6.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6"},
{file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"},
{file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"},
{file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"},
@@ -753,6 +875,7 @@ files = [
{file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"},
{file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"},
{file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"},
+ {file = "PyYAML-6.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5"},
{file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"},
{file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"},
{file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"},
@@ -762,7 +885,6 @@ files = [
name = "requests"
version = "2.31.0"
description = "Python HTTP for Humans."
-category = "main"
optional = false
python-versions = ">=3.7"
files = [
@@ -784,7 +906,6 @@ use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"]
name = "requests-oauthlib"
version = "1.3.1"
description = "OAuthlib authentication support for Requests."
-category = "main"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
files = [
@@ -803,7 +924,6 @@ rsa = ["oauthlib[signedtoken] (>=3.0.0)"]
name = "schema"
version = "0.7.5"
description = "Simple data validation library"
-category = "main"
optional = false
python-versions = "*"
files = [
@@ -818,7 +938,6 @@ contextlib2 = ">=0.5.5"
name = "setuptools"
version = "68.1.2"
description = "Easily download, build, install, upgrade, and uninstall Python packages"
-category = "main"
optional = false
python-versions = ">=3.8"
files = [
@@ -835,7 +954,6 @@ testing-integration = ["build[virtualenv]", "filelock (>=3.4.0)", "jaraco.envs (
name = "six"
version = "1.16.0"
description = "Python 2 and 3 compatibility utilities"
-category = "main"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*"
files = [
@@ -847,7 +965,6 @@ files = [
name = "toml"
version = "0.10.2"
description = "Python Library for Tom's Obvious, Minimal Language"
-category = "dev"
optional = false
python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*"
files = [
@@ -855,11 +972,21 @@ files = [
{file = "toml-0.10.2.tar.gz", hash = "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f"},
]
+[[package]]
+name = "tomli"
+version = "2.0.1"
+description = "A lil' TOML parser"
+optional = false
+python-versions = ">=3.7"
+files = [
+ {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"},
+ {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"},
+]
+
[[package]]
name = "toolz"
version = "0.12.0"
description = "List processing tools and functional utilities"
-category = "dev"
optional = false
python-versions = ">=3.5"
files = [
@@ -871,7 +998,6 @@ files = [
name = "tqdm"
version = "4.66.1"
description = "Fast, Extensible Progress Meter"
-category = "main"
optional = false
python-versions = ">=3.7"
files = [
@@ -888,11 +1014,46 @@ notebook = ["ipywidgets (>=6)"]
slack = ["slack-sdk"]
telegram = ["requests"]
+[[package]]
+name = "types-requests"
+version = "2.31.0.2"
+description = "Typing stubs for requests"
+optional = false
+python-versions = "*"
+files = [
+ {file = "types-requests-2.31.0.2.tar.gz", hash = "sha256:6aa3f7faf0ea52d728bb18c0a0d1522d9bfd8c72d26ff6f61bfc3d06a411cf40"},
+ {file = "types_requests-2.31.0.2-py3-none-any.whl", hash = "sha256:56d181c85b5925cbc59f4489a57e72a8b2166f18273fd8ba7b6fe0c0b986f12a"},
+]
+
+[package.dependencies]
+types-urllib3 = "*"
+
+[[package]]
+name = "types-urllib3"
+version = "1.26.25.14"
+description = "Typing stubs for urllib3"
+optional = false
+python-versions = "*"
+files = [
+ {file = "types-urllib3-1.26.25.14.tar.gz", hash = "sha256:229b7f577c951b8c1b92c1bc2b2fdb0b49847bd2af6d1cc2a2e3dd340f3bda8f"},
+ {file = "types_urllib3-1.26.25.14-py3-none-any.whl", hash = "sha256:9683bbb7fb72e32bfe9d2be6e04875fbe1b3eeec3cbb4ea231435aa7fd6b4f0e"},
+]
+
+[[package]]
+name = "typing-extensions"
+version = "4.7.1"
+description = "Backported and Experimental Type Hints for Python 3.7+"
+optional = false
+python-versions = ">=3.7"
+files = [
+ {file = "typing_extensions-4.7.1-py3-none-any.whl", hash = "sha256:440d5dd3af93b060174bf433bccd69b0babc3b15b1a8dca43789fd7f61514b36"},
+ {file = "typing_extensions-4.7.1.tar.gz", hash = "sha256:b75ddc264f0ba5615db7ba217daeb99701ad295353c45f9e95963337ceeeffb2"},
+]
+
[[package]]
name = "urllib3"
version = "1.26.16"
description = "HTTP library with thread-safe connection pooling, file post, and more."
-category = "main"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*"
files = [
@@ -907,14 +1068,13 @@ socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"]
[[package]]
name = "virtualenv"
-version = "20.24.3"
+version = "20.24.4"
description = "Virtual Python Environment builder"
-category = "dev"
optional = false
python-versions = ">=3.7"
files = [
- {file = "virtualenv-20.24.3-py3-none-any.whl", hash = "sha256:95a6e9398b4967fbcb5fef2acec5efaf9aa4972049d9ae41f95e0972a683fd02"},
- {file = "virtualenv-20.24.3.tar.gz", hash = "sha256:e5c3b4ce817b0b328af041506a2a299418c98747c4b1e68cb7527e74ced23efc"},
+ {file = "virtualenv-20.24.4-py3-none-any.whl", hash = "sha256:29c70bb9b88510f6414ac3e55c8b413a1f96239b6b789ca123437d5e892190cb"},
+ {file = "virtualenv-20.24.4.tar.gz", hash = "sha256:772b05bfda7ed3b8ecd16021ca9716273ad9f4467c801f27e83ac73430246dca"},
]
[package.dependencies]
@@ -923,14 +1083,13 @@ filelock = ">=3.12.2,<4"
platformdirs = ">=3.9.1,<4"
[package.extras]
-docs = ["furo (>=2023.5.20)", "proselint (>=0.13)", "sphinx (>=7.0.1)", "sphinx-argparse (>=0.4)", "sphinxcontrib-towncrier (>=0.2.1a0)", "towncrier (>=23.6)"]
+docs = ["furo (>=2023.7.26)", "proselint (>=0.13)", "sphinx (>=7.1.2)", "sphinx-argparse (>=0.4)", "sphinxcontrib-towncrier (>=0.2.1a0)", "towncrier (>=23.6)"]
test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess (>=1)", "flaky (>=3.7)", "packaging (>=23.1)", "pytest (>=7.4)", "pytest-env (>=0.8.2)", "pytest-freezer (>=0.4.8)", "pytest-mock (>=3.11.1)", "pytest-randomly (>=3.12)", "pytest-timeout (>=2.1)", "setuptools (>=68)", "time-machine (>=2.10)"]
[[package]]
name = "wikitools3"
version = "3.0.1"
description = "Python package for interacting with a MediaWiki wiki. It is used by WikiTeam for archiving MediaWiki wikis."
-category = "main"
optional = false
python-versions = ">=3.8,<4.0"
files = [
@@ -944,4 +1103,4 @@ poster3 = ">=0.8.1,<0.9.0"
[metadata]
lock-version = "2.0"
python-versions = "^3.8"
-content-hash = "1eee6035c5660e8cba28942140937e2ceb36bf90482e76fa5ddd054efa3c659c"
+content-hash = "ebed56288c755209a5da1b75673fdda769a85b22d5f1c26fcb7492d971ffd617"
diff --git a/pyproject.toml b/pyproject.toml
index 8453bae1..040dbc52 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -77,6 +77,10 @@ requests = "^2.31.0"
flake8 = "^3.9.2"
pre-commit = "^2.17.0"
pymarkdown = "^0.1.4"
+mypy = "^1.5.1"
+types-requests = "^2.31.0.2"
+# flake8-black may be unnecessary?
+flake8-black = "^0.3.6"
[build-system]
requires = ["poetry-core>=1.0.0"]
@@ -84,3 +88,7 @@ build-backend = "poetry.core.masonry.api"
[tool.pymarkdown]
disable-rules = "line-length,no-inline-html"
+
+[tool.mypy]
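+# check_untyped_defs: type-check the bodies of functions that have no annotations
+# ignore_missing_imports: don't error on third-party packages that ship no type stubs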
+check_untyped_defs = true
+ignore_missing_imports = true
diff --git a/wikiteam3/dumpgenerator/__init__.py b/wikiteam3/dumpgenerator/__init__.py
old mode 100755
new mode 100644
index b5da8b1e..e69de29b
--- a/wikiteam3/dumpgenerator/__init__.py
+++ b/wikiteam3/dumpgenerator/__init__.py
@@ -1,26 +0,0 @@
-#!/usr/bin/env python3
-
-# DumpGenerator A generator of dumps for wikis
-# Copyright (C) 2011-2018 WikiTeam developers
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <https://www.gnu.org/licenses/>.
-
-# To learn more, read the documentation:
-# https://github.com/WikiTeam/wikiteam/wiki
-
-
-from wikiteam3.dumpgenerator.dump import DumpGenerator
-
-
-def main():
- DumpGenerator()
diff --git a/wikiteam3/dumpgenerator/__main__.py b/wikiteam3/dumpgenerator/__main__.py
index 0321cad7..4981f111 100644
--- a/wikiteam3/dumpgenerator/__main__.py
+++ b/wikiteam3/dumpgenerator/__main__.py
@@ -1,6 +1,32 @@
+#!/usr/bin/env python3
+
+# DumpGenerator A generator of dumps for wikis
+# Copyright (C) 2011-2018 WikiTeam developers
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+# To learn more, read the documentation:
+# https://github.com/WikiTeam/wikiteam/wiki
+
+
+from wikiteam3.dumpgenerator.dump import DumpGenerator
+
+
+def main():
+ DumpGenerator()
+
+
if __name__ == "__main__":
import sys
- from .__init__ import main
-
sys.exit(main())
diff --git a/wikiteam3/dumpgenerator/api/__init__.py b/wikiteam3/dumpgenerator/api/__init__.py
index 7d86c175..3748c5e3 100644
--- a/wikiteam3/dumpgenerator/api/__init__.py
+++ b/wikiteam3/dumpgenerator/api/__init__.py
@@ -2,3 +2,5 @@
from .get_json import getJSON
from .handle_status_code import handleStatusCode
from .wiki_check import getWikiEngine
+
+__all__ = ["checkAPI", "checkRetryAPI", "mwGetAPIAndIndex", "getJSON", "handleStatusCode", "getWikiEngine"]
diff --git a/wikiteam3/dumpgenerator/api/api.py b/wikiteam3/dumpgenerator/api/api.py
index 0fa855d0..f3d39488 100644
--- a/wikiteam3/dumpgenerator/api/api.py
+++ b/wikiteam3/dumpgenerator/api/api.py
@@ -1,7 +1,6 @@
import re
-import time
-from typing import *
-from urllib.parse import urljoin, urlparse, urlunparse
+from typing import Any, Literal, Optional
+from urllib.parse import urljoin, urlparse
import mwclient
import requests
@@ -11,7 +10,8 @@
from .get_json import getJSON
-def checkAPI(api="", session: requests.Session = None):
+# previous defaults: api="", session: requests.Session = None
+def checkAPI(api: str, session: requests.Session):
"""Checking API availability"""
global cj
# handle redirects
@@ -34,29 +34,31 @@ def checkAPI(api="", session: requests.Session = None):
"MediaWiki API URL not found or giving error: HTTP %d" % r.status_code
)
return None
- if "MediaWiki API is not enabled for this site." in r.text:
- return None
- try:
- result = getJSON(r)
- index = None
- if result:
- try:
- index = (
- result["query"]["general"]["server"]
- + result["query"]["general"]["script"]
- )
- return (True, index, api)
- except KeyError:
- print("MediaWiki API seems to work but returned no index URL")
- return (True, None, api)
- except ValueError:
- print(repr(r.text))
- print("MediaWiki API returned data we could not parse")
- return None
+ if r is not None:
+ if "MediaWiki API is not enabled for this site." in r.text:
+ return None
+ try:
+ result = getJSON(r)
+ index = None
+ if result:
+ try:
+ index = (
+ result["query"]["general"]["server"]
+ + result["query"]["general"]["script"]
+ )
+ return (True, index, api)
+ except KeyError:
+ print("MediaWiki API seems to work but returned no index URL")
+ return (True, None, api)
+ except ValueError:
+ print(repr(r.text))
+ print("MediaWiki API returned data we could not parse")
+ return None
return None
-def mwGetAPIAndIndex(url="", session: requests.Session = None):
+# previous default: url=""
+def mwGetAPIAndIndex(url: str, session: requests.Session):
"""Returns the MediaWiki API and Index.php"""
api = ""
@@ -108,18 +110,21 @@ def mwGetAPIAndIndex(url="", session: requests.Session = None):
return api, index
-def checkRetryAPI(api="", apiclient=False, session: requests.Session = None):
+# previous defaults: api="", apiclient=False
+def checkRetryAPI(api: str, apiclient: bool, session: requests.Session):
"""Call checkAPI and mwclient if necessary"""
- check = None
+ check: (tuple[Literal[True], Any, str] | tuple[Literal[True], None, str] | None)
try:
check = checkAPI(api, session=session)
except requests.exceptions.ConnectionError as e:
print(f"Connection error: {str(e)}")
+ check = None
if check and apiclient:
apiurl = urlparse(api)
try:
- site = mwclient.Site(
+ # Returns a value, but we're just checking for an error here
+ mwclient.Site(
apiurl.netloc,
apiurl.path.replace("api.php", ""),
scheme=apiurl.scheme,
@@ -138,13 +143,14 @@ def checkRetryAPI(api="", apiclient=False, session: requests.Session = None):
)
try:
- site = mwclient.Site(
+ # Returns a value, but we're just checking for an error here
+ mwclient.Site(
apiurl.netloc,
apiurl.path.replace("api.php", ""),
scheme=newscheme,
pool=session,
)
except KeyError:
- check = False
+ check = False # type: ignore
- return check, api
+ return check, api # type: ignore
diff --git a/wikiteam3/dumpgenerator/api/get_json.py b/wikiteam3/dumpgenerator/api/get_json.py
index 7a3b2273..bd1aa48d 100644
--- a/wikiteam3/dumpgenerator/api/get_json.py
+++ b/wikiteam3/dumpgenerator/api/get_json.py
@@ -8,6 +8,6 @@ def getJSON(request: requests.Response):
# request.encoding = request.apparent_encoding
try:
return request.json()
- except:
+ except Exception:
# Maybe an older API version which did not return correct JSON
return {}
diff --git a/wikiteam3/dumpgenerator/api/index_check.py b/wikiteam3/dumpgenerator/api/index_check.py
index 50ae58c0..d29fa2c9 100644
--- a/wikiteam3/dumpgenerator/api/index_check.py
+++ b/wikiteam3/dumpgenerator/api/index_check.py
@@ -3,9 +3,10 @@
import requests
-def checkIndex(index="", cookies="", session: requests.Session = None):
+# previous defaults: index="", cookies="", session=None
+def checkIndex(index: str, cookies: str, session: requests.Session):
"""Checking index.php availability"""
- r = session.post(url=index, data={"title": "Special:Version"}, timeout=30)
+ r = session.post(url=index, data={"title": "Special:Version"}, timeout=30) # type: ignore
if r.status_code >= 400:
print(f"ERROR: The wiki returned status code HTTP {r.status_code}")
return False
diff --git a/wikiteam3/dumpgenerator/api/namespaces.py b/wikiteam3/dumpgenerator/api/namespaces.py
index b9fbbdeb..93c5f70f 100644
--- a/wikiteam3/dumpgenerator/api/namespaces.py
+++ b/wikiteam3/dumpgenerator/api/namespaces.py
@@ -1,53 +1,50 @@
import re
+import requests
+
from wikiteam3.dumpgenerator.api import getJSON
from wikiteam3.dumpgenerator.cli import Delay
from wikiteam3.dumpgenerator.config import Config
-def getNamespacesScraper(config: Config = None, session=None):
+def getNamespacesScraper(config: Config, session: requests.Session):
"""Hackishly gets the list of namespaces names and ids from the dropdown in the HTML of Special:AllPages"""
"""Function called if no API is available"""
namespaces = config.namespaces
- namespacenames = {0: ""} # main is 0, no prefix
+ # namespacenames = {0: ""} # main is 0, no prefix
if namespaces:
r = session.post(
- url=config.index, params={"title": "Special:Allpages"}, timeout=30
+ url=config.index, params={"title": "Special:Allpages"}, timeout=30 # type: ignore
)
raw = r.text
- Delay(config=config, session=session)
+ Delay(config=config)
# [^>]*? to include selected="selected"
m = re.compile(
r'<option [^>]*?value="(?P<namespaceid>\d+)"[^>]*?>(?P<namespacename>[^<]+)</option>'
).finditer(raw)
if "all" in namespaces:
- namespaces = []
- for i in m:
- namespaces.append(int(i.group("namespaceid")))
- namespacenames[int(i.group("namespaceid"))] = i.group("namespacename")
+ namespaces = [int(i.group("namespaceid")) for i in m]
+ # namespacenames[int(i.group("namespaceid"))] = i.group("namespacename")
else:
- # check if those namespaces really exist in this wiki
- namespaces2 = []
- for i in m:
- if int(i.group("namespaceid")) in namespaces:
- namespaces2.append(int(i.group("namespaceid")))
- namespacenames[int(i.group("namespaceid"))] = i.group(
- "namespacename"
- )
+ namespaces2 = [
+ int(i.group("namespaceid"))
+ for i in m
+ if int(i.group("namespaceid")) in namespaces
+ ]
namespaces = namespaces2
else:
namespaces = [0]
namespaces = list(set(namespaces)) # uniques
print("%d namespaces found" % (len(namespaces)))
- return namespaces, namespacenames
+ return namespaces
-def getNamespacesAPI(config: Config = None, session=None):
+def getNamespacesAPI(config: Config, session: requests.Session):
"""Uses the API to get the list of namespaces names and ids"""
namespaces = config.namespaces
- namespacenames = {0: ""} # main is 0, no prefix
+ # namespacenames = {0: ""} # main is 0, no prefix
if namespaces:
r = session.get(
url=config.api,
@@ -60,37 +57,34 @@ def getNamespacesAPI(config: Config = None, session=None):
timeout=30,
)
result = getJSON(r)
- Delay(config=config, session=session)
+ Delay(config=config)
try:
nsquery = result["query"]["namespaces"]
- except KeyError:
+ except KeyError as ke:
print("Error: could not get namespaces from the API request.")
print("HTTP %d" % r.status_code)
print(r.text)
- return None
+ raise ke
if "all" in namespaces:
- namespaces = []
- for i in nsquery.keys():
- if int(i) < 0: # -1: Special, -2: Media, excluding
- continue
- namespaces.append(int(i))
- namespacenames[int(i)] = nsquery[i]["*"]
+ namespaces = [int(i) for i in nsquery.keys() if int(i) >= 0]
+ # -1: Special, -2: Media, excluding
+ # namespacenames[int(i)] = nsquery[i]["*"]
else:
# check if those namespaces really exist in this wiki
namespaces2 = []
for i in nsquery.keys():
- bi = i
+ # bi = i
i = int(i)
if i < 0: # -1: Special, -2: Media, excluding
continue
if i in namespaces:
namespaces2.append(i)
- namespacenames[i] = nsquery[bi]["*"]
+ # namespacenames[i] = nsquery[bi]["*"]
namespaces = namespaces2
else:
namespaces = [0]
namespaces = list(set(namespaces)) # uniques
print("%d namespaces found" % (len(namespaces)))
- return namespaces, namespacenames
+ return namespaces
diff --git a/wikiteam3/dumpgenerator/api/page_titles.py b/wikiteam3/dumpgenerator/api/page_titles.py
index 4e12ba26..d1c9b29e 100644
--- a/wikiteam3/dumpgenerator/api/page_titles.py
+++ b/wikiteam3/dumpgenerator/api/page_titles.py
@@ -1,9 +1,11 @@
import re
-import sys
+from typing import List
from urllib.parse import urlparse
import mwclient
+import requests
from file_read_backwards import FileReadBackwards
+from mwclient.page import Page
from wikiteam3.dumpgenerator.api.namespaces import (
getNamespacesAPI,
@@ -15,10 +17,10 @@
from wikiteam3.utils.monkey_patch import DelaySession
-def getPageTitlesAPI(config: Config = None, session=None):
+def getPageTitlesAPI(config: Config, session: requests.Session):
"""Uses the API to get the list of page titles"""
titles = []
- namespaces, namespacenames = getNamespacesAPI(config=config, session=session)
+ namespaces: List[int] = getNamespacesAPI(config=config, session=session)
# apply delay to the session for mwclient.Site.allpages()
delay_session = DelaySession(
@@ -38,10 +40,11 @@ def getPageTitlesAPI(config: Config = None, session=None):
scheme=apiurl.scheme,
pool=session,
)
- for page in site.allpages(namespace=namespace):
- title = page.name
- titles.append(title)
- yield title
+ for page in site.allpages(namespace=str(namespace)):
+ if isinstance(page, Page):
+ title = page.name
+ titles.append(title)
+ yield title
if len(titles) != len(set(titles)):
print("Probably a loop, switching to next namespace")
@@ -50,10 +53,10 @@ def getPageTitlesAPI(config: Config = None, session=None):
delay_session.release()
-def getPageTitlesScraper(config: Config = None, session=None):
+def getPageTitlesScraper(config: Config, session: requests.Session):
"""Scrape the list of page titles from Special:Allpages"""
titles = []
- namespaces, namespacenames = getNamespacesScraper(config=config, session=session)
+ namespaces = getNamespacesScraper(config=config, session=session)
r_title = r'title="(?P<title>[^>]+)">'
r_suballpages1 = r'&from=(?P<from>[^>"]+)&to=(?P<to>[^>"]+)">'
r_suballpages2 = r'Special:Allpages/(?P<from>[^>"]+)">'
@@ -75,7 +78,7 @@ def getPageTitlesScraper(config: Config = None, session=None):
elif re.search(r_suballpages3, raw):
r_suballpages = r_suballpages3
c = 0
- oldfr = ""
+ # oldfr = ""
checked_suballpages = []
rawacum = raw
while r_suballpages and re.search(r_suballpages, raw) and c < deep:
@@ -105,10 +108,10 @@ def getPageTitlesScraper(config: Config = None, session=None):
if name not in checked_suballpages:
# to avoid reload dupe subpages links
checked_suballpages.append(name)
- Delay(config=config, session=session)
+ Delay(config=config)
# print ('Fetching URL: ', url)
r = session.get(url=url, timeout=10)
- raw = str(r.text)
+ raw = r.text
raw = cleanHTML(raw)
rawacum += raw # merge it after removed junk
print(
@@ -122,27 +125,26 @@ def getPageTitlesScraper(config: Config = None, session=None):
"pages",
)
- Delay(config=config, session=session)
+ Delay(config=config)
assert (
currfr is not None
), "re.search found the pattern, but re.finditer fails, why?"
- oldfr = currfr
+ # oldfr = currfr
c += 1
c = 0
m = re.compile(r_title).finditer(rawacum)
for i in m:
t = undoHTMLEntities(text=i.group("title"))
- if not t.startswith("Special:"):
- if t not in titles:
- titles.append(t)
- c += 1
+ if not t.startswith("Special:") and t not in titles:
+ titles.append(t)
+ c += 1
print(" %d titles retrieved in the namespace %d" % (c, namespace))
return titles
-def getPageTitles(config: Config = None, session=None):
+def getPageTitles(config: Config, session: requests.Session):
"""Get list of page titles"""
# http://en.wikipedia.org/wiki/Special:AllPages
# http://wiki.archiveteam.org/index.php?title=Special:AllPages
@@ -168,7 +170,7 @@ def getPageTitles(config: Config = None, session=None):
if config.api:
try:
titles = getPageTitlesAPI(config=config, session=session)
- except:
+ except Exception:
print("Error: could not get page titles from the API")
titles = getPageTitlesScraper(config=config, session=session)
elif config.index:
@@ -193,7 +195,7 @@ def getPageTitles(config: Config = None, session=None):
def checkTitleOk(
- config: Config = None,
+ config: Config,
):
try:
with FileReadBackwards(
@@ -208,13 +210,13 @@ def checkTitleOk(
lasttitle = frb.readline().strip()
if lasttitle == "":
lasttitle = frb.readline().strip()
- except:
+ except Exception:
lasttitle = "" # probably file does not exists
return lasttitle == "--END--"
-def readTitles(config: Config = None, session=None, start=None, batch=False):
+def readTitles(config: Config, session: requests.Session, start: str, batch: bool):
"""Read title list from a file, from the title "start" """
if not checkTitleOk(config):
getPageTitles(config=config, session=session)
@@ -225,7 +227,7 @@ def readTitles(config: Config = None, session=None, start=None, batch=False):
titlesfile = open(f"{config.path}/{titlesfilename}", encoding="utf-8")
titlelist = []
- seeking = start is not None
+ seeking = start != ""
with titlesfile as f:
for line in f:
title = line.strip()
diff --git a/wikiteam3/dumpgenerator/api/wiki_check.py b/wikiteam3/dumpgenerator/api/wiki_check.py
index 93e0465e..b5d9b0d1 100644
--- a/wikiteam3/dumpgenerator/api/wiki_check.py
+++ b/wikiteam3/dumpgenerator/api/wiki_check.py
@@ -5,13 +5,13 @@
from wikiteam3.utils import getUserAgent
-def getWikiEngine(url="", session: requests.Session = None) -> str:
+def getWikiEngine(url: str, session: requests.Session) -> str:
"""Returns the wiki engine of a URL, if known"""
if not session:
session = requests.Session() # Create a new session
session.headers.update({"User-Agent": getUserAgent()})
- r = session.post(url=url, timeout=30)
+ r = session.post(url=url, timeout=30) # type: ignore
if r.status_code == 405 or not r.text:
r = session.get(url=url, timeout=120)
result = r.text
diff --git a/wikiteam3/dumpgenerator/cli/cli.py b/wikiteam3/dumpgenerator/cli/cli.py
index 582ca862..bad2e2e3 100644
--- a/wikiteam3/dumpgenerator/cli/cli.py
+++ b/wikiteam3/dumpgenerator/cli/cli.py
@@ -6,7 +6,7 @@
import queue
import re
import sys
-from typing import *
+from typing import Any, Dict, Literal, Tuple
import requests
import urllib3
@@ -15,10 +15,9 @@
from wikiteam3.dumpgenerator.api.index_check import checkIndex
from wikiteam3.dumpgenerator.config import Config, newConfig
from wikiteam3.dumpgenerator.version import getVersion
-from wikiteam3.utils import domain2prefix, getUserAgent, mod_requests_text
-from wikiteam3.utils.login import uniLogin
+from wikiteam3.utils import domain2prefix, getUserAgent, mod_requests_text, uniLogin
+from wikiteam3.utils.user_agent import setupUserAgent
-from ...utils.user_agent import setupUserAgent
from .delay import Delay
@@ -223,13 +222,13 @@ def getParameters(params=None) -> Tuple[Config, Dict]:
########################################
# Create session
- mod_requests_text(requests) # monkey patch
+ mod_requests_text(requests) # type: ignore # monkey patch
session = requests.Session()
# Disable SSL verification
if args.insecure:
session.verify = False
- requests.packages.urllib3.disable_warnings()
+ urllib3.disable_warnings()
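+ # requests.packages.urllib3 is a deprecated alias; call urllib3 directly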
print("WARNING: SSL certificate verification disabled")
# Custom session retry
@@ -241,14 +240,12 @@ def getParameters(params=None) -> Tuple[Config, Dict]:
class CustomRetry(Retry):
def increment(self, method=None, url=None, *args, **kwargs):
if "_pool" in kwargs:
- conn = kwargs[
- "_pool"
- ] # type: urllib3.connectionpool.HTTPSConnectionPool
+ conn: urllib3.connectionpool.HTTPSConnectionPool = kwargs["_pool"]
if "response" in kwargs:
try:
# drain conn in advance so that it won't be put back into conn.pool
kwargs["response"].drain_conn()
- except:
+ except Exception:
pass
# Useless, retry happens inside urllib3
# for adapters in session.adapters.values():
@@ -256,12 +253,12 @@ def increment(self, method=None, url=None, *args, **kwargs):
# adapters.poolmanager.clear()
# Close existing connection so that a new connection will be used
- if hasattr(conn, "pool"):
+ if hasattr(conn, "pool") and conn.pool is not None:
pool = conn.pool # type: queue.Queue
try:
# Don't directly use this, This closes connection pool by making conn.pool = None
conn.close()
- except:
+ except Exception:
pass
conn.pool = pool
return super().increment(method=method, url=url, *args, **kwargs)
@@ -274,7 +271,8 @@ def sleep(self, response=None):
msg = "req retry (%s)" % response.status
else:
msg = None
- Delay(config=None, session=session, msg=msg, delay=backoff)
+ # previously called with config=None
+ Delay(config=config, msg=msg, delay=backoff)
__retries__ = CustomRetry(
total=int(args.retries),
@@ -292,7 +290,7 @@ def sleep(self, response=None):
)
session.mount("https://", HTTPAdapter(max_retries=__retries__))
session.mount("http://", HTTPAdapter(max_retries=__retries__))
- except:
+ except Exception:
# Our urllib3/requests is too old
pass
@@ -301,7 +299,7 @@ def sleep(self, response=None):
if args.cookies:
cj.load(args.cookies)
print("Using cookies from %s" % args.cookies)
- session.cookies = cj
+ session.cookies = cj # type: ignore
# Setup user agent
session.headers.update({"User-Agent": getUserAgent()})
@@ -312,17 +310,17 @@ def sleep(self, response=None):
session.auth = (args.user, args.password)
# Execute meta info params
- if args.wiki:
- if args.get_wiki_engine:
- print(getWikiEngine(url=args.wiki, session=session))
- sys.exit(0)
+ if args.wiki and args.get_wiki_engine:
+ print(getWikiEngine(url=args.wiki, session=session))
+ sys.exit(0)
# Get API and index and verify
- api = args.api if args.api else ""
- index = args.index if args.index else ""
+ api: str = args.api or ""
+ index: str = args.index or ""
if api == "" or index == "":
if args.wiki:
if getWikiEngine(args.wiki, session=session) == "MediaWiki":
+ index2: str
api2, index2 = mwGetAPIAndIndex(args.wiki, session=session)
if not api:
api = api2
@@ -339,9 +337,12 @@ def sleep(self, response=None):
# print (api)
# print (index)
- index2 = None
+ index2 = ""
- check, checkedapi = False, None
+ check: (
+ tuple[Literal[True], Any, str] | tuple[Literal[True], None, str] | None
+ ) = False # type: ignore
+ checkedapi = ""
if api:
check, checkedapi = checkRetryAPI(
api=api,
@@ -349,9 +350,9 @@ def sleep(self, response=None):
session=session,
)
- if api and check:
+ if api != "" and check:
# Replace the index URL we got from the API check
- index2 = check[1]
+ index2 = str(check[1])
api = checkedapi
print("API is OK: ", checkedapi)
else:
@@ -391,8 +392,10 @@ def sleep(self, response=None):
try:
index = "/".join(index.split("/")[:-1])
except AttributeError:
- index = None
- if index and checkIndex(index=index, cookies=args.cookies, session=session):
+ index = ""
+ if index != "" and checkIndex(
+ index=index, cookies=args.cookies, session=session
+ ):
print("index.php is OK")
else:
print("Error in index.php.")
@@ -473,7 +476,7 @@ def sleep(self, response=None):
# calculating path, if not defined by user with --path=
if not config.path:
config.path = "./{}-{}-wikidump".format(
- domain2prefix(config=config, session=session),
+ domain2prefix(config=config),
config.date,
)
print("No --path argument provided. Defaulting to:")
diff --git a/wikiteam3/dumpgenerator/cli/delay.py b/wikiteam3/dumpgenerator/cli/delay.py
index 7ebbd021..64e64cd7 100644
--- a/wikiteam3/dumpgenerator/cli/delay.py
+++ b/wikiteam3/dumpgenerator/cli/delay.py
@@ -1,5 +1,3 @@
-import itertools
-import sys
import threading
import time
@@ -21,7 +19,7 @@ def animate(self):
time.sleep(0.3)
- def __init__(self, config: Config = None, session=None, msg=None, delay=None):
+ def __init__(self, config: Config, msg=None, delay=None):
"""Add a delay if configured for that"""
self.ellipses: str = "."
diff --git a/wikiteam3/dumpgenerator/config.py b/wikiteam3/dumpgenerator/config.py
index 21dbff32..97b64424 100644
--- a/wikiteam3/dumpgenerator/config.py
+++ b/wikiteam3/dumpgenerator/config.py
@@ -19,10 +19,12 @@
}
"""
+import contextlib
import dataclasses
import json
import sys
-from typing import *
+from dataclasses import field
+from typing import List
def _dataclass_from_dict(klass_or_obj, d):
@@ -43,7 +45,7 @@ def asdict(self):
retries: int = 0
path: str = ""
logs: bool = False
- date: str = False
+ date: str = ""
# URL params
index: str = ""
@@ -56,8 +58,8 @@ def asdict(self):
xmlrevisions: bool = False
xmlrevisions_page: bool = False
images: bool = False
- namespaces: List[int] = None
- exnamespaces: List[int] = None
+ namespaces: List[int] = field(default_factory=lambda: [])
+ exnamespaces: List[int] = field(default_factory=lambda: [])
api_chunksize: int = 0 # arvlimit, ailimit, etc
export: str = "" # Special:Export page name
@@ -73,24 +75,21 @@ def newConfig(configDict) -> Config:
return _dataclass_from_dict(Config, configDict)
-def loadConfig(config: Config = None, configfilename=""):
+def loadConfig(config: Config, configfilename=""):
"""Load config file"""
configDict = dataclasses.asdict(config)
if config.path:
- try:
+ with contextlib.suppress(Exception):
with open(f"{config.path}/{configfilename}", encoding="utf-8") as infile:
configDict.update(json.load(infile))
return newConfig(configDict)
- except:
- pass
-
print("There is no config file. We can't resume. Start a new dump.")
sys.exit()
-def saveConfig(config: Config = None, configfilename=""):
+def saveConfig(config: Config, configfilename=""):
"""Save config file"""
with open(f"{config.path}/{configfilename}", "w", encoding="utf-8") as outfile:
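A side note on the namespaces/exnamespaces change above: dataclasses reject plainly mutable defaults, which is why the fields switch to field(default_factory=...). A minimal sketch of the difference (the Example class below is illustrative, not part of the repo):

from dataclasses import dataclass, field
from typing import List

@dataclass
class Example:
    # writing `items: List[int] = []` here would raise ValueError (mutable default)
    items: List[int] = field(default_factory=list)

a, b = Example(), Example()
a.items.append(1)
assert b.items == []  # each instance gets its own list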
diff --git a/wikiteam3/dumpgenerator/dump/generator.py b/wikiteam3/dumpgenerator/dump/generator.py
index 80ca3c4f..41fa132d 100644
--- a/wikiteam3/dumpgenerator/dump/generator.py
+++ b/wikiteam3/dumpgenerator/dump/generator.py
@@ -1,10 +1,12 @@
try:
import contextlib
- import http.cookiejar
+
+ # import http.cookiejar
import os
import re
import sys
import traceback
+ from typing import List
from file_read_backwards import FileReadBackwards
@@ -20,7 +22,7 @@
)
sys.exit(1)
-from typing import *
+from typing import Dict
from wikiteam3.dumpgenerator.cli import bye, getParameters, welcome
from wikiteam3.dumpgenerator.config import Config, loadConfig, saveConfig
@@ -75,7 +77,7 @@ def __init__(params=None):
else contextlib.nullcontext()
):
print(welcome())
- print(f"Analysing {config.api if config.api else config.index}")
+ print(f"Analysing {config.api or config.index}")
# creating path or resuming if desired
c = 2
@@ -124,57 +126,58 @@ def __init__(params=None):
bye()
@staticmethod
- def createNewDump(config: Config = None, other: Dict = None):
+ def createNewDump(config: Config, other: Dict):
+ # other: Dict = None
# we do lazy title dumping here :)
images = []
print("Trying generating a new dump into a new directory...")
if config.xml:
- generateXMLDump(config=config, session=other["session"])
+ generateXMLDump(config=config, resume=False, session=other["session"])
checkXMLIntegrity(config=config, session=other["session"])
if config.images:
images += Image.getImageNames(config=config, session=other["session"])
- Image.saveImageNames(config=config, images=images, session=other["session"])
+ Image.saveImageNames(config=config, images=images)
Image.generateImageDump(
config=config, other=other, images=images, session=other["session"]
)
if config.logs:
saveLogs(config=config, session=other["session"])
+ # other: Dict = None
@staticmethod
- def resumePreviousDump(config: Config = None, other: Dict = None):
- images = []
+ def resumePreviousDump(config: Config, other: Dict):
+ images: List[str] = []
print("Resuming previous dump process...")
if config.xml:
# checking xml dump
xmliscomplete = False
lastxmltitle = None
lastxmlrevid = None
- try:
+
+ # Exception means probably file does not exist
+ with contextlib.suppress(Exception):
with FileReadBackwards(
"%s/%s-%s-%s.xml"
% (
config.path,
- domain2prefix(config=config, session=other["session"]),
+ domain2prefix(config=config),
config.date,
"current" if config.curonly else "history",
),
encoding="utf-8",
) as frb:
- for l in frb:
- if l.strip() == "</mediawiki>":
+ for line in frb:
+ if line.strip() == "</mediawiki>":
# xml dump is complete
xmliscomplete = True
break
- if xmlrevid := re.search(r" <id>([^<]+)</id>", l):
+ if xmlrevid := re.search(r" <id>([^<]+)</id>", line):
lastxmlrevid = int(xmlrevid.group(1))
- if xmltitle := re.search(r"<title>([^<]+)</title>", l):
+ if xmltitle := re.search(r"<title>([^<]+)</title>", line):
lastxmltitle = undoHTMLEntities(text=xmltitle.group(1))
break
- except:
- pass # probably file does not exists
-
if xmliscomplete:
print("XML dump was completed in the previous session")
elif lastxmltitle:
@@ -190,7 +193,7 @@ def resumePreviousDump(config: Config = None, other: Dict = None):
else:
# corrupt? only has XML header?
print("XML is corrupt? Regenerating...")
- generateXMLDump(config=config, session=other["session"])
+ generateXMLDump(config=config, resume=False, session=other["session"])
if config.images:
# load images list
@@ -203,7 +206,9 @@ def resumePreviousDump(config: Config = None, other: Dict = None):
if os.path.exists(imagesFilePath):
with open(imagesFilePath) as f:
lines = f.read().splitlines()
- images.extend(l.split("\t") for l in lines if re.search(r"\t", l))
+ images.extend(
+ line.split("\t") for line in lines if re.search(r"\t", line)
+ )
if len(lines) == 0: # empty file
lastimage = "--EMPTY--"
if not lastimage:
@@ -226,16 +231,14 @@ def resumePreviousDump(config: Config = None, other: Dict = None):
Image.saveImageNames(config=config, images=images)
# checking images directory
listdir = []
- try:
+ with contextlib.suppress(OSError):
listdir = os.listdir(f"{config.path}/images")
- except OSError:
- pass # probably directory does not exist
listdir = set(listdir)
c_desc = 0
c_images = 0
c_checked = 0
for filename, url, uploader, size, sha1 in images:
- lastfilename = filename
+ # lastfilename = filename
if other["filenamelimit"] < len(filename.encode("utf-8")):
logerror(
config=config,
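The try/except/pass blocks collapsed in this file (and in config.py) rely on contextlib.suppress being an exact equivalent of an except clause that ignores the named exceptions; a standalone sketch:

import contextlib

# with-block form...
with contextlib.suppress(FileNotFoundError):
    with open("does-not-exist.txt") as f:
        f.read()

# ...does the same as the old form:
try:
    with open("does-not-exist.txt") as f:
        f.read()
except FileNotFoundError:
    pass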
diff --git a/wikiteam3/dumpgenerator/dump/image/image.py b/wikiteam3/dumpgenerator/dump/image/image.py
index b79e9ebb..f5eedfc3 100644
--- a/wikiteam3/dumpgenerator/dump/image/image.py
+++ b/wikiteam3/dumpgenerator/dump/image/image.py
@@ -4,7 +4,7 @@
import sys
import time
import urllib.parse
-from typing import Dict, List, Optional
+from typing import Dict, List
import requests
@@ -20,19 +20,19 @@
class Image:
@staticmethod
- def getXMLFileDesc(config: Config = None, title="", session=None):
+ def getXMLFileDesc(config: Config, title: str, session: requests.Session):
"""Get XML for image description page"""
- config.curonly = 1 # tricky to get only the most recent desc
+ config.curonly = True # tricky to get only the most recent desc
return "".join(
list(getXMLPage(config=config, title=title, verbose=False, session=session))
)
+ # other: Dict = None,
+ # images: List[List] = None,
+ # session: requests.Session = None,
@staticmethod
def generateImageDump(
- config: Config = None,
- other: Dict = None,
- images: List[List] = None,
- session: requests.Session = None,
+ config: Config, other: Dict, images: List[List], session: requests.Session
):
"""Save files and descriptions using a file list\n
Deprecated: `start` is not used anymore."""
@@ -49,7 +49,9 @@ def generateImageDump(
bypass_cdn_image_compression: bool = other["bypass_cdn_image_compression"]
- def modify_params(params: Optional[Dict] = None) -> Dict:
+ def modify_params(
+ params: Dict[str, (str | int)] = {}
+ ) -> Dict[str, (str | int)]:
"""bypass Cloudflare Polish (image optimization)"""
if params is None:
params = {}
@@ -101,7 +103,7 @@ def check_response(r: requests.Response) -> None:
+ "we will not try to download it...",
)
else:
- Delay(config=config, session=session)
+ Delay(config=config)
original_url = url
r = session.head(url=url, params=modify_params(), allow_redirects=True)
check_response(r)
@@ -116,17 +118,20 @@ def check_response(r: requests.Response) -> None:
check_response(r)
# Try to fix a broken HTTP to HTTPS redirect
- if r.status_code == 404 and original_url_redirected:
- if (
+ if (
+ r.status_code == 404
+ and original_url_redirected
+ and (
original_url.split("://")[0] == "http"
and url.split("://")[0] == "https"
- ):
- url = "https://" + original_url.split("://")[1]
- # print 'Maybe a broken http to https redirect, trying ', url
- r = session.get(
- url=url, params=modify_params(), allow_redirects=False
- )
- check_response(r)
+ )
+ ):
+ url = "https://" + original_url.split("://")[1]
+ # print 'Maybe a broken http to https redirect, trying ', url
+ r = session.get(
+ url=url, params=modify_params(), allow_redirects=False
+ )
+ check_response(r)
if r.status_code == 200:
try:
@@ -160,7 +165,7 @@ def check_response(r: requests.Response) -> None:
if os.path.isfile(f"{filename3}.desc"):
toContinue += 1
else:
- Delay(config=config, session=session)
+ Delay(config=config)
# saving description if any
title = f"Image:{filename}"
try:
@@ -231,7 +236,7 @@ def check_response(r: requests.Response) -> None:
)
@staticmethod
- def getImageNames(config: Config = None, session: requests.Session = None):
+ def getImageNames(config: Config, session: requests.Session):
"""Get list of image names"""
print("Retrieving image filenames")
@@ -251,7 +256,7 @@ def getImageNames(config: Config = None, session: requests.Session = None):
return images
@staticmethod
- def getImageNamesScraper(config: Config = None, session: requests.Session = None):
+ def getImageNamesScraper(config: Config, session: requests.Session):
"""Retrieve file list: filename, url, uploader"""
images = []
@@ -268,7 +273,7 @@ def getImageNamesScraper(config: Config = None, session: requests.Session = None
timeout=30,
)
raw = r.text
- Delay(config=config, session=session)
+ Delay(config=config)
# delicate wiki
if re.search(
r"(?i)(allowed memory size of \d+ bytes exhausted|Call to a member function getURL)",
@@ -345,7 +350,7 @@ def getImageNamesScraper(config: Config = None, session: requests.Session = None
return images
@staticmethod
- def getImageNamesAPI(config: Config = None, session: requests.Session = None):
+ def getImageNamesAPI(config: Config, session: requests.Session):
"""Retrieve file list: filename, url, uploader, size, sha1"""
# # Commented by @yzqzss:
# https://www.mediawiki.org/wiki/API:Allpages
@@ -377,7 +382,7 @@ def getImageNamesAPI(config: Config = None, session: requests.Session = None):
r = session.get(url=config.api, params=params, timeout=30)
handleStatusCode(r)
jsonimages = getJSON(r)
- Delay(config=config, session=session)
+ Delay(config=config)
if "query" in jsonimages:
countImages += len(jsonimages["query"]["allimages"])
@@ -465,7 +470,7 @@ def getImageNamesAPI(config: Config = None, session: requests.Session = None):
r = session.get(url=config.api, params=params, timeout=30)
handleStatusCode(r)
jsonimages = getJSON(r)
- Delay(config=config, session=session)
+ Delay(config=config)
if "query" not in jsonimages:
# if the API doesn't return query data, then we're done
@@ -512,7 +517,7 @@ def getImageNamesAPI(config: Config = None, session: requests.Session = None):
return images
@staticmethod
- def saveImageNames(config: Config = None, images: List[List] = None, session=None):
+ def saveImageNames(config: Config, images: List[List]):
"""Save image list in a file, including filename, url, uploader, size and sha1"""
imagesfilename = "{}-{}-images.txt".format(
@@ -545,7 +550,7 @@ def saveImageNames(config: Config = None, images: List[List] = None, session=Non
print("Image filenames and URLs saved at...", imagesfilename)
@staticmethod
- def curateImageURL(config: Config = None, url=""):
+ def curateImageURL(config: Config, url=""):
"""Returns an absolute URL for an image, adding the domain if missing"""
if config.index:
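The flattened condition in generateImageDump above implements one idea: if a download that followed redirects ends in a 404 and the scheme changed from http to https along the way, retry the original path directly over https. A simplified sketch of that logic (retry_https_on_broken_redirect is a hypothetical helper, not repo code):

import requests

def retry_https_on_broken_redirect(
    session: requests.Session, original_url: str, url: str, r: requests.Response
) -> requests.Response:
    # 404 after following redirects, and the wiki bounced http:// to https://:
    # re-request the original path over https without following redirects.
    if (
        r.status_code == 404
        and original_url.split("://")[0] == "http"
        and url.split("://")[0] == "https"
    ):
        retry_url = "https://" + original_url.split("://")[1]
        r = session.get(url=retry_url, allow_redirects=False)
    return r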
diff --git a/wikiteam3/dumpgenerator/dump/misc/index_php.py b/wikiteam3/dumpgenerator/dump/misc/index_php.py
index b2ae3279..ac96adf6 100644
--- a/wikiteam3/dumpgenerator/dump/misc/index_php.py
+++ b/wikiteam3/dumpgenerator/dump/misc/index_php.py
@@ -1,20 +1,22 @@
import os
+import requests
+
from wikiteam3.dumpgenerator.cli import Delay
from wikiteam3.dumpgenerator.config import Config
from wikiteam3.utils import removeIP
-def saveIndexPHP(config: Config = None, session=None):
+def saveIndexPHP(config: Config, session: requests.Session):
"""Save index.php as .html, to preserve license details available at the bottom of the page"""
if os.path.exists(f"{config.path}/index.html"):
print("index.html exists, do not overwrite")
else:
print("Downloading index.php (Main Page) as index.html")
- r = session.post(url=config.index, params=None, timeout=10)
- raw = str(r.text)
- Delay(config=config, session=session)
+ r = session.post(url=config.index, params=None, timeout=10) # type: ignore
+ raw = r.text
+ Delay(config=config)
raw = removeIP(raw=raw)
with open(f"{config.path}/index.html", "w", encoding="utf-8") as outfile:
outfile.write(raw)
diff --git a/wikiteam3/dumpgenerator/dump/misc/site_info.py b/wikiteam3/dumpgenerator/dump/misc/site_info.py
index 0a8160f0..a357017b 100644
--- a/wikiteam3/dumpgenerator/dump/misc/site_info.py
+++ b/wikiteam3/dumpgenerator/dump/misc/site_info.py
@@ -1,58 +1,61 @@
import json
import os
+import requests
+
from wikiteam3.dumpgenerator.api import getJSON
from wikiteam3.dumpgenerator.cli import Delay
from wikiteam3.dumpgenerator.config import Config
-def saveSiteInfo(config: Config = None, session=None):
+def saveSiteInfo(config: Config, session: requests.Session):
"""Save a file with site info"""
if not config.api:
return
if os.path.exists(f"{config.path}/siteinfo.json"):
print("siteinfo.json exists, do not overwrite")
- else:
- print("Downloading site info as siteinfo.json")
+ return
+
+ print("Downloading site info as siteinfo.json")
- # MediaWiki 1.13+
+ # MediaWiki 1.13+
+ r = session.get(
+ url=config.api,
+ params={
+ "action": "query",
+ "meta": "siteinfo",
+ "siprop": "general|namespaces|statistics|dbrepllag|interwikimap|namespacealiases|specialpagealiases|usergroups|extensions|skins|magicwords|fileextensions|rightsinfo",
+ "sinumberingroup": 1,
+ "format": "json",
+ },
+ timeout=10,
+ )
+ # MediaWiki 1.11-1.12
+ if "query" not in getJSON(r):
+ r = session.get(
+ url=config.api,
+ params={
+ "action": "query",
+ "meta": "siteinfo",
+ "siprop": "general|namespaces|statistics|dbrepllag|interwikimap",
+ "format": "json",
+ },
+ timeout=10,
+ )
+ # MediaWiki 1.8-1.10
+ if "query" not in getJSON(r):
r = session.get(
url=config.api,
params={
"action": "query",
"meta": "siteinfo",
- "siprop": "general|namespaces|statistics|dbrepllag|interwikimap|namespacealiases|specialpagealiases|usergroups|extensions|skins|magicwords|fileextensions|rightsinfo",
- "sinumberingroup": 1,
+ "siprop": "general|namespaces",
"format": "json",
},
timeout=10,
)
- # MediaWiki 1.11-1.12
- if "query" not in getJSON(r):
- r = session.get(
- url=config.api,
- params={
- "action": "query",
- "meta": "siteinfo",
- "siprop": "general|namespaces|statistics|dbrepllag|interwikimap",
- "format": "json",
- },
- timeout=10,
- )
- # MediaWiki 1.8-1.10
- if "query" not in getJSON(r):
- r = session.get(
- url=config.api,
- params={
- "action": "query",
- "meta": "siteinfo",
- "siprop": "general|namespaces",
- "format": "json",
- },
- timeout=10,
- )
- result = getJSON(r)
- Delay(config=config, session=session)
- with open(f"{config.path}/siteinfo.json", "w", encoding="utf-8") as outfile:
- outfile.write(json.dumps(result, indent=4, sort_keys=True))
+ result = getJSON(r)
+ Delay(config=config)
+ with open(f"{config.path}/siteinfo.json", "w", encoding="utf-8") as outfile:
+ outfile.write(json.dumps(result, indent=4, sort_keys=True))
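The restructured saveSiteInfo keeps the original strategy: request the richest siteinfo first, then fall back to smaller siprop sets for older MediaWiki releases (1.11-1.12, then 1.8-1.10). A hedged sketch of that fallback pattern with illustrative names (fetch_siteinfo and SIPROP_CANDIDATES are not repo API):

import requests

SIPROP_CANDIDATES = [
    # MediaWiki 1.13+
    "general|namespaces|statistics|dbrepllag|interwikimap|namespacealiases|specialpagealiases|usergroups|extensions|skins|magicwords|fileextensions|rightsinfo",
    # MediaWiki 1.11-1.12
    "general|namespaces|statistics|dbrepllag|interwikimap",
    # MediaWiki 1.8-1.10
    "general|namespaces",
]

def fetch_siteinfo(session: requests.Session, api_url: str) -> dict:
    for siprop in SIPROP_CANDIDATES:
        r = session.get(
            url=api_url,
            params={"action": "query", "meta": "siteinfo", "siprop": siprop, "format": "json"},
            timeout=10,
        )
        data = r.json()
        if "query" in data:  # older wikis reject the longer prop lists; keep falling back
            return data
    return {}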
diff --git a/wikiteam3/dumpgenerator/dump/misc/special_logs.py b/wikiteam3/dumpgenerator/dump/misc/special_logs.py
index 0b35939d..666c8a1e 100644
--- a/wikiteam3/dumpgenerator/dump/misc/special_logs.py
+++ b/wikiteam3/dumpgenerator/dump/misc/special_logs.py
@@ -1,8 +1,10 @@
+import requests
+
from wikiteam3.dumpgenerator.cli import Delay
from wikiteam3.dumpgenerator.config import Config
-def saveLogs(config: Config = None, session=None):
+def saveLogs(config: Config, session: requests.Session):
"""Save Special:Log"""
# get all logs from Special:Log
"""parse
@@ -20,4 +22,4 @@ def saveLogs(config: Config = None, session=None):
"""
- Delay(config=config, session=session)
+ Delay(config=config)
diff --git a/wikiteam3/dumpgenerator/dump/misc/special_version.py b/wikiteam3/dumpgenerator/dump/misc/special_version.py
index 55473373..c15e175f 100644
--- a/wikiteam3/dumpgenerator/dump/misc/special_version.py
+++ b/wikiteam3/dumpgenerator/dump/misc/special_version.py
@@ -1,11 +1,13 @@
import os
+import requests
+
from wikiteam3.dumpgenerator.cli import Delay
from wikiteam3.dumpgenerator.config import Config
from wikiteam3.utils import removeIP
-def saveSpecialVersion(config: Config = None, session=None):
+def saveSpecialVersion(config: Config, session: requests.Session):
"""Save Special:Version as .html, to preserve extensions details"""
if os.path.exists(f"{config.path}/SpecialVersion.html"):
@@ -13,10 +15,10 @@ def saveSpecialVersion(config: Config = None, session=None):
else:
print("Downloading Special:Version with extensions and other related info")
r = session.post(
- url=config.index, params={"title": "Special:Version"}, timeout=10
+ url=config.index, params={"title": "Special:Version"}, timeout=10 # type: ignore
)
- raw = str(r.text)
- Delay(config=config, session=session)
+ raw = r.text
+ Delay(config=config)
raw = str(removeIP(raw=raw))
with open(
f"{config.path}/SpecialVersion.html", "w", encoding="utf-8"
diff --git a/wikiteam3/dumpgenerator/dump/page/xmlexport/page_xml.py b/wikiteam3/dumpgenerator/dump/page/xmlexport/page_xml.py
index 277b05f9..59d9d6e8 100644
--- a/wikiteam3/dumpgenerator/dump/page/xmlexport/page_xml.py
+++ b/wikiteam3/dumpgenerator/dump/page/xmlexport/page_xml.py
@@ -1,10 +1,13 @@
+import requests
+
from wikiteam3.dumpgenerator.config import Config
from .page_xml_api import getXMLPageWithApi
from .page_xml_export import getXMLPageWithExport
-def getXMLPage(config: Config = None, title="", verbose=True, session=None):
+# title="", verbose=True
+def getXMLPage(config: Config, title: str, verbose: bool, session: requests.Session):
if config.xmlapiexport:
return getXMLPageWithApi(
config=config, title=title, verbose=verbose, session=session
diff --git a/wikiteam3/dumpgenerator/dump/page/xmlexport/page_xml_api.py b/wikiteam3/dumpgenerator/dump/page/xmlexport/page_xml_api.py
index 9e9b676e..f6a158ae 100644
--- a/wikiteam3/dumpgenerator/dump/page/xmlexport/page_xml_api.py
+++ b/wikiteam3/dumpgenerator/dump/page/xmlexport/page_xml_api.py
@@ -1,7 +1,7 @@
import re
import time
import traceback
-from typing import *
+from typing import Dict
import requests
@@ -11,58 +11,71 @@
from wikiteam3.dumpgenerator.log import logerror
try:
- import xml.etree.ElementTree as ET
+ import xml.etree.ElementTree as ElementTree
except ImportError:
- import xml.etree.ElementTree as ET
+ import xml.etree.ElementTree as ElementTree
import xml.dom.minidom as MD
-def reconstructRevisions(root=None):
- # print ET.tostring(rev)
- page = ET.Element("stub")
+def reconstructRevisions(root: ElementTree.Element):
+ # print ElementTree.tostring(rev)
+ page = ElementTree.Element("stub")
edits = 0
- for rev in (
- root.find("query").find("pages").find("page").find("revisions").findall("rev")
- ):
+
+ query: (ElementTree.Element | None) = root.find("query")
+ if query is None:
+ raise ValueError("query was none")
+ pages: (ElementTree.Element | None) = query.find("pages")
+ if pages is None:
+ raise ValueError("pages was none")
+ page_element: (ElementTree.Element | None) = pages.find("page")
+ if page_element is None:
+ raise ValueError("page was none")
+ revisions: (ElementTree.Element | None) = page_element.find("revisions")
+ if revisions is None:
+ raise ValueError("revisions was none")
+ for rev in revisions.findall("rev"):
try:
- rev_ = ET.SubElement(page, "revision")
+ rev_ = ElementTree.SubElement(page, "revision")
# id
- ET.SubElement(rev_, "id").text = rev.attrib["revid"]
+ ElementTree.SubElement(rev_, "id").text = rev.attrib["revid"]
# parentid (optional, export-0.7+)
if "parentid" in rev.attrib:
- ET.SubElement(rev_, "parentid").text = rev.attrib["parentid"]
+ ElementTree.SubElement(rev_, "parentid").text = rev.attrib["parentid"]
# timestamp
- ET.SubElement(rev_, "timestamp").text = rev.attrib["timestamp"]
+ ElementTree.SubElement(rev_, "timestamp").text = rev.attrib["timestamp"]
# contributor
- contributor = ET.SubElement(rev_, "contributor")
+ contributor = ElementTree.SubElement(rev_, "contributor")
if "userhidden" not in rev.attrib:
- ET.SubElement(contributor, "username").text = rev.attrib["user"]
- ET.SubElement(contributor, "id").text = rev.attrib["userid"]
+ ElementTree.SubElement(contributor, "username").text = rev.attrib[
+ "user"
+ ]
+ ElementTree.SubElement(contributor, "id").text = rev.attrib["userid"]
else:
contributor.set("deleted", "deleted")
# comment (optional)
if "commenthidden" in rev.attrib:
print("commenthidden")
- comment = ET.SubElement(rev_, "comment")
+ comment = ElementTree.SubElement(rev_, "comment")
comment.set("deleted", "deleted")
elif "comment" in rev.attrib and rev.attrib["comment"]: # '' is empty
- comment = ET.SubElement(rev_, "comment")
+ comment = ElementTree.SubElement(rev_, "comment")
comment.text = rev.attrib["comment"]
# minor edit (optional)
if "minor" in rev.attrib:
- ET.SubElement(rev_, "minor")
+ ElementTree.SubElement(rev_, "minor")
# model and format (optional, export-0.8+)
if "contentmodel" in rev.attrib:
- ET.SubElement(rev_, "model").text = rev.attrib[
+ ElementTree.SubElement(rev_, "model").text = rev.attrib[
"contentmodel"
] # default: 'wikitext'
if "contentformat" in rev.attrib:
- ET.SubElement(rev_, "format").text = rev.attrib[
+ ElementTree.SubElement(rev_, "format").text = rev.attrib[
"contentformat"
] # default: 'text/x-wiki'
# text
- text = ET.SubElement(rev_, "text")
+ text = ElementTree.SubElement(rev_, "text")
if "texthidden" not in rev.attrib:
text.attrib["xml:space"] = "preserve"
text.attrib["bytes"] = rev.attrib["size"]
@@ -72,24 +85,28 @@ def reconstructRevisions(root=None):
text.set("deleted", "deleted")
# sha1
if "sha1" in rev.attrib:
- sha1 = ET.SubElement(rev_, "sha1")
+ sha1 = ElementTree.SubElement(rev_, "sha1")
sha1.text = rev.attrib["sha1"]
elif "sha1hidden" in rev.attrib:
- ET.SubElement(rev_, "sha1") # stub
+ ElementTree.SubElement(rev_, "sha1") # stub
edits += 1
except Exception as e:
- # logerror(config=config, text='Error reconstructing revision, xml:%s' % (ET.tostring(rev)))
- print(ET.tostring(rev))
+ # logerror(config=config, text='Error reconstructing revision, xml:%s' % (ElementTree.tostring(rev)))
+ print(ElementTree.tostring(rev))
traceback.print_exc()
- page = None
+ page = None # type: ignore
edits = 0
raise e
return page, edits
+# headers: Dict = None, params: Dict = None
def getXMLPageCoreWithApi(
- headers: Dict = None, params: Dict = None, config: Config = None, session=None
+ headers: Dict,
+ params: Dict[str, (str | int)],
+ config: Config,
+ session: requests.Session,
):
""" """
# just send the API request
@@ -101,7 +118,7 @@ def getXMLPageCoreWithApi(
increment = 20 # increment every retry
while not re.search(
- r"" if not config.curonly else r"", xml
+ r"" if config.curonly else r"", xml
) or re.search(r"", xml):
if c > 0 and c < maxretries:
wait = (
@@ -114,8 +131,8 @@ def getXMLPageCoreWithApi(
time.sleep(wait)
# reducing server load requesting smallest chunks (if curonly then
# rvlimit = 1 from mother function)
- if params["rvlimit"] > 1:
- params["rvlimit"] = params["rvlimit"] / 2 # half
+ if int(params["rvlimit"]) > 1:
+ params["rvlimit"] = int(params["rvlimit"]) // 2 # half
if c >= maxretries:
print(" We have retried %d times" % (c))
print(
@@ -130,7 +147,7 @@ def getXMLPageCoreWithApi(
print(" Saving in the errors log, and skipping...")
logerror(
config=config,
- text=f'Error while retrieving the last revision of "{params["titles" if config.xmlapiexport else "pages"].decode("utf-8")}". Skipping.',
+ text=f'Error while retrieving the last revision of "{params["titles" if config.xmlapiexport else "pages"]}". Skipping.', # .decode("utf-8")
)
raise ExportAbortedError(config.index)
# FIXME HANDLE HTTP Errors HERE
@@ -149,7 +166,10 @@ def getXMLPageCoreWithApi(
return xml
-def getXMLPageWithApi(config: Config = None, title="", verbose=True, session=None):
+# title="", verbose=True
+def getXMLPageWithApi(
+ config: Config, title: str, verbose: bool, session: requests.Session
+):
"""Get the full history (or current only) of a page using API:Query
if params['curonly'] is set, then using export&exportwrap to export
"""
@@ -170,42 +190,52 @@ def getXMLPageWithApi(config: Config = None, title="", verbose=True, session=Non
"rvcontinue": None,
"rvlimit": config.api_chunksize,
}
- firstpartok = False
- lastcontinue = None
+ firstpartok: bool = False
+ lastcontinue: str = ""
numberofedits = 0
ret = ""
- continueKey: Optional[str] = None
+ continueKey: str = ""
while True:
# in case the last request is not right, saving last time's progress
if not firstpartok:
try:
lastcontinue = params[continueKey]
- except:
- lastcontinue = None
+ except Exception:
+ lastcontinue = ""
- xml = getXMLPageCoreWithApi(params=params, config=config, session=session)
+ xml = getXMLPageCoreWithApi(
+ headers={}, params=params, config=config, session=session
+ )
if xml == "":
# just return so that we can continue, and getXMLPageCoreWithApi will log the error
return
try:
- root = ET.fromstring(xml.encode("utf-8"))
- except:
+ root = ElementTree.fromstring(xml.encode("utf-8"))
+ except Exception:
continue
try:
- retpage = root.find("query").find("pages").find("page")
- except:
+ ret_query: (ElementTree.Element | None) = root.find("query")
+ if ret_query is None:
+ raise Exception("query was none")
+ ret_pages: (ElementTree.Element | None) = ret_query.find("pages")
+ if ret_pages is None:
+ raise Exception("pages was none")
+ ret_page = ret_pages.find("page")
+ if ret_page is None:
+ continue
+ except Exception:
continue
- if "missing" in retpage.attrib or "invalid" in retpage.attrib:
+ if "missing" in ret_page.attrib or "invalid" in ret_page.attrib:
print("Page not found")
raise PageMissingError(params["titles"], xml)
if not firstpartok:
try:
# build the firstpart by ourselves to improve the memory usage
ret = "  <page>\n"
- ret += "    <title>%s</title>\n" % (retpage.attrib["title"])
- ret += "    <ns>%s</ns>\n" % (retpage.attrib["ns"])
- ret += "    <id>%s</id>\n" % (retpage.attrib["pageid"])
- except:
+ ret += "    <title>%s</title>\n" % (ret_page.attrib["title"])
+ ret += "    <ns>%s</ns>\n" % (ret_page.attrib["ns"])
+ ret += "    <id>%s</id>\n" % (ret_page.attrib["pageid"])
+ except Exception:
firstpartok = False
continue
else:
@@ -213,30 +243,34 @@ def getXMLPageWithApi(config: Config = None, title="", verbose=True, session=Non
yield ret
continueVal = None
- if root.find("continue") is not None:
+ continue_element: (ElementTree.Element | None) = root.find("continue")
+ query_continue_element: (ElementTree.Element | None) = root.find(
+ "query-continue"
+ )
+ if continue_element is not None:
# uses continue.rvcontinue
# MW 1.26+
continueKey = "rvcontinue"
- continueVal = root.find("continue").attrib["rvcontinue"]
- elif root.find("query-continue") is not None:
- revContinue = root.find("query-continue").find("revisions")
- assert revContinue is not None, "Should only have revisions continue"
- if "rvcontinue" in revContinue.attrib:
+ continueVal = continue_element.attrib["rvcontinue"]
+ elif query_continue_element is not None:
+ rev_continue = query_continue_element.find("revisions")
+ assert rev_continue is not None, "Should only have revisions continue"
+ if "rvcontinue" in rev_continue.attrib:
# MW 1.21 ~ 1.25
continueKey = "rvcontinue"
- continueVal = revContinue.attrib["rvcontinue"]
- elif "rvstartid" in revContinue.attrib:
+ continueVal = rev_continue.attrib["rvcontinue"]
+ elif "rvstartid" in rev_continue.attrib:
# TODO: MW ????
continueKey = "rvstartid"
- continueVal = revContinue.attrib["rvstartid"]
+ continueVal = rev_continue.attrib["rvstartid"]
else:
# blindly assume the first attribute is the continue key
# may never happen
assert (
- len(revContinue.attrib) > 0
+ len(rev_continue.attrib) > 0
), "Should have at least one attribute"
- for continueKey in revContinue.attrib.keys():
- continueVal = revContinue.attrib[continueKey]
+ for continueKey in rev_continue.attrib.keys():
+ continueVal = rev_continue.attrib[continueKey]
break
if continueVal is not None:
params[continueKey] = continueVal
@@ -246,7 +280,9 @@ def getXMLPageWithApi(config: Config = None, title="", verbose=True, session=Non
# transform the revision
rev_, edits = reconstructRevisions(root=root)
- xmldom = MD.parseString(b"" + ET.tostring(rev_) + b"")
+ xmldom = MD.parseString(
+ b"" + ElementTree.tostring(rev_) + b""
+ )
# convert it into text in case it throws MemoryError
# delete the first three lines and the last two lines, which only exist to set the indent
ret += "".join(xmldom.toprettyxml(indent=" ").splitlines(True)[3:-2])
@@ -254,7 +290,7 @@ def getXMLPageWithApi(config: Config = None, title="", verbose=True, session=Non
numberofedits += edits
if config.curonly or continueVal is None: # no continue
break
- except:
+ except Exception:
traceback.print_exc()
params["rvcontinue"] = lastcontinue
ret = ""
@@ -267,7 +303,9 @@ def getXMLPageWithApi(config: Config = None, title="", verbose=True, session=Non
"export": 1,
"exportnowrap": 1,
}
- xml = getXMLPageCoreWithApi(params=params, config=config, session=session)
+ xml = getXMLPageCoreWithApi(
+ headers={}, params=params, config=config, session=session
+ )
if xml == "":
raise ExportAbortedError(config.index)
if "</page>" not in xml:
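The continuation handling rewritten above distinguishes three API generations: MediaWiki 1.26+ (a top-level continue element with rvcontinue), 1.21-1.25 (query-continue/revisions with rvcontinue), and older releases (rvstartid, or blindly the first attribute). A standalone sketch of the same dispatch, assuming the caller has already parsed the response into an ElementTree root (next_continue is an illustrative name, not repo code):

import xml.etree.ElementTree as ElementTree

def next_continue(root: ElementTree.Element, params: dict) -> bool:
    """Update params in place with the next continuation value; return False when exhausted."""
    cont = root.find("continue")
    if cont is not None:  # MW 1.26+
        params["rvcontinue"] = cont.attrib["rvcontinue"]
        return True
    qcont = root.find("query-continue")
    if qcont is not None:  # MW 1.21-1.25 (rvcontinue) or older (rvstartid / first attribute)
        rev = qcont.find("revisions")
        if rev is not None and rev.attrib:
            key, val = next(iter(rev.attrib.items()))
            params[key] = val
            return True
    return False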
diff --git a/wikiteam3/dumpgenerator/dump/page/xmlexport/page_xml_export.py b/wikiteam3/dumpgenerator/dump/page/xmlexport/page_xml_export.py
index 350dbd36..7d67f55f 100644
--- a/wikiteam3/dumpgenerator/dump/page/xmlexport/page_xml_export.py
+++ b/wikiteam3/dumpgenerator/dump/page/xmlexport/page_xml_export.py
@@ -1,7 +1,7 @@
import re
import sys
import time
-from typing import *
+from typing import Dict
import requests
@@ -12,8 +12,12 @@
from wikiteam3.utils import uprint
+# headers: Dict = None, params: Dict = None
def getXMLPageCore(
- headers: Dict = None, params: Dict = None, config: Config = None, session=None
+ headers: Dict,
+ params: Dict[str, (str | int)],
+ config: Config,
+ session: requests.Session,
) -> str:
""""""
# returns a XML containing params['limit'] revisions (or current only), ending in
@@ -37,8 +41,8 @@ def getXMLPageCore(
time.sleep(wait)
# reducing server load requesting smallest chunks (if curonly then
# limit = 1 from mother function)
- if params["limit"] > 1:
- params["limit"] = params["limit"] / 2 # half
+ if int(params["limit"]) > 1:
+ params["limit"] = int(params["limit"]) // 2 # half
if c >= maxretries:
print(" We have retried %d times" % (c))
print(
@@ -52,9 +56,9 @@ def getXMLPageCore(
# params['curonly'] should mean that we've already tried this
# fallback, because it's set by the following if and passed to
# getXMLPageCore
- if not config.curonly and "curonly" not in params:
+ if not config.curonly: # and "curonly" not in params:
print(" Trying to save only the last revision for this page...")
- params["curonly"] = 1
+ params["curonly"] = True
logerror(
config=config,
to_stdout=True,
@@ -75,7 +79,7 @@ def getXMLPageCore(
try:
r = session.post(
url=config.index, params=params, headers=headers, timeout=10
- )
+ ) # type: ignore
handleStatusCode(r)
xml = r.text
except requests.exceptions.ConnectionError as e:
@@ -89,7 +93,9 @@ def getXMLPageCore(
return xml
-def getXMLPageWithExport(config: Config = None, title="", verbose=True, session=None):
+def getXMLPageWithExport(
+ config: Config, title: str, verbose: bool, session: requests.Session
+):
"""Get the full history (or current only) of a page"""
truncated = False
@@ -97,9 +103,17 @@ def getXMLPageWithExport(config: Config = None, title="", verbose=True, session=
title_ = re.sub(" ", "_", title_)
# do not convert & into %26, title_ = re.sub('&', '%26', title_)
if config.export:
- params = {"title": config.export, "pages": title_, "action": "submit"}
+ params: Dict[str, (str | int)] = {
+ "title": config.export,
+ "pages": title_,
+ "action": "submit",
+ }
else:
- params = {"title": "Special:Export", "pages": title_, "action": "submit"}
+ params = {
+ "title": "Special:Export",
+ "pages": title_,
+ "action": "submit",
+ }
if config.curonly:
params["curonly"] = 1
params["limit"] = 1
@@ -114,7 +128,7 @@ def getXMLPageWithExport(config: Config = None, title="", verbose=True, session=
if config.templates:
params["templates"] = 1
- xml = getXMLPageCore(params=params, config=config, session=session)
+ xml = getXMLPageCore(headers={}, params=params, config=config, session=session)
if xml == "":
raise ExportAbortedError(config.index)
if "</page>" not in xml:
@@ -139,10 +153,12 @@ def getXMLPageWithExport(config: Config = None, title="", verbose=True, session=
# get the last timestamp from the acum XML
params["offset"] = re.findall(r_timestamp, xml)[-1]
try:
- xml2 = getXMLPageCore(params=params, config=config, session=session)
+ xml2 = getXMLPageCore(
+ headers={}, params=params, config=config, session=session
+ )
except MemoryError:
print("The page's history exceeds our memory, halving limit.")
- params["limit"] /= 2
+ params["limit"] = int(params["limit"]) // 2
continue
# are there more edits in this next XML chunk or no ?
@@ -177,7 +193,7 @@ def getXMLPageWithExport(config: Config = None, title="", verbose=True, session=
)
except MemoryError:
"The page's history exceeds our memory, halving limit."
- params["limit"] /= 2
+ params["limit"] = int(params["limit"]) // 2
continue
xml = xml2
edit_count += len(re.findall(r_timestamp, xml))
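The params["limit"] edits above all serve one back-off: when a page history is too large to buffer, halve the number of revisions requested per chunk (now with integer division so the parameter stays an int) and retry. A toy illustration of the pattern, with fetch_history standing in for the Special:Export request:

def fetch_history(limit: int) -> str:
    # stand-in for the Special:Export request; pretend anything above 64 revisions runs out of memory
    if limit > 64:
        raise MemoryError
    return "<revision/>" * limit

limit = 1000
chunk = ""
while limit >= 1:
    try:
        chunk = fetch_history(limit)
        break
    except MemoryError:
        limit //= 2  # same integer halving as params["limit"] above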
diff --git a/wikiteam3/dumpgenerator/dump/page/xmlrev/xml_revisions.py b/wikiteam3/dumpgenerator/dump/page/xmlrev/xml_revisions.py
index 1af38c9c..958072d6 100644
--- a/wikiteam3/dumpgenerator/dump/page/xmlrev/xml_revisions.py
+++ b/wikiteam3/dumpgenerator/dump/page/xmlrev/xml_revisions.py
@@ -1,14 +1,15 @@
import sys
import time
-from datetime import datetime
-from typing import *
+from typing import List
from urllib.parse import urlparse
import lxml.etree
import mwclient
import requests
+from lxml.etree import _ElementTree as ElementTree
+from mwclient.errors import InvalidResponse, MwClientError
-from wikiteam3.dumpgenerator.api.namespaces import getNamespacesAPI
+# from wikiteam3.dumpgenerator.api.namespaces import getNamespacesAPI
from wikiteam3.dumpgenerator.api.page_titles import readTitles
from wikiteam3.dumpgenerator.config import Config
from wikiteam3.dumpgenerator.dump.page.xmlrev.xml_revisions_page import (
@@ -22,9 +23,8 @@
def getXMLRevisionsByAllRevisions(
- config: Config = None,
- session=None,
- site: mwclient.Site = None,
+ config: Config,
+ site: mwclient.Site, # = None,
nscontinue=None,
arvcontinue=None,
):
@@ -62,55 +62,7 @@ def getXMLRevisionsByAllRevisions(
if _arvcontinue is not None:
arvparams["arvcontinue"] = _arvcontinue
- if not config.curonly:
- # We have to build the XML manually...
- # Skip flags, presumably needed to add <minor/> which is in the schema.
- # Also missing: parentid and contentformat.
- arvparams[
- "arvprop"
- ] = "ids|timestamp|user|userid|size|sha1|contentmodel|comment|content|flags"
- print(
- "Trying to get wikitext from the allrevisions API and to build the XML"
- )
- while True:
- try:
- arvrequest = site.api(http_method=config.http_method, **arvparams)
- except requests.exceptions.HTTPError as e:
- if e.response.status_code != 405 or config.http_method != "POST":
- raise
- print("POST request to the API failed, retrying with GET")
- config.http_method = "GET"
- continue
- except requests.exceptions.ReadTimeout as err:
- # Hopefully temporary, just wait a bit and continue with the same request.
- # No point putting a limit to retries, we'd need to abort everything.
- # TODO: reuse the retry logic of the checkAPI phase? Or force mwclient
- # to use the retry adapter we use for our own requests session?
- print(f"ERROR: {str(err)}")
- print("Sleeping for 20 seconds")
- time.sleep(20)
- continue
- except mwclient.errors.InvalidResponse as e:
- if (
- not e.response_text.startswith("<!DOCTYPE html>")
- or config.http_method != "POST"
- ):
- raise
-
- print(
- "POST request to the API failed (got HTML), retrying with GET"
- )
- config.http_method = "GET"
- continue
- for page in arvrequest["query"]["allrevisions"]:
- yield makeXmlFromPage(page, arvparams.get("arvcontinue", ""))
- if "continue" in arvrequest:
- arvparams["arvcontinue"] = arvrequest["continue"]["arvcontinue"]
- else:
- # End of continuation. We are done with this namespace.
- break
-
- else:
+ if config.curonly:
# FIXME: this is not curonly, just different strategy to do all revisions
# Just cycle through revision IDs and use the XML as is
print("Trying to list the revisions and to export them one by one")
@@ -189,22 +141,69 @@ def getXMLRevisionsByAllRevisions(
)
except requests.exceptions.ReadTimeout as err:
# As above
- print(f"ERROR: {str(err)}")
- print("Sleeping for 20 seconds")
+ print(f"ERROR: {str(err)}\nSleeping for 20 seconds")
time.sleep(20)
# But avoid rewriting the same revisions
arvrequest["query"]["allrevisions"] = []
+ else:
+ # We have to build the XML manually...
+ # Skip flags, presumably needed to add <minor/> which is in the schema.
+ # Also missing: parentid and contentformat.
+ arvparams[
+ "arvprop"
+ ] = "ids|timestamp|user|userid|size|sha1|contentmodel|comment|content|flags"
+ print(
+ "Trying to get wikitext from the allrevisions API and to build the XML"
+ )
+ while True:
+ try:
+ arvrequest = site.api(http_method=config.http_method, **arvparams)
+ except requests.exceptions.HTTPError as e:
+ if e.response.status_code != 405 or config.http_method != "POST":
+ raise
+ print("POST request to the API failed, retrying with GET")
+ config.http_method = "GET"
+ continue
+ except requests.exceptions.ReadTimeout as err:
+ # Hopefully temporary, just wait a bit and continue with the same request.
+ # No point putting a limit to retries, we'd need to abort everything.
+ # TODO: reuse the retry logic of the checkAPI phase? Or force mwclient
+ # to use the retry adapter we use for our own requests session?
+ print(f"ERROR: {str(err)}")
+ print("Sleeping for 20 seconds")
+ time.sleep(20)
+ continue
+ except InvalidResponse as e:
+ if (
+ e.response_text is not None
+ and not e.response_text.startswith("<!DOCTYPE html>")
+ ) or config.http_method != "POST":
+ raise
+
+ print(
+ "POST request to the API failed (got HTML), retrying with GET"
+ )
+ config.http_method = "GET"
+ continue
+ for page in arvrequest["query"]["allrevisions"]:
+ yield makeXmlFromPage(page, arvparams.get("arvcontinue", ""))
+ if "continue" in arvrequest:
+ arvparams["arvcontinue"] = arvrequest["continue"]["arvcontinue"]
+ else:
+ # End of continuation. We are done with this namespace.
+ break
+
def getXMLRevisionsByTitles(
- config: Config = None, session=None, site: mwclient.Site = None, start=None
+ config: Config, session: requests.Session, site: mwclient.Site, start: str
):
c = 0
if config.curonly:
# The raw XML export in the API gets a title and gives the latest revision.
# We could also use the allpages API as generator but let's be consistent.
print("Getting titles to export the latest revision for each")
- for title in readTitles(config, session=session, start=start):
+ for title in readTitles(config, session=session, start=start, batch=False):
# TODO: respect verbose flag, reuse output from getXMLPage
print(f" {title}")
# TODO: as we're doing one page and revision at a time, we might
@@ -238,7 +237,7 @@ def getXMLRevisionsByTitles(
# The XML needs to be made manually because the export=1 option
# refuses to return an arbitrary number of revisions (see above).
print("Getting titles to export all the revisions of each")
- titlelist = []
+ titlelist: (str | List[str]) = []
# TODO: Decide a suitable number of a batched request. Careful:
# batched responses may not return all revisions.
for titlelist in readTitles(config, session=session, start=start, batch=False):
@@ -248,9 +247,11 @@ def getXMLRevisionsByTitles(
print(f" {title}")
# Try and ask everything. At least on MediaWiki 1.16, unknown props are discarded:
# "warnings":{"revisions":{"*":"Unrecognized values for parameter 'rvprop': userid, sha1, contentmodel"}}}
+ if isinstance(titlelist, list):
+ titlelist = "|".join(titlelist)
pparams = {
"action": "query",
- "titles": "|".join(titlelist),
+ "titles": titlelist,
"prop": "revisions",
"rvlimit": config.api_chunksize,
"rvprop": "ids|timestamp|user|userid|size|sha1|contentmodel|comment|content|flags",
@@ -263,11 +264,13 @@ def getXMLRevisionsByTitles(
print("POST request to the API failed, retrying with GET")
config.http_method = "GET"
prequest = site.api(http_method=config.http_method, **pparams)
- except mwclient.errors.InvalidResponse:
+ except InvalidResponse:
+ if isinstance(titlelist, list):
+ titlelist = "; ".join(titlelist)
logerror(
config=config,
to_stdout=True,
- text=f'Error: page inaccessible? Could not export page: {"; ".join(titlelist)}',
+ text=f"Error: page inaccessible? Could not export page: {titlelist}",
)
continue
@@ -279,10 +282,12 @@ def getXMLRevisionsByTitles(
try:
pages = prequest["query"]["pages"]
except KeyError:
+ if isinstance(titlelist, list):
+ titlelist = "; ".join(titlelist)
logerror(
config=config,
to_stdout=True,
- text=f'Error: page inaccessible? Could not export page: {"; ".join(titlelist)}',
+ text=f"Error: page inaccessible? Could not export page: {titlelist}",
)
break
# Go through the data we got to build the XML.
@@ -290,10 +295,12 @@ def getXMLRevisionsByTitles(
try:
yield makeXmlFromPage(pages[pageid], None)
except PageMissingError:
+ if isinstance(titlelist, list):
+ titlelist = "; ".join(titlelist)
logerror(
config=config,
to_stdout=True,
- text=f'Error: empty revision from API. Could not export page: {"; ".join(titlelist)}',
+ text=f"Error: empty revision from API. Could not export page: {titlelist}",
)
continue
@@ -324,8 +331,12 @@ def getXMLRevisionsByTitles(
print(f"\n-> Downloaded {c} pages\n")
+# useAllrevision=True, lastPage=None
def getXMLRevisions(
- config: Config = None, session=None, useAllrevision=True, lastPage=None
+ config: Config,
+ session: requests.Session,
+ useAllrevision: bool,
+ lastPage: (ElementTree | None),
):
# FIXME: actually figure out the various strategies for each MediaWiki version
apiurl = urlparse(config.api)
@@ -342,7 +353,7 @@ def getXMLRevisions(
# Find last title
if lastPage is not None:
try:
- lastNs = int(lastPage.find("ns").text)
+ lastNs = int(lastPage.find("ns", None).text)
lastArvcontinue = lastPage.attrib["arvcontinue"]
except Exception:
print(
@@ -350,43 +361,38 @@ def getXMLRevisions(
)
raise
nscontinue = lastNs
- arvcontinue = lastArvcontinue
- if not arvcontinue:
- arvcontinue = None
+ arvcontinue = lastArvcontinue or None
else:
nscontinue = None
arvcontinue = None
try:
- return getXMLRevisionsByAllRevisions(
- config, session, site, nscontinue, arvcontinue
- )
- except (KeyError, mwclient.errors.InvalidResponse) as e:
- print(e)
+ return getXMLRevisionsByAllRevisions(config, site, nscontinue, arvcontinue)
+ except (KeyError, InvalidResponse) as e:
# TODO: check whether the KeyError was really for a missing arv API
print(
- "Warning. Could not use allrevisions. Wiki too old? Try to use --xmlrevisions_page"
+ f"{str(e)}\nWarning. Could not use allrevisions. Wiki too old? Try to use --xmlrevisions_page"
)
sys.exit()
else:
# Find last title
if lastPage is not None:
try:
- start = lastPage.find("title")
+ start = lastPage.find("title", None)
except Exception:
print(
f"Failed to find title in last trunk XML: {lxml.etree.tostring(lastPage)}"
)
raise
else:
- start = None
+ start = ""
try:
# # Uncomment these lines to raise a KeyError for testing
# raise KeyError(999999)
# # DO NOT UNCOMMENT IN RELEASE
return getXMLRevisionsByTitles(config, session, site, start)
- except mwclient.errors.MwClientError as e:
+ except MwClientError as e:
print(e)
print("This mwclient version seems not to work for us. Exiting.")
sys.exit()
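Both branches above keep the same recovery path when talking to api.php: if a POST is rejected with HTTP 405, or answered with an HTML error page, switch config.http_method to GET and retry the identical request. A simplified requests-only sketch (api_get_or_post is illustrative; the real code goes through mwclient's site.api):

import requests

def api_get_or_post(
    session: requests.Session, api_url: str, params: dict, http_method: str = "POST"
) -> tuple[dict, str]:
    # Returns the JSON response plus the HTTP method that finally worked.
    while True:
        try:
            r = session.request(http_method, api_url, params=params, timeout=30)
            r.raise_for_status()
            return r.json(), http_method
        except requests.exceptions.HTTPError as e:
            if http_method == "POST" and e.response is not None and e.response.status_code == 405:
                http_method = "GET"  # the wiki refuses POST here; retry once with GET
                continue
            raise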
diff --git a/wikiteam3/dumpgenerator/dump/page/xmlrev/xml_revisions_page.py b/wikiteam3/dumpgenerator/dump/page/xmlrev/xml_revisions_page.py
index a249a269..b57d03d2 100644
--- a/wikiteam3/dumpgenerator/dump/page/xmlrev/xml_revisions_page.py
+++ b/wikiteam3/dumpgenerator/dump/page/xmlrev/xml_revisions_page.py
@@ -6,7 +6,7 @@
def makeXmlPageFromRaw(xml, arvcontinue) -> str:
"""Discard the metadata around a <page> element in <mediawiki> string"""
- root = etree.XML(xml)
+ root = etree.XML(text=xml, parser=None)
find = etree.XPath("//*[local-name() = 'page']")
page = find(root)[0]
if arvcontinue is not None:
@@ -14,7 +14,7 @@ def makeXmlPageFromRaw(xml, arvcontinue) -> str:
# The <page> tag will inherit the namespace, like:
#
# FIXME: pretty_print doesn't seem to work, only adds a newline
- return etree.tostring(page, pretty_print=True, encoding="unicode")
+ return etree.tostring(page, pretty_print=True, encoding="unicode") # type: ignore
def makeXmlFromPage(page: dict, arvcontinue) -> str:
@@ -124,4 +124,4 @@ def makeXmlFromPage(page: dict, arvcontinue) -> str:
except KeyError as e:
print(e)
raise PageMissingError(page["title"], e)
- return etree.tostring(p, pretty_print=True, encoding="unicode")
+ return etree.tostring(p, pretty_print=True, encoding="unicode") # type: ignore
diff --git a/wikiteam3/dumpgenerator/dump/xmldump/xml_dump.py b/wikiteam3/dumpgenerator/dump/xmldump/xml_dump.py
index 991323dd..d8a46546 100644
--- a/wikiteam3/dumpgenerator/dump/xmldump/xml_dump.py
+++ b/wikiteam3/dumpgenerator/dump/xmldump/xml_dump.py
@@ -1,8 +1,12 @@
import re
import sys
-from typing import *
+from io import TextIOWrapper
import lxml.etree
+import requests
+
+# from typing import *
+from lxml.etree import _ElementTree as ElementTree
from wikiteam3.dumpgenerator.api.page_titles import readTitles
from wikiteam3.dumpgenerator.cli import Delay
@@ -19,12 +23,14 @@
from wikiteam3.utils import cleanXML, domain2prefix, undoHTMLEntities
+# lastPage=None,
+# useAllrevisions=False,
def doXMLRevisionDump(
- config: Config = None,
- session=None,
- xmlfile=None,
- lastPage=None,
- useAllrevisions=False,
+ config: Config,
+ session: requests.Session,
+ xmlfile: TextIOWrapper,
+ lastPage: (ElementTree | None),
+ useAllrevisions: bool,
):
try:
r_timestamp = "<timestamp>([^<]+)</timestamp>"
@@ -41,16 +47,17 @@ def doXMLRevisionDump(
if arvcontinueRe := re.findall(r_arvcontinue, xml):
curArvcontinue = arvcontinueRe[0]
if lastArvcontinue != curArvcontinue:
- Delay(config=config, session=session)
+ Delay(config=config)
lastArvcontinue = curArvcontinue
# Due to how generators work, it's expected this may be less
xml = cleanXML(xml=xml)
xmlfile.write(xml)
xmltitle = re.search(r"<title>([^<]+)</title>", xml)
- title = undoHTMLEntities(text=xmltitle.group(1))
- print(f"{title}, {numrevs} edits (--xmlrevisions)")
- # Delay(config=config, session=session)
+ if xmltitle is not None:
+ title = undoHTMLEntities(text=xmltitle[1])
+ print(f"{title}, {numrevs} edits (--xmlrevisions)")
+ # Delay(config=config)
except AttributeError as e:
print(e)
print("This API library version is not working")
@@ -59,11 +66,13 @@ def doXMLRevisionDump(
print(e)
-def doXMLExportDump(config: Config = None, session=None, xmlfile=None, lastPage=None):
+def doXMLExportDump(
+ config: Config, session: requests.Session, xmlfile: TextIOWrapper, lastPage=None
+):
print("\nRetrieving the XML for every page\n")
lock = True
- start = None
+ start: str = ""
if lastPage is not None:
try:
start = lastPage.find("title").text
@@ -77,18 +86,20 @@ def doXMLExportDump(config: Config = None, session=None, xmlfile=None, lastPage=
lock = False
c = 1
- for title in readTitles(config, session=session, start=start):
- if not title:
+ for title in readTitles(config, session=session, start=start, batch=False):
+ if not isinstance(title, str) or title == "":
continue
if title == start: # start downloading from start, included
lock = False
if lock:
continue
- Delay(config=config, session=session)
+ Delay(config=config)
if c % 10 == 0:
print(f"\n-> Downloaded {c} pages\n")
try:
- for xml in getXMLPage(config=config, title=title, session=session):
+ for xml in getXMLPage(
+ config=config, verbose=True, title=title, session=session
+ ):
xml = cleanXML(xml=xml)
xmlfile.write(xml)
except PageMissingError:
@@ -104,7 +115,8 @@ def doXMLExportDump(config: Config = None, session=None, xmlfile=None, lastPage=
c += 1
-def generateXMLDump(config: Config = None, resume=False, session=None):
+# resume=False
+def generateXMLDump(config: Config, resume: bool, session: requests.Session):
"""Generates a XML dump for a list of titles or from revision IDs"""
header, config = getXMLHeader(config=config, session=session)
@@ -114,9 +126,9 @@ def generateXMLDump(config: Config = None, resume=False, session=None):
config.date,
"current" if config.curonly else "history",
)
- xmlfile = None
+ xmlfile: TextIOWrapper
- lastPage = None
+ lastPage: (ElementTree | None) = None
lastPageChunk = None
# start != None, means we are resuming a XML dump
if resume:
@@ -128,8 +140,9 @@ def generateXMLDump(config: Config = None, resume=False, session=None):
resume = False
lastPage = None
else:
- lastPage = parseLastPageChunk(lastPageChunk)
- if lastPage is None:
+ try:
+ lastPage = parseLastPageChunk(lastPageChunk)
+ except lxml.etree.LxmlError:
print("Failed to parse last page chunk: \n%s" % lastPageChunk)
print("Cannot resume, exiting now!")
sys.exit(1)
diff --git a/wikiteam3/dumpgenerator/dump/xmldump/xml_header.py b/wikiteam3/dumpgenerator/dump/xmldump/xml_header.py
index f3602439..e95129a2 100644
--- a/wikiteam3/dumpgenerator/dump/xmldump/xml_header.py
+++ b/wikiteam3/dumpgenerator/dump/xmldump/xml_header.py
@@ -1,7 +1,8 @@
+import contextlib
import json
import re
import sys
-from typing import *
+from typing import Tuple
import requests
@@ -11,31 +12,29 @@
from wikiteam3.dumpgenerator.log import logerror
-def getXMLHeader(config: Config = None, session=None) -> Tuple[str, Config]:
+def getXMLHeader(config: Config, session: requests.Session) -> Tuple[str, Config]:
"""Retrieve a random page to extract XML headers (namespace info, etc)"""
print(config.api)
xml = ""
disableSpecialExport = config.xmlrevisions or config.xmlapiexport
randomtitle = "Main_Page"
if disableSpecialExport and config.api and config.api.endswith("api.php"):
- try:
+ with contextlib.suppress(requests.exceptions.RetryError):
print("Getting the XML header from the API")
# Export and exportnowrap exist from MediaWiki 1.15, allpages from 1.8
r = session.get(
f"{config.api}?action=query&export=1&exportnowrap=1&list=allpages&aplimit=1",
timeout=10,
)
- xml: str = r.text
+ xml = r.text
# Otherwise try without exportnowrap, e.g. Wikia returns a blank page on 1.19
if not re.match(r"\s*<mediawiki", xml):
r = session.get(
f"{config.api}?action=query&export=1&format=json&titles={randomtitle}",
timeout=10,
)
- try:
+ with contextlib.suppress(KeyError):
xml = r.json()["query"]["export"]["*"]
- except KeyError:
- pass
- except requests.exceptions.RetryError:
- pass
else:
try:
@@ -72,36 +67,36 @@ def getXMLHeader(config: Config = None, session=None) -> Tuple[str, Config]:
# The <page> does not exist. Not a problem, if we get the <siteinfo>.
xml = pme.xml
except ExportAbortedError:
- try:
- if config.api:
- print("Trying the local name for the Special namespace instead")
- r = session.get(
- url=config.api,
- params={
- "action": "query",
- "meta": "siteinfo",
- "siprop": "namespaces",
- "format": "json",
- },
- timeout=120,
- )
- config.export = (
- json.loads(r.text)["query"]["namespaces"]["-1"]["*"] + ":Export"
- )
- xml = "".join(
- list(
- getXMLPage(
- config=config,
- title=randomtitle,
- verbose=False,
- session=session,
+ with contextlib.suppress(ExportAbortedError):
+ try:
+ if config.api:
+ print("Trying the local name for the Special namespace instead")
+ r = session.get(
+ url=config.api,
+ params={
+ "action": "query",
+ "meta": "siteinfo",
+ "siprop": "namespaces",
+ "format": "json",
+ },
+ timeout=120,
+ )
+ config.export = (
+ json.loads(r.text)["query"]["namespaces"]["-1"]["*"]
+ + ":Export"
+ )
+ xml = "".join(
+ list(
+ getXMLPage(
+ config=config,
+ title=randomtitle,
+ verbose=False,
+ session=session,
+ )
)
)
- )
- except PageMissingError as pme:
- xml = pme.xml
- except ExportAbortedError:
- pass
+ except PageMissingError as pme:
+ xml = pme.xml
header = xml.split("")[0]
if not re.match(r"\s*<mediawiki", xml):
print(xml)
print("XML export on this wiki is broken, quitting.")
logerror(
- to_stdout=True, text="XML export on this wiki is broken, quitting."
+ config=config,
+ to_stdout=True,
+ text="XML export on this wiki is broken, quitting.",
)
sys.exit()
return header, config
diff --git a/wikiteam3/dumpgenerator/dump/xmldump/xml_integrity.py b/wikiteam3/dumpgenerator/dump/xmldump/xml_integrity.py
index 5f17d156..819ff29b 100644
--- a/wikiteam3/dumpgenerator/dump/xmldump/xml_integrity.py
+++ b/wikiteam3/dumpgenerator/dump/xmldump/xml_integrity.py
@@ -1,10 +1,10 @@
-from typing import *
+from typing import Iterable
from wikiteam3.dumpgenerator.config import Config
def checkXMLIntegrity(
- config: Config = None, titles: Iterable[str] = None, session=None
+ config: Config, titles: (Iterable[str] | None) = None, session=None
):
"""Check XML dump integrity, to detect broken XML chunks"""
# TODO: Fix XML Integrity Check
diff --git a/wikiteam3/dumpgenerator/dump/xmldump/xml_truncate.py b/wikiteam3/dumpgenerator/dump/xmldump/xml_truncate.py
index 3cfb5528..fe73be2e 100644
--- a/wikiteam3/dumpgenerator/dump/xmldump/xml_truncate.py
+++ b/wikiteam3/dumpgenerator/dump/xmldump/xml_truncate.py
@@ -1,9 +1,9 @@
import os
from io import StringIO
-from typing import *
import lxml.etree
from file_read_backwards import FileReadBackwards
+from lxml.etree import _ElementTree as ElementTree
def endsWithNewlines(filename: str) -> int:
@@ -60,10 +60,9 @@ def truncateXMLDump(filename: str) -> str:
return incomplete_segment
-def parseLastPageChunk(chunk) -> Optional[lxml.etree._ElementTree]:
- try:
- parser = lxml.etree.XMLParser(recover=True)
- tree = lxml.etree.parse(StringIO(chunk), parser)
- return tree.getroot()
- except lxml.etree.LxmlError:
- return None
+def parseLastPageChunk(chunk) -> ElementTree:
+ parser = lxml.etree.XMLParser(recover=True)
+ tree = lxml.etree.parse(StringIO(chunk), parser)
+ return tree.getroot()
+ # except lxml.etree.LxmlError:
+ # return None
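With this change parseLastPageChunk no longer swallows parse errors and returns None; the caller in xml_dump.py now catches lxml.etree.LxmlError itself (see the generateXMLDump hunk above). A small sketch of the new contract, with parse_chunk as an illustrative stand-in:

from io import StringIO

import lxml.etree

def parse_chunk(chunk: str):
    parser = lxml.etree.XMLParser(recover=True)
    return lxml.etree.parse(StringIO(chunk), parser).getroot()

try:
    root = parse_chunk('<page arvcontinue=""><title>Example</title></page>')
    print(root.find("title").text)
except lxml.etree.LxmlError:
    # what generateXMLDump now does: report the bad chunk and refuse to resume
    print("Failed to parse last page chunk; cannot resume")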
diff --git a/wikiteam3/dumpgenerator/log/log_error.py b/wikiteam3/dumpgenerator/log/log_error.py
index 7f18fbf9..5902ac9a 100644
--- a/wikiteam3/dumpgenerator/log/log_error.py
+++ b/wikiteam3/dumpgenerator/log/log_error.py
@@ -3,7 +3,7 @@
from wikiteam3.dumpgenerator.config import Config
-def logerror(config: Config = None, to_stdout=False, text="") -> None:
+def logerror(config: Config, to_stdout=False, text="") -> None:
"""Log error in errors.log"""
if text:
with open(f"{config.path}/errors.log", "a", encoding="utf-8") as outfile:
diff --git a/wikiteam3/dumpgenerator/test/test_config.py b/wikiteam3/dumpgenerator/test/test_config.py
index da9869e5..ce6521a3 100644
--- a/wikiteam3/dumpgenerator/test/test_config.py
+++ b/wikiteam3/dumpgenerator/test/test_config.py
@@ -25,7 +25,7 @@ def _new_config_from_parameter(params):
def get_config(mediawiki_ver, api=True):
- assert api == True
+ assert api == True # type: ignore
if mediawiki_ver == "1.16.5":
return _new_config_from_parameter(
[
@@ -33,3 +33,4 @@ def get_config(mediawiki_ver, api=True):
"http://group0.mediawiki.demo.save-web.org/mediawiki-1.16.5/api.php",
]
)
+ raise ValueError(f"Expected mediawiki_ver '1.16.5'; got {mediawiki_ver}")
diff --git a/wikiteam3/gui.py b/wikiteam3/gui.py
index e4f2cfec..a3cfb3d9 100644
--- a/wikiteam3/gui.py
+++ b/wikiteam3/gui.py
@@ -22,7 +22,7 @@
* advanced: batch downloads, upload to Internet Archive or anywhere
"""
-
+import contextlib
import os
import platform
import random
@@ -129,7 +129,7 @@ def __init__(self, master):
self.button11 = Button(
self.labelframe11,
text="Check",
- command=lambda: threading.start_new_threading(self.checkURL, ()),
+ command=lambda: threading.start_new_threading(self.checkURL, ()), # type: ignore
width=5,
)
self.button11.grid(row=0, column=3)
@@ -275,14 +275,14 @@ def __init__(self, master):
self.button21 = Button(
self.frame2,
text="Load available dumps",
- command=lambda: threading.start_new_threading(self.loadAvailableDumps, ()),
+ command=lambda: threading.start_new_threading(self.loadAvailableDumps, ()), # type: ignore
width=15,
)
self.button21.grid(row=3, column=0)
self.button23 = Button(
self.frame2,
text="Download selection",
- command=lambda: threading.start_new_threading(self.downloadDump, ()),
+ command=lambda: threading.start_new_threading(self.downloadDump, ()), # type: ignore
width=15,
)
self.button23.grid(row=3, column=4)
@@ -337,7 +337,7 @@ def checkURL(self):
): # well-constructed URL?, one dot at least, aaaaa.com, but bb.aaaaa.com is allowed too
if self.optionmenu11var.get() == "api.php":
self.msg("Please wait... Checking api.php...")
- if checkAPI(self.entry11.get()):
+ if checkAPI(self.entry11.get(), None): # type: ignore
self.entry11.config(background="lightgreen")
self.msg("api.php is correct!", level="ok")
else:
@@ -345,7 +345,7 @@ def checkURL(self):
self.msg("api.php is incorrect!", level="error")
elif self.optionmenu11var.get() == "index.php":
self.msg("Please wait... Checking index.php...")
- if checkIndex(self.entry11.get()):
+ if checkIndex(self.entry11.get(), None): # type: ignore
self.entry11.config(background="lightgreen")
self.msg("index.php is OK!", level="ok")
else:
@@ -374,7 +374,7 @@ def sumSizes(self, sizes):
def run(self):
for _ in range(10):
time.sleep(0.1)
- self.value += 10
+ self.value += 10 # type: ignore
"""
#get parameters selected
@@ -388,7 +388,7 @@ def run(self):
def msg(self, msg="", level=""):
levels = {"ok": "lightgreen", "warning": "yellow", "error": "red"}
- if levels.has_key(level.lower()):
+ if level.lower() in levels:
print(f"{level.upper()}: {msg}")
self.status.config(
text=f"{level.upper()}: {msg}", background=levels[level.lower()]
@@ -398,9 +398,9 @@ def msg(self, msg="", level=""):
self.status.config(text=msg, background="grey")
def treeSortColumn(self, column, reverse=False):
- l = [(self.tree.set(i, column), i) for i in self.tree.get_children("")]
- l.sort(reverse=reverse)
- for index, (val, i) in enumerate(l):
+ line = [(self.tree.set(i, column), i) for i in self.tree.get_children("")]
+ line.sort(reverse=reverse)
+ for index, (val, i) in enumerate(line):
self.tree.move(i, "", index)
self.tree.heading(
column,
@@ -408,7 +408,7 @@ def treeSortColumn(self, column, reverse=False):
)
def downloadProgress(self, block_count, block_size, total_size):
- try:
+ with contextlib.suppress(Exception):
total_mb = total_size / 1024 / 1024.0
downloaded = block_count * (block_size / 1024 / 1024.0)
percent = downloaded / (total_mb / 100.0)
@@ -419,8 +419,6 @@ def downloadProgress(self, block_count, block_size, total_size):
self.msg(msg, level="ok")
# sys.stdout.write("%.1f MB of %.1f MB downloaded (%.2f%%)" %(downloaded, total_mb, percent))
# sys.stdout.flush()
- except:
- pass
def downloadDump(self, event=None):
if self.block:
@@ -452,7 +450,7 @@ def downloadDump(self, event=None):
self.dumps[int(item)][5],
)
)
- f = urllib.urlretrieve(
+ urllib.urlretrieve( # type: ignore
self.dumps[int(item)][5],
filepath,
reporthook=self.downloadProgress,
@@ -614,11 +612,11 @@ def loadAvailableDumps(self):
],
]
wikifarms_r = re.compile(f'({"|".join(wikifarms.keys())})')
- c = 0
+ # c = 0
for mirror, url, regexp in self.urls:
print("Loading data from", mirror, url)
self.msg(msg=f"Please wait... Loading data from {mirror} {url}")
- f = urllib.request.urlopen(url)
+ f = urllib.request.urlopen(url) # type: ignore
m = re.compile(regexp).finditer(f.read())
for i in m:
filename = i.group("filename")
@@ -628,9 +626,7 @@ def loadAvailableDumps(self):
if re.search(wikifarms_r, filename):
wikifarm = re.findall(wikifarms_r, filename)[0]
wikifarm = wikifarms[wikifarm]
- size = i.group("size")
- if not size:
- size = "Unknown"
+ size = i.group("size") or "Unknown"
date = "Unknown"
if re.search(r"\-(\d{8})[\.-]", filename):
date = re.findall(r"\-(\d{4})(\d{2})(\d{2})[\.-]", filename)[0]
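As an aside on the regex above: it pulls year, month and day groups out of dump filenames that embed a YYYYMMDD date. A tiny illustrative sketch with a made-up filename (not a real dump):

# Sketch: what the dump-date regex extracts from a filename.
import re

filename = "examplewiki-20230102-wikidump.7z"  # hypothetical dump name
groups = re.findall(r"\-(\d{4})(\d{2})(\d{2})[\.-]", filename)
date = "%s-%s-%s" % groups[0] if groups else "Unknown"
print(date)  # 2023-01-02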
diff --git a/wikiteam3/uploader.py b/wikiteam3/uploader.py
index d4b4ede0..cee7ae55 100644
--- a/wikiteam3/uploader.py
+++ b/wikiteam3/uploader.py
@@ -15,12 +15,9 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import argparse
-import getopt
import hashlib
-import os
import re
import shutil
-import subprocess
import time
import urllib.parse
from io import BytesIO
@@ -95,6 +92,7 @@ def upload(wikis, logfile, config={}, uploadeddumps=[]):
prefix = domain2prefix(Config(api=wiki))
except KeyError:
print("ERROR: could not produce the prefix for %s" % wiki)
+ continue
wikiname = prefix.split("-")[0]
dumps = []
@@ -163,29 +161,29 @@ def upload(wikis, logfile, config={}, uploadeddumps=[]):
r = requests.get(url=wiki, params=params, headers=headers)
if r.status_code < 400:
xml = r.text
- except requests.exceptions.ConnectionError as e:
+ except requests.exceptions.ConnectionError:
pass
sitename = ""
baseurl = ""
lang = ""
try:
- sitename = re.findall(r"sitename=\"([^\"]+)\"", xml)[0]
- except:
+ sitename = re.findall(r"sitename=\"([^\"]+)\"", xml)[0] # type: ignore
+ except Exception:
pass
try:
- baseurl = re.findall(r"base=\"([^\"]+)\"", xml)[0]
- except:
+ baseurl = re.findall(r"base=\"([^\"]+)\"", xml)[0] # type: ignore
+ except Exception:
pass
try:
- lang = re.findall(r"lang=\"([^\"]+)\"", xml)[0]
- except:
+ lang = re.findall(r"lang=\"([^\"]+)\"", xml)[0] # type: ignore
+ except Exception:
pass
if not sitename:
sitename = wikiname
if not baseurl:
- baseurl = re.sub(r"(?im)/api\.php", r"", wiki)
+ baseurl = re.sub(r"(?im)/api\.php", r"", wiki) # type: ignore
# Convert protocol-relative URLs
baseurl = re.sub("^//", "https://", baseurl)
if lang:
@@ -207,7 +205,7 @@ def upload(wikis, logfile, config={}, uploadeddumps=[]):
r = requests.get(url=wiki, params=params, headers=headers)
if r.status_code < 400:
xml = r.text
- except requests.exceptions.ConnectionError as e:
+ except requests.exceptions.ConnectionError:
pass
rightsinfourl = ""
@@ -215,7 +213,7 @@ def upload(wikis, logfile, config={}, uploadeddumps=[]):
try:
rightsinfourl = re.findall(r"rightsinfo url=\"([^\"]+)\"", xml)[0]
rightsinfotext = re.findall(r"text=\"([^\"]+)\"", xml)[0]
- except:
+ except Exception:
pass
raw = ""
@@ -223,7 +221,7 @@ def upload(wikis, logfile, config={}, uploadeddumps=[]):
r = requests.get(url=baseurl, headers=headers)
if r.status_code < 400:
raw = r.text
- except requests.exceptions.ConnectionError as e:
+ except requests.exceptions.ConnectionError:
pass
# or copyright info from #footer in mainpage
@@ -235,13 +233,13 @@ def upload(wikis, logfile, config={}, uploadeddumps=[]):
rightsinfourl = re.findall(
r"", raw
)[0]
- except:
+ except Exception:
pass
try:
rightsinfotext = re.findall(
r"([^\n\r]*?)", raw
)[0]
- except:
+ except Exception:
pass
if rightsinfotext and not rightsinfourl:
rightsinfourl = baseurl + "#footer"
@@ -260,7 +258,7 @@ def upload(wikis, logfile, config={}, uploadeddumps=[]):
if "http" not in logourl:
# Probably a relative path, construct the absolute path
logourl = urllib.parse.urljoin(wiki, logourl)
- except:
+ except Exception:
pass
# retrieve some info from the wiki
@@ -323,7 +321,7 @@ def upload(wikis, logfile, config={}, uploadeddumps=[]):
try:
item.upload(
str(dump),
- metadata=md,
+ metadata=md, # type: ignore
access_key=ia_keys["access"],
secret_key=ia_keys["secret"],
verbose=True,
@@ -341,12 +339,14 @@ def upload(wikis, logfile, config={}, uploadeddumps=[]):
# Update metadata
r = item.modify_metadata(
- md, access_key=ia_keys["access"], secret_key=ia_keys["secret"]
+ md, # type: ignore
+ access_key=ia_keys["access"],
+ secret_key=ia_keys["secret"],
)
- if r.status_code != 200:
+ if r.status_code != 200: # type: ignore
print("Error when updating metadata")
- print(r.status_code)
- print(r.text)
+ print(r.status_code) # type: ignore
+ print(r.text) # type: ignore
print(
"You can find it in https://archive.org/details/%s" % (identifier)
@@ -358,11 +358,11 @@ def upload(wikis, logfile, config={}, uploadeddumps=[]):
try:
log(logfile, wiki, dump, "ok")
if logourl:
- logo = BytesIO(requests.get(logourl, timeout=10).content)
+ logo = BytesIO(requests.get(logourl, timeout=10).content) # type: ignore
if ".png" in logourl:
logoextension = "png"
- elif logourl.split("."):
- logoextension = logourl.split(".")[-1]
+ elif logourl.split("."): # type: ignore
+ logoextension = logourl.split(".")[-1] # type: ignore
else:
logoextension = "unknown"
logoname = "wiki-" + wikiname + "_logo." + logoextension
@@ -388,6 +388,7 @@ def main(params=[]):
Dumps must be in the same directory and follow the -wikidump.7z/-history.xml.7z format
as produced by launcher.py (explained in https://github.com/WikiTeam/wikiteam/wiki/Tutorial#Publishing_the_dump ).
You need a file named keys.txt with access and secret keys, in two different lines
+You also need py in the same directory as this script.
Use --help to print this help."""
)
@@ -409,11 +410,11 @@ def main(params=[]):
listfile = config.listfile
try:
uploadeddumps = [
- l.split(";")[1]
- for l in open("uploader-%s.log" % (listfile)).read().strip().splitlines()
- if len(l.split(";")) > 1
+ line.split(";")[1]
+ for line in open("uploader-%s.log" % (listfile)).read().strip().splitlines()
+ if len(line.split(";")) > 1
]
- except:
+ except Exception:
pass
if config.logfile is None:
diff --git a/wikiteam3/utils/__init__.py b/wikiteam3/utils/__init__.py
index f05f8ca9..518f689c 100644
--- a/wikiteam3/utils/__init__.py
+++ b/wikiteam3/utils/__init__.py
@@ -1,7 +1,9 @@
from .domain import domain2prefix
-from .login import botLogin, clientLogin, fetchLoginToken, indexLogin, uniLogin
+from .login import botLogin, clientLogin, indexLogin, uniLogin
from .monkey_patch import mod_requests_text
from .uprint import uprint
from .user_agent import getUserAgent
from .util import cleanHTML, cleanXML, removeIP, sha1File, undoHTMLEntities
from .wiki_avoid import avoidWikimediaProjects
+
+__all__ = ["domain2prefix", "botLogin", "clientLogin", "indexLogin", "uniLogin", "mod_requests_text", "uprint", "getUserAgent", "cleanHTML", "cleanXML", "removeIP", "sha1File", "undoHTMLEntities", "avoidWikimediaProjects"]
diff --git a/wikiteam3/utils/domain.py b/wikiteam3/utils/domain.py
index aad0d05d..8a230d86 100644
--- a/wikiteam3/utils/domain.py
+++ b/wikiteam3/utils/domain.py
@@ -3,7 +3,7 @@
from wikiteam3.dumpgenerator.config import Config
-def domain2prefix(config: Config = None, session=None):
+def domain2prefix(config: Config):
"""Convert domain name to a valid prefix filename."""
# At this point, both api and index are supposed to be defined
diff --git a/wikiteam3/utils/login/__init__.py b/wikiteam3/utils/login/__init__.py
index f16f2bfe..04734135 100644
--- a/wikiteam3/utils/login/__init__.py
+++ b/wikiteam3/utils/login/__init__.py
@@ -4,7 +4,7 @@
import requests
-from wikiteam3.utils.login.api import botLogin, clientLogin, fetchLoginToken
+from wikiteam3.utils.login.api import botLogin, clientLogin
from wikiteam3.utils.login.index import indexLogin
diff --git a/wikiteam3/utils/login/api.py b/wikiteam3/utils/login/api.py
index e1b1f4c3..d87da042 100644
--- a/wikiteam3/utils/login/api.py
+++ b/wikiteam3/utils/login/api.py
@@ -1,6 +1,6 @@
""" Available since MediaWiki 1.27. login to a wiki using username and password (API) """
-from typing import *
+from typing import Optional
import requests
@@ -15,8 +15,7 @@ def fetchLoginToken(session: requests.Session, api: str) -> Optional[str]:
data = response.json()
try:
token = data["query"]["tokens"]["logintoken"]
- if type(token) is str:
- return token
+ return token if type(token) is str else None
except KeyError:
print("fetch login token: Oops! Something went wrong -- ", data)
return None
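For reference, the call fetchLoginToken wraps corresponds to the standard MediaWiki meta=tokens query. A hedged sketch against a placeholder endpoint (URL and error handling here are illustrative, not this module's exact code):

# Sketch: fetching a MediaWiki login token via the API (placeholder wiki URL).
import requests

session = requests.Session()
api = "https://example.org/w/api.php"  # hypothetical api.php endpoint
r = session.get(
    api,
    params={"action": "query", "meta": "tokens", "type": "login", "format": "json"},
)
data = r.json()
token = data.get("query", {}).get("tokens", {}).get("logintoken")  # None if the shape is unexpected
print(token)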
diff --git a/wikiteam3/utils/login/index.py b/wikiteam3/utils/login/index.py
index 94d332fb..202fe739 100644
--- a/wikiteam3/utils/login/index.py
+++ b/wikiteam3/utils/login/index.py
@@ -1,7 +1,7 @@
""" Always available login methods.(mw 1.16-1.39)
Even oler versions of MW may work, but not tested. """
-from typing import *
+from typing import Optional
import lxml.html
import requests
@@ -45,7 +45,7 @@ def indexLogin(
"title": "Special:UserLogin", # introduced before MW 1.39.
"force": "", # introduced before MW 1.39, empty string is OK.
}
- r = session.post(index, allow_redirects=False, params=params, data=data)
+ r = session.post(index, allow_redirects=False, params=params, data=data) # type: ignore
if r.status_code == 302:
print("index login: Success! Welcome, ", username, "!")
return session
diff --git a/wikiteam3/utils/monkey_patch.py b/wikiteam3/utils/monkey_patch.py
index 6abda313..2ad9323d 100644
--- a/wikiteam3/utils/monkey_patch.py
+++ b/wikiteam3/utils/monkey_patch.py
@@ -3,13 +3,13 @@
from wikiteam3.dumpgenerator.cli.delay import Delay
-def mod_requests_text(requests: requests):
+def mod_requests_text(requests: requests): # type: ignore
"""Monkey patch `requests.Response.text` to remove BOM"""
def new_text(self):
return self.content.lstrip(b"\xef\xbb\xbf").decode(self.encoding)
- requests.Response.text = property(new_text)
+ requests.Response.text = property(new_text) # type: ignore
class DelaySession:
@@ -26,8 +26,8 @@ def hijack(self):
"""Don't forget to call `release()`"""
def new_send(request, **kwargs):
- Delay(msg=self.msg, delay=self.delay, config=self.config)
- return self.old_send(request, **kwargs)
+ Delay(msg=self.msg, delay=self.delay, config=self.config) # type: ignore
+ return self.old_send(request, **kwargs) # type: ignore
self.old_send = self.session.send
self.session.send = new_send
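As a usage illustration for mod_requests_text above (assuming wikiteam3 is importable; the hand-built Response is only for demonstration):

# Sketch: apply the BOM-stripping patch and show it on a synthetic response.
import requests

from wikiteam3.utils.monkey_patch import mod_requests_text

mod_requests_text(requests)  # swaps requests.Response.text for the BOM-stripping property

resp = requests.Response()
resp._content = b"\xef\xbb\xbf<mediawiki></mediawiki>"  # body with a leading UTF-8 BOM
resp.encoding = "utf-8"
print(resp.text)  # "<mediawiki></mediawiki>"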
diff --git a/wikiteam3/utils/user_agent.py b/wikiteam3/utils/user_agent.py
index dd1df20b..eef019ee 100644
--- a/wikiteam3/utils/user_agent.py
+++ b/wikiteam3/utils/user_agent.py
@@ -319,10 +319,10 @@ def getUserAgent():
def setupUserAgent(session: requests.Session):
- session._orirequest = session.request
+ session._orirequest = session.request # type: ignore
def newrequest(*args, **kwargs):
session.headers.update({"User-Agent": getUserAgent()})
- return session._orirequest(*args, **kwargs)
+ return session._orirequest(*args, **kwargs) # type: ignore
- session.request = newrequest
+ session.request = newrequest # type: ignore
diff --git a/wikiteam3/utils/wiki_avoid.py b/wikiteam3/utils/wiki_avoid.py
index c7593fdc..aed5641a 100644
--- a/wikiteam3/utils/wiki_avoid.py
+++ b/wikiteam3/utils/wiki_avoid.py
@@ -1,11 +1,11 @@
import re
import sys
-from typing import *
+from typing import Dict
from wikiteam3.dumpgenerator.config import Config
-def avoidWikimediaProjects(config: Config = None, other: Dict = None):
+def avoidWikimediaProjects(config: Config, other: Dict):
"""Skip Wikimedia projects and redirect to the dumps website"""
# notice about wikipedia dumps