From a924b136b7ddec25ea91d602307b8eeb32b9fdda Mon Sep 17 00:00:00 2001 From: karlicoss Date: Sat, 14 Oct 2023 11:25:37 +0100 Subject: [PATCH] rename core.modules.json_new -> core.modules.json and core.modules.xml_clean -> core.modules.xml I think initially it was like this because I was running it like `bleanser/core/modules/xml.py prune ...` This would implicitly add src/bleanser/core to PYTHONPATH, and if you have any modules conflicting with builtin modules, it would shadow them (like xml or json), and result in all sorts of trouble (e.g. segfaults). Running as `python3 -m bleanser.core.modules.xml prune ...` is much cleaner anyway and results in less trouble in other aspects as well, so let's just do that and rename the core modules for the ease of discovery --- README.md | 8 ++++---- src/bleanser/core/modules/extract.py | 0 src/bleanser/core/modules/{json_new.py => json.py} | 0 src/bleanser/core/modules/sqlite.py | 0 src/bleanser/core/modules/{xml_clean.py => xml.py} | 4 ---- src/bleanser/modules/bumble_android.py | 2 +- src/bleanser/modules/foursquare.py | 2 +- src/bleanser/modules/ghexport.py | 2 +- src/bleanser/modules/goodreads.py | 2 +- src/bleanser/modules/instagram_android.py | 2 +- src/bleanser/modules/json_new.py | 4 ++-- src/bleanser/modules/lastfm.py | 2 +- src/bleanser/modules/monzo.py | 2 +- src/bleanser/modules/pinboard.py | 2 +- src/bleanser/modules/pocket.py | 2 +- src/bleanser/modules/reddit.py | 2 +- src/bleanser/modules/rescuetime.py | 2 +- src/bleanser/modules/skype_android.py | 2 +- src/bleanser/modules/smscalls.py | 2 +- src/bleanser/modules/spotify.py | 2 +- src/bleanser/modules/spotifyexport.py | 2 +- src/bleanser/modules/stackexchange.py | 2 +- src/bleanser/modules/xml_clean.py | 2 +- src/bleanser/tests/test_hypothesis.py | 2 +- 24 files changed, 24 insertions(+), 28 deletions(-) mode change 100755 => 100644 src/bleanser/core/modules/extract.py rename src/bleanser/core/modules/{json_new.py => json.py} (100%) mode change 100755 => 
100644 mode change 100755 => 100644 src/bleanser/core/modules/sqlite.py rename src/bleanser/core/modules/{xml_clean.py => xml.py} (96%) mode change 100755 => 100644 diff --git a/README.md b/README.md index 85469fb..6bd242d 100644 --- a/README.md +++ b/README.md @@ -71,7 +71,7 @@ if __name__ == "__main__": This is **always** acting on the data loaded into memory/temporary files, it is not modifying the files itself. Once it determines an input file can be pruned, it will warn you by default, and you can specify `--move` or `--remove` with the CLI (see below) to remove it. -There are particular normalisers for different filetypes, e.g. [`json`](./src/bleanser/core/modules/json_new.py), [`xml`](./src/bleanser/core/modules/xml_clean.py), [`sqlite`](./src/bleanser/core/modules/sqlite.py) which might work if your data is especially basic, but typically this requires subclassing one of those and writing some custom code to 'cleanup' the data, so it can be properly compared/diffed. +There are particular normalisers for different filetypes, e.g. [`json`](./src/bleanser/core/modules/json.py), [`xml`](./src/bleanser/core/modules/xml.py), [`sqlite`](./src/bleanser/core/modules/sqlite.py) which might work if your data is especially basic, but typically this requires subclassing one of those and writing some custom code to 'cleanup' the data, so it can be properly compared/diffed. ### do_cleanup @@ -117,7 +117,7 @@ As it can be a bit difficult to follow, generally this is doing something like. For example, the JSON normaliser calls a `cleanup` function before it starts processing the data. 
If you wanted to remove the `images` key like shown above, you could do so like: ```python -from bleanser.core.modules.json_new import JsonNormaliser, delkeys, Json +from bleanser.core.modules.json import JsonNormaliser, delkeys, Json class Normaliser(JsonNormaliser): @@ -136,8 +136,8 @@ if __name__ == '__main__': For common formats, the helper classes handle all the tedious bits like loading/parsing data, managing the temporary files. The `Normaliser.main` calls the CLI, which looks like this: ``` - $ python3 -m bleanser.core.modules.json_new prune --help -Usage: python -m bleanser.core.modules.json_new prune [OPTIONS] PATH + $ python3 -m bleanser.core.modules.json prune --help +Usage: python -m bleanser.core.modules.json prune [OPTIONS] PATH Options: --glob Treat the path as glob (in the glob.glob sense) diff --git a/src/bleanser/core/modules/extract.py b/src/bleanser/core/modules/extract.py old mode 100755 new mode 100644 diff --git a/src/bleanser/core/modules/json_new.py b/src/bleanser/core/modules/json.py old mode 100755 new mode 100644 similarity index 100% rename from src/bleanser/core/modules/json_new.py rename to src/bleanser/core/modules/json.py diff --git a/src/bleanser/core/modules/sqlite.py b/src/bleanser/core/modules/sqlite.py old mode 100755 new mode 100644 diff --git a/src/bleanser/core/modules/xml_clean.py b/src/bleanser/core/modules/xml.py old mode 100755 new mode 100644 similarity index 96% rename from src/bleanser/core/modules/xml_clean.py rename to src/bleanser/core/modules/xml.py index 09a8233..1375203 --- a/src/bleanser/core/modules/xml_clean.py +++ b/src/bleanser/core/modules/xml.py @@ -1,8 +1,4 @@ #!/usr/bin/env python3 -""" -Ugh, wtf?? If I name it simply 'xml', I get all sorts of weird behaviours... presumably because it conflicts with some system modules.. 
-""" - from lxml import etree from contextlib import contextmanager diff --git a/src/bleanser/modules/bumble_android.py b/src/bleanser/modules/bumble_android.py index 3e7b4cc..14338a8 100755 --- a/src/bleanser/modules/bumble_android.py +++ b/src/bleanser/modules/bumble_android.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 from bleanser.core.modules.sqlite import SqliteNormaliser, Tool -from bleanser.core.modules.json_new import delkeys +from bleanser.core.modules.json import delkeys import json diff --git a/src/bleanser/modules/foursquare.py b/src/bleanser/modules/foursquare.py index d7c4bb5..f0e8863 100755 --- a/src/bleanser/modules/foursquare.py +++ b/src/bleanser/modules/foursquare.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 from __future__ import annotations -from bleanser.core.modules.json_new import JsonNormaliser, Json, delkeys +from bleanser.core.modules.json import JsonNormaliser, Json, delkeys TARGET = object() diff --git a/src/bleanser/modules/ghexport.py b/src/bleanser/modules/ghexport.py index e80cde3..e7d9642 100755 --- a/src/bleanser/modules/ghexport.py +++ b/src/bleanser/modules/ghexport.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -from bleanser.core.modules.json_new import JsonNormaliser, Json +from bleanser.core.modules.json import JsonNormaliser, Json class Normaliser(JsonNormaliser): diff --git a/src/bleanser/modules/goodreads.py b/src/bleanser/modules/goodreads.py index 6e91ca9..1c75c70 100755 --- a/src/bleanser/modules/goodreads.py +++ b/src/bleanser/modules/goodreads.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -from bleanser.core.modules.xml_clean import Normaliser as XmlNormaliser +from bleanser.core.modules.xml import Normaliser as XmlNormaliser class Normaliser(XmlNormaliser): diff --git a/src/bleanser/modules/instagram_android.py b/src/bleanser/modules/instagram_android.py index 7ee7557..81dfff6 100755 --- a/src/bleanser/modules/instagram_android.py +++ b/src/bleanser/modules/instagram_android.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -from 
bleanser.core.modules.json_new import delkeys, patch_atoms +from bleanser.core.modules.json import delkeys, patch_atoms from bleanser.core.modules.sqlite import SqliteNormaliser, Tool import json diff --git a/src/bleanser/modules/json_new.py b/src/bleanser/modules/json_new.py index 6f45eaa..69bd420 100755 --- a/src/bleanser/modules/json_new.py +++ b/src/bleanser/modules/json_new.py @@ -1,8 +1,8 @@ #!/usr/bin/env python3 -from bleanser.core.modules.json_new import * # noqa: F403, F401 +from bleanser.core.modules.json import * # noqa: F403, F401 import warnings -warnings.warn("Module 'bleanser.modules.json_new' is deprecated. Use 'bleanser.core.modules.json_new' instead.", DeprecationWarning) +warnings.warn("Module 'bleanser.modules.json_new' is deprecated. Use 'bleanser.core.modules.json' instead.", DeprecationWarning) if __name__ == '__main__': diff --git a/src/bleanser/modules/lastfm.py b/src/bleanser/modules/lastfm.py index 5e870e7..ea9f4ca 100755 --- a/src/bleanser/modules/lastfm.py +++ b/src/bleanser/modules/lastfm.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -from bleanser.core.modules.json_new import JsonNormaliser, Json +from bleanser.core.modules.json import JsonNormaliser, Json class Normaliser(JsonNormaliser): diff --git a/src/bleanser/modules/monzo.py b/src/bleanser/modules/monzo.py index 41e21b4..47883f3 100755 --- a/src/bleanser/modules/monzo.py +++ b/src/bleanser/modules/monzo.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -from bleanser.core.modules.json_new import JsonNormaliser, Json, delkeys +from bleanser.core.modules.json import JsonNormaliser, Json, delkeys class Normaliser(JsonNormaliser): diff --git a/src/bleanser/modules/pinboard.py b/src/bleanser/modules/pinboard.py index da6aa25..91d8523 100755 --- a/src/bleanser/modules/pinboard.py +++ b/src/bleanser/modules/pinboard.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -from bleanser.core.modules.json_new import JsonNormaliser +from bleanser.core.modules.json import JsonNormaliser class 
Normaliser(JsonNormaliser): diff --git a/src/bleanser/modules/pocket.py b/src/bleanser/modules/pocket.py index 343d1d4..64a94a1 100755 --- a/src/bleanser/modules/pocket.py +++ b/src/bleanser/modules/pocket.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -from bleanser.core.modules.json_new import JsonNormaliser, Json, delkeys +from bleanser.core.modules.json import JsonNormaliser, Json, delkeys class Normaliser(JsonNormaliser): diff --git a/src/bleanser/modules/reddit.py b/src/bleanser/modules/reddit.py index 91e3395..de414da 100755 --- a/src/bleanser/modules/reddit.py +++ b/src/bleanser/modules/reddit.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 from itertools import chain -from bleanser.core.modules.json_new import JsonNormaliser, Json, delkeys +from bleanser.core.modules.json import JsonNormaliser, Json, delkeys REDDIT_IGNORE_KEYS = { diff --git a/src/bleanser/modules/rescuetime.py b/src/bleanser/modules/rescuetime.py index 151923d..49db750 100755 --- a/src/bleanser/modules/rescuetime.py +++ b/src/bleanser/modules/rescuetime.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -from bleanser.core.modules.json_new import JsonNormaliser +from bleanser.core.modules.json import JsonNormaliser class Normaliser(JsonNormaliser): diff --git a/src/bleanser/modules/skype_android.py b/src/bleanser/modules/skype_android.py index 389b3fa..e248f80 100644 --- a/src/bleanser/modules/skype_android.py +++ b/src/bleanser/modules/skype_android.py @@ -2,7 +2,7 @@ import json from bleanser.core.modules.sqlite import SqliteNormaliser, Tool -from bleanser.core.modules.json_new import delkeys +from bleanser.core.modules.json import delkeys class Normaliser(SqliteNormaliser): diff --git a/src/bleanser/modules/smscalls.py b/src/bleanser/modules/smscalls.py index 599ff1c..0ad66a6 100755 --- a/src/bleanser/modules/smscalls.py +++ b/src/bleanser/modules/smscalls.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -from bleanser.core.modules.xml_clean import Normaliser as XmlNormaliser +from bleanser.core.modules.xml 
import Normaliser as XmlNormaliser class Normaliser(XmlNormaliser): diff --git a/src/bleanser/modules/spotify.py b/src/bleanser/modules/spotify.py index ef10ebf..7fd484a 100755 --- a/src/bleanser/modules/spotify.py +++ b/src/bleanser/modules/spotify.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -from bleanser.core.modules.json_new import JsonNormaliser, Json, delkeys +from bleanser.core.modules.json import JsonNormaliser, Json, delkeys class Normaliser(JsonNormaliser): diff --git a/src/bleanser/modules/spotifyexport.py b/src/bleanser/modules/spotifyexport.py index 9465b2d..4a21243 100755 --- a/src/bleanser/modules/spotifyexport.py +++ b/src/bleanser/modules/spotifyexport.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -from bleanser.core.modules.json_new import JsonNormaliser, delkeys, Json +from bleanser.core.modules.json import JsonNormaliser, delkeys, Json class Normaliser(JsonNormaliser): diff --git a/src/bleanser/modules/stackexchange.py b/src/bleanser/modules/stackexchange.py index 65e06b1..68be79f 100644 --- a/src/bleanser/modules/stackexchange.py +++ b/src/bleanser/modules/stackexchange.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -from bleanser.core.modules.json_new import JsonNormaliser, Json, delkeys +from bleanser.core.modules.json import JsonNormaliser, Json, delkeys class Normaliser(JsonNormaliser): diff --git a/src/bleanser/modules/xml_clean.py b/src/bleanser/modules/xml_clean.py index 5216f48..cc8be15 100755 --- a/src/bleanser/modules/xml_clean.py +++ b/src/bleanser/modules/xml_clean.py @@ -1,4 +1,4 @@ -from bleanser.core.modules.xml_clean import * # noqa: F401, F403 +from bleanser.core.modules.xml import * # noqa: F401, F403 import warnings -warnings.warn("Module 'bleanser.modules.xml_clean' is deprecated. Use 'bleanser.core.modules.xml_clean' instead.", DeprecationWarning) +warnings.warn("Module 'bleanser.modules.xml_clean' is deprecated. 
Use 'bleanser.core.modules.xml' instead.", DeprecationWarning) diff --git a/src/bleanser/tests/test_hypothesis.py b/src/bleanser/tests/test_hypothesis.py index 01fffe2..2e10f23 100644 --- a/src/bleanser/tests/test_hypothesis.py +++ b/src/bleanser/tests/test_hypothesis.py @@ -2,7 +2,7 @@ import pytest -from bleanser.core.modules.json_new import JsonNormaliser as Normaliser +from bleanser.core.modules.json import JsonNormaliser as Normaliser from bleanser.tests.common import TESTDATA, actions, hack_attribute