experiment with hypothesis normaliser via HPI extraction and the corresponding 'cleanup' normaliser #28

Draft · wants to merge 3 commits into master
31 changes: 31 additions & 0 deletions src/bleanser/modules/hpi/hypothesis.py
@@ -0,0 +1,31 @@
from pathlib import Path
from typing import Any, Iterator

from bleanser.core.modules.extract import ExtractObjectsNormaliser

from my.core.cfg import tmp_config
import my.hypothesis


# FIXME: need to disable cachew when normalising via HPI,
# otherwise it will thrash the cache all the time,
# and could even give inconsistent results if there is a bug in the cache key


class Normaliser(ExtractObjectsNormaliser):
    def extract_objects(self, path: Path) -> Iterator[Any]:
        class config:
            class hypothesis:
                export_path = path

        with tmp_config(modules=my.hypothesis.__name__, config=config):
            ## sanity check to make sure tmp_config worked as expected
            # (most modules should be able to use module.inputs() directly instead)
            dal = my.hypothesis._dal()
            assert len(dal.sources) == 1
            ##
            yield from my.hypothesis.highlights()


if __name__ == '__main__':
    Normaliser.main()
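A possible fix for the FIXME above, mirroring what reddit.py below already does: disable cachew via its environment variable before the HPI module is imported, so normalising files one at a time neither pollutes nor reads from the shared cache. A minimal sketch, assuming cachew honours CACHEW_DISABLE the same way reddit.py relies on it:

import os

# turn cachew off before my.hypothesis is imported, so caching is off from the start
os.environ['CACHEW_DISABLE'] = '*'

import my.hypothesis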
33 changes: 33 additions & 0 deletions src/bleanser/modules/hpi/kobo.py
@@ -0,0 +1,33 @@
from pathlib import Path
from typing import Any, Iterator

from bleanser.core.modules.extract import ExtractObjectsNormaliser

from my.core.cfg import tmp_config
import my.kobo


class Normaliser(ExtractObjectsNormaliser):
    def extract_objects(self, path: Path) -> Iterator[Any]:
        class config:
            class kobo:
                export_path = path

        with tmp_config(modules=my.kobo.__name__, config=config):
            ## sanity check to make sure tmp_config worked as expected
            assert len(my.kobo.DATABASES) == 1
            ##
            yield from my.kobo._iter_highlights()
            # TODO: events could presumably be extracted too (see the sketch below)


if __name__ == '__main__':
    Normaliser.main()
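As the TODO above notes, Kobo events could presumably be extracted alongside highlights with one more yield from inside the same tmp_config block. A sketch, where my.kobo._iter_events is an assumed accessor mirroring _iter_highlights (not verified against my.kobo):

from pathlib import Path
from typing import Any, Iterator

from my.core.cfg import tmp_config
import my.kobo


def extract_objects(path: Path) -> Iterator[Any]:
    class config:
        class kobo:
            export_path = path

    with tmp_config(modules=my.kobo.__name__, config=config):
        assert len(my.kobo.DATABASES) == 1
        yield from my.kobo._iter_highlights()
        yield from my.kobo._iter_events()  # assumed accessor, not verified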
54 changes: 54 additions & 0 deletions src/bleanser/modules/hpi/reddit.py
@@ -0,0 +1,54 @@
import os
from pathlib import Path
from typing import Any, Iterator

from bleanser.core.modules.extract import ExtractObjectsNormaliser

from my.core.cfg import tmp_config
from my.core.freezer import Freezer


# disable the cache, otherwise it's going to get flushed all the time
os.environ['CACHEW_DISABLE'] = '*'
os.environ.pop('ENLIGHTEN_ENABLE', None)
os.environ['LOGGING_LEVEL_rexport_dal'] = 'WARNING'
# os.environ['LOGGING_LEVEL_my_reddit_rexport'] = 'WARNING'

import my.reddit.rexport as reddit


class Normaliser(ExtractObjectsNormaliser):
    def extract_objects(self, path: Path) -> Iterator[Any]:
        class config:
            class reddit:
                # FIXME: need to put this in reddit.rexport
                export_path = path

        with tmp_config(modules=reddit.__name__, config=config):
            ## sanity check to make sure tmp_config worked as expected
            # (most modules should be able to use module.inputs() directly instead)
            assert len(reddit.inputs()) == 1

            reddit_profile = lambda: [reddit.profile()]
            for (method, type_) in [
                # fmt: off
                (reddit.saved       , reddit.Save       ),
                (reddit.comments    , reddit.Comment    ),
                (reddit.submissions , reddit.Submission ),
                (reddit.upvoted     , reddit.Upvote     ),
                (reddit.subreddits  , reddit.Subreddit  ),
                (reddit.multireddits, reddit.Multireddit),
                (reddit_profile     , reddit.Profile    ),
                # fmt: on
            ]:
                # run objects through the freezer so they are dumped as dataclasses
                freezer = Freezer(Orig=type_)
                for x in map(freezer.freeze, method()):
                    # raw data might be too noisy to compare
                    x.raw = None  # type: ignore
                    # FIXME: the freezer currently hardcodes 'RRR' as the dataclass name,
                    # so wrap each object in a dict keyed by its real type name
                    yield {type_.__name__: x}


if __name__ == '__main__':
    Normaliser.main()
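A toy illustration (hypothetical data, with a plain dataclass standing in for the freezer output) of why each frozen object is wrapped as {type name: object}: since the freezer currently hardcodes the dataclass name, a Save and a Comment with identical fields would otherwise look the same once dumped, and the normaliser could wrongly collapse them:

from dataclasses import dataclass


@dataclass
class RRR:  # stand-in for the hardcoded dataclass name the FIXME mentions
    id: str


save, comment = RRR(id='abc'), RRR(id='abc')
assert save == comment                         # indistinguishable on their own
assert {'Save': save} != {'Comment': comment}  # the dict wrapper keeps the type visible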
13 changes: 13 additions & 0 deletions src/bleanser/modules/hypothesis.py
@@ -0,0 +1,13 @@
from bleanser.core.modules.json import JsonNormaliser, Json


class Normaliser(JsonNormaliser):
    def cleanup(self, j: Json) -> Json:
        if isinstance(j, list):
            # old export format
            return j
        del j['profile']['features']  # flaky, changes between exports
        return j


if __name__ == '__main__':
    Normaliser.main()
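For reference, the intended effect of cleanup on a new-format export, shown on a hypothetical, heavily trimmed payload: the flaky profile.features block is dropped and everything else passes through untouched:

# hypothetical, heavily trimmed new-format export
j = {
    'profile': {'userid': 'acct:someone@hypothes.is', 'features': {'some_flag': True}},
    'annotations': [{'id': 'a1', 'text': 'some highlight'}],
}
del j['profile']['features']  # what cleanup() does for dict-shaped (new) exports
assert j == {
    'profile': {'userid': 'acct:someone@hypothes.is'},
    'annotations': [{'id': 'a1', 'text': 'some highlight'}],
}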