From 64bbf3e75ed019390f8b52e6504a7b7e6ccff881 Mon Sep 17 00:00:00 2001 From: Sergey Motornyuk Date: Mon, 1 Jul 2024 00:00:19 +0300 Subject: [PATCH] chore: move shared into internal --- README.md | 40 +- README.md.temp | 26 +- ckanext/collection/cli.py | 4 +- ckanext/collection/interfaces.py | 15 +- ckanext/collection/plugin.py | 9 +- ckanext/collection/shared.py | 302 +--------- ckanext/collection/tests/conftest.py | 6 +- ckanext/collection/tests/test_dive.py | 6 +- ckanext/collection/tests/test_shared.py | 67 ++- ckanext/collection/tests/utils/test_data.py | 10 + ckanext/collection/utils/collection/base.py | 41 +- .../collection/utils/collection/explorer.py | 16 +- ckanext/collection/utils/columns.py | 28 +- ckanext/collection/utils/data/__init__.py | 4 +- ckanext/collection/utils/data/api.py | 12 +- ckanext/collection/utils/data/base.py | 13 +- ckanext/collection/utils/data/db.py | 6 +- ckanext/collection/utils/data/model.py | 26 +- ckanext/collection/utils/data/stream.py | 4 +- ckanext/collection/utils/db_connection.py | 14 +- ckanext/collection/utils/filters.py | 10 +- ckanext/collection/utils/pager.py | 30 +- .../collection/utils/serialize/__init__.py | 116 ++-- docs/detailed.md | 557 ------------------ docs/structure/collection.md | 196 ++++++ docs/structure/columns.md | 21 + docs/structure/data.md | 55 ++ docs/structure/filters.md | 9 + docs/structure/index.md | 80 ++- docs/structure/pager.md | 50 ++ docs/structure/serializer.md | 34 ++ docs/usage.md | 285 +++++++-- mkdocs.yml | 5 +- 33 files changed, 1003 insertions(+), 1094 deletions(-) diff --git a/README.md b/README.md index e4a4ec4..a91a291 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ # ckanext-collection -Tools for building interfaces for data collections. +Tools for building interfaces for data collections using a declarative style. 
This extension simplifies describing series of items, such as datasets from search page, users registered on portal, rows of CSV file, tables in DB, @@ -28,27 +28,26 @@ Add `collection` to the `ckan.plugins` setting in your CKAN config file Define the collection ```python - from ckan import model -from ckanext.collection.utils import * +from ckanext.collection.shared import collection, data, columns, serialize -## collection of all resources -class MyCollection(Collection): - DataFactory = ModelData.with_attributes(model=model.Resource) +## collection of all resources from DB +class MyCollection(collection.Collection): + DataFactory = data.ModelData.with_attributes(model=model.Resource) # `names` controls names of fields exported by serializer # further in this guide - ColumnsFactory = Columns.with_attributes(names=["name", "size"]) + ColumnsFactory = columns.Columns.with_attributes(names=["name", "size"]) ## collection of all packages available via search API -class MyCollection(Collection): - DataFactory = ApiSearchData.with_attributes(action="package_search") - ColumnsFactory = Columns.with_attributes(names=["name", "title"]) +class MyCollection(collection.Collection): + DataFactory = data.ApiSearchData.with_attributes(action="package_search") + ColumnsFactory = columns.Columns.with_attributes(names=["name", "title"]) ## collection of all records from CSV file -class MyCollection(Collection): - DataFactory = CsvFileData.with_attributes(source="/path/to/file.csv") - ColumnsFactory = Columns.with_attributes(names=["a", "b"]) +class MyCollection(collection.Collection): + DataFactory = data.CsvFileData.with_attributes(source="/path/to/file.csv") + ColumnsFactory = columns.Columns.with_attributes(names=["a", "b"]) ``` @@ -57,7 +56,7 @@ Initialize collection object and work with data: ```python # collection with first page of results(1st-10th items) -col = MyCollection("", {}) +col = MyCollection() items = list(col) # collection with third page of results(21st-30th 
items) @@ -67,12 +66,11 @@ items = list(col) # alternatively, read all the items into memory at once, without pagination. # It may be quite expensive operation depending on number of items -col = MyCollection("", {}) +col = MyCollection() items = list(col.data) -# or get the slice of data from 2nd till 5th(not includeing 5th, -# just like in python slices) -items = col.data.range(2, 5) +# or get the slice of data from 8th till 12th +items = list(col.data[8:12]) # check total number of items in collection print(col.data.total) @@ -84,13 +82,13 @@ Serialize data using `Serializer` service: ```python # JSON string -serializer = JsonSerializer(col) +serializer = serialize.JsonSerializer(col) # or CSV string -serializer = CsvSerializer(col) +serializer = serialize.CsvSerializer(col) # or python list of dictionaries -serializer = DictListSerializer(col) +serializer = serialize.DictListSerializer(col) print(serializer.serialize()) diff --git a/README.md.temp b/README.md.temp index e4a4ec4..cbc6289 100644 --- a/README.md.temp +++ b/README.md.temp @@ -30,25 +30,25 @@ Define the collection ```python from ckan import model -from ckanext.collection.utils import * +from ckanext.collection.shared import collection, data, columns, serialize ## collection of all resources -class MyCollection(Collection): - DataFactory = ModelData.with_attributes(model=model.Resource) +class MyCollection(collection.Collection): + DataFactory = data.ModelData.with_attributes(model=model.Resource) # `names` controls names of fields exported by serializer # further in this guide - ColumnsFactory = Columns.with_attributes(names=["name", "size"]) + ColumnsFactory = columns.Columns.with_attributes(names=["name", "size"]) ## collection of all packages available via search API -class MyCollection(Collection): - DataFactory = ApiSearchData.with_attributes(action="package_search") - ColumnsFactory = Columns.with_attributes(names=["name", "title"]) +class MyCollection(collection.Collection): + DataFactory = 
data.ApiSearchData.with_attributes(action="package_search") + ColumnsFactory = columns.Columns.with_attributes(names=["name", "title"]) ## collection of all records from CSV file -class MyCollection(Collection): - DataFactory = CsvFileData.with_attributes(source="/path/to/file.csv") - ColumnsFactory = Columns.with_attributes(names=["a", "b"]) +class MyCollection(collection.Collection): + DataFactory = data.CsvFileData.with_attributes(source="/path/to/file.csv") + ColumnsFactory = columns.Columns.with_attributes(names=["a", "b"]) ``` @@ -84,13 +84,13 @@ Serialize data using `Serializer` service: ```python # JSON string -serializer = JsonSerializer(col) +serializer = serialize.JsonSerializer(col) # or CSV string -serializer = CsvSerializer(col) +serializer = serialize.CsvSerializer(col) # or python list of dictionaries -serializer = DictListSerializer(col) +serializer = serialize.DictListSerializer(col) print(serializer.serialize()) diff --git a/ckanext/collection/cli.py b/ckanext/collection/cli.py index cf2cb41..b951520 100644 --- a/ckanext/collection/cli.py +++ b/ckanext/collection/cli.py @@ -2,7 +2,7 @@ import click -from ckanext.collection import shared +from ckanext.collection.internal import collection_registry __all__ = ["collection"] @@ -16,6 +16,6 @@ def collection(): @click.option("--name-only", is_flag=True, help="Show only collection names") def list_collections(name_only: bool): """List all registered collections.""" - for name, collection in shared.collection_registry.members.items(): + for name, collection in collection_registry.members.items(): line = name if name_only else f"{name}: {collection}" click.secho(line) diff --git a/ckanext/collection/interfaces.py b/ckanext/collection/interfaces.py index 2819ff6..44e5700 100644 --- a/ckanext/collection/interfaces.py +++ b/ckanext/collection/interfaces.py @@ -6,7 +6,20 @@ class ICollection(Interface): - """Extend functionality of ckanext-collections""" + """Extend functionality of ckanext-collections + + 
Example: + ```python + import ckan.plugins as p + from ckanext.collection import shared + + class MyPlugin(p.SingletonPlugin): + p.implements(shared.ICollection, inherit=True) + + def get_collection_factories(self) -> dict[str, CollectionFactory]: + return {...} + ``` + """ def get_collection_factories(self) -> dict[str, CollectionFactory]: """Register named collection factories. diff --git a/ckanext/collection/plugin.py b/ckanext/collection/plugin.py index 487a895..73e6382 100644 --- a/ckanext/collection/plugin.py +++ b/ckanext/collection/plugin.py @@ -8,8 +8,9 @@ import ckan.plugins.toolkit as tk from ckan.common import CKANConfig -from . import shared, signals +from . import signals from .interfaces import CollectionFactory, ICollection +from .internal import collection_registry try: from ckanext.ap_main.interfaces import IAdminPanel @@ -99,11 +100,11 @@ def get_collection_factories(self) -> dict[str, CollectionFactory]: def _register_collections(): - shared.collection_registry.reset() + collection_registry.reset() for plugin in p.PluginImplementations(ICollection): for name, factory in plugin.get_collection_factories().items(): - shared.collection_registry.register(name, factory) + collection_registry.register(name, factory) results = cast( "list[tuple[Any, dict[str, CollectionFactory]]]", @@ -111,4 +112,4 @@ def _register_collections(): ) for _, factories in results: for name, factory in factories.items(): - shared.collection_registry.register(name, factory) + collection_registry.register(name, factory) diff --git a/ckanext/collection/shared.py b/ckanext/collection/shared.py index 26d8cc0..f8f9757 100644 --- a/ckanext/collection/shared.py +++ b/ckanext/collection/shared.py @@ -1,279 +1,29 @@ -"""Logic used across collection utilities. 
- -""" - from __future__ import annotations -import abc -import dataclasses -import inspect -import logging -from collections.abc import Hashable -from typing import Any, Callable, Generic, TypeVar, cast - -import ckan.plugins.toolkit as tk - from . import types - -log = logging.getLogger(__name__) -T = TypeVar("T") - - -class Sentinel: ... - - -SENTINEL: Any = Sentinel() - - -@dataclasses.dataclass -class Registry(Generic[T]): - members: dict[Hashable, T] - - def reset(self): - self.members.clear() - - def register(self, name: Hashable, member: T): - self.members[name] = member - - def get(self, name: Hashable) -> T | None: - return self.members.get(name) - - -collection_registry: Registry[types.CollectionFactory] = Registry({}) - - -class AttachTrait(abc.ABC, Generic[types.TDataCollection]): - """Attach collection to the current object. - - `_attach` method must be called as early as possible in the constructor of - the derived class. It makes collection available inside an instance via - `attached` property. Because initialization of collection utilities often - relies on collection details, you should call `_attach` before any other - logic. - - Example: - - >>> class Impl(AttachTrait): - >>> def __init__(self, collection): - >>> self._attach(collection) - >>> - >>> collection = object() - >>> obj = Impl(collection) - >>> assert obj.attached is collection - - """ - - __collection: types.TDataCollection - - def _attach(self, obj: types.TDataCollection): - self.__collection = obj - if isinstance(self, types.Service): - # this block allows attaching services to non-collections. Mainly - # it's used in tests - replace_service = getattr(obj, "replace_service", None) - if replace_service: - replace_service(self) - - @property - def attached(self) -> types.TDataCollection: - return self.__collection - - -class AttrSettingsTrait: - """Get attribute value from settings for the current utility object. 
- - To mark an attribute as configurable, declare it using - `configurable_attribute` function: - - >>> class Impl(AttrSettingsTrait): - >>> attr = configurable_attribute("default value") - - To initialize attributes, call `_gather_settings` with dictionary - containing attribute values. If attribute is available in the dictionary, - dictionary value moved into attribute: - - >>> obj = Impl() - >>> obj._gather_settings({"attr": "custom value"}) - >>> assert obj.attr == "custom value" - - If dictionary doesn't have a member named after the attribute, default - value/factory is used to initialize the attribute - - >>> obj = Impl() - >>> obj._gather_settings({}) - >>> assert obj.attr == "default value" - - `_gather_settings` can be called multiple times to reset/modify - settings. Keep in mind, that every call processes all the configurable - attributes of the instance. For example, if instance has `a` and `b` - configurable attributes and you call `_gather_settings` with `{"a": 1}` - first, and then with `{"b": 2}`, value of `a` will be removed and replaced - by the default: - - >>> class Impl(AttrSettingsTrait): - >>> a = configurable_attribute(None) - >>> b = configurable_attribute(None) - >>> - >>> obj = Impl() - >>> obj._gather_settings({"a": 1}) - >>> assert obj.a == 1 - >>> assert obj.b is None - >>> - >>> obj._gather_settings({"b": 2}) - >>> assert obj.a == None - >>> assert obj.b is 2 - >>> - >>> obj._gather_settings({"b": 2, "a": 1}) - >>> assert obj.a == 1 - >>> assert obj.b == 2 - >>> - >>> obj._gather_settings({}) - >>> assert obj.a is None - >>> assert obj.b is None - - The best place to gather settings, early in the constructor, right after - `_attach`. 
Example: - - >>> class Impl(AttachTrait, AttrSettingsTrait): - >>> a = configurable_attribute(None) - >>> b = configurable_attribute(None) - >>> - >>> def __init__(self, collection, **kwargs): - >>> self._attach(collection) - >>> self._gather_settings(kwargs) - - """ - - def _gather_settings(self, kwargs: dict[str, Any]): - for k, attr in inspect.getmembers( - type(self), - lambda attr: isinstance(attr, _InitAttr), - ): - attr: _InitAttr - if k in kwargs: - setattr(self, k, kwargs.pop(k)) - - else: - setattr(self, k, attr.get_default(self, k)) - - -@dataclasses.dataclass -class _InitAttr: - default: Any = SENTINEL - default_factory: Callable[[Any], Any] = SENTINEL - - def __post_init__(self): - if self.default is not self.default_factory: - return - - def get_default(self, obj: Any, name: str): - if self.default is not SENTINEL: - return self.default - - if self.default_factory is not SENTINEL: - return self.default_factory(obj) - - msg = ( - f"__init__() of {obj.__class__.__name__} missing " - + f"1 required keyword-only argument: '{name}'" - ) - raise TypeError(msg) - - -def configurable_attribute( - default: T | Sentinel = SENTINEL, - default_factory: Callable[[Any], T] = SENTINEL, -) -> T: - """Declare configurable attribute. - - Example: - - >>> class DataFactory(Data): - >>> private = configurable_attribute(False) - >>> - >>> data = DataFactory(None, private=True) - >>> assert data.private - """ - return cast(T, _InitAttr(default, cast(Any, default_factory))) - - -class UserTrait(AttrSettingsTrait): - """Add configurable `user` attribute, with default set to - `current_user.name`. - - """ - - user = configurable_attribute( - default_factory=lambda self: tk.current_user.name if tk.current_user else "", - ) - - -class Domain(AttachTrait[types.TDataCollection], AttrSettingsTrait): - """Standard initializer for collection utilities. - - Used as base class for utility instances created during collection - initialization(e.g Pager, Columns, Filters, Data). 
- - Defines standard constructor signature, attaches collection to the utility - instance and collects settings. - - """ - - def __init__(self, obj: types.TDataCollection, /, **kwargs: Any): - self._attach(obj) - self._gather_settings(kwargs) - - @classmethod - def with_attributes( - cls: type[T], - name: str | None = None, - /, - **attributes: Any, - ) -> type[T]: - """Create anonymous derivable of the class with overriden attributes. - - This is a shortcut for defining a proper subclass of the domain: - - >>> class Parent(Domain): - >>> prop = "parent value" - >>> - >>> class Child(Parent): - >>> prop = "child value" - >>> custom = "custom value" - >>> - >>> # equivalent - >>> Child = Parent.with_attributes(prop="child value", custom="custom value") - """ - return cast("type[T]", type(name or cls.__name__, (cls,), attributes)) - - -def parse_sort(sort: str) -> tuple[str, bool]: - """Parse sort as column and sorting direction. - - Turn `-name` and `name desc` into `(name, True)`. Everything else turns - into `(name, False)`. 
- - """ - desc = False - if sort.startswith("-"): - sort = sort[1:] - desc = True - - elif len(parts := sort.split()) == 2: - sort, direction = parts - - if direction.lower() == "desc": - desc = True - - return sort, desc - - -def get_collection( - name: str, - params: dict[str, Any], - **kwargs: Any, -) -> types.BaseCollection | None: - if factory := collection_registry.get(name): - return factory(name, params, **kwargs) - - return None +from .interfaces import ICollection +from .internal import ( + Domain, + UserTrait, + configurable_attribute, + get_collection, + parse_sort, +) +from .utils import collection, columns, data, db_connection, filters, pager, serialize + +__all__ = [ + "get_collection", + "parse_sort", + "Domain", + "UserTrait", + "configurable_attribute", + "types", + "ICollection", + "collection", + "columns", + "data", + "db_connection", + "filters", + "pager", + "serialize", +] diff --git a/ckanext/collection/tests/conftest.py b/ckanext/collection/tests/conftest.py index 34d7c64..3a9b6e8 100644 --- a/ckanext/collection/tests/conftest.py +++ b/ckanext/collection/tests/conftest.py @@ -1,11 +1,11 @@ import pytest -from ckanext.collection import shared +from ckanext.collection import internal @pytest.fixture() def collection_registry(): """Collection registry cleaned after each test.""" - yield shared.collection_registry - shared.collection_registry.reset() + yield internal.collection_registry + internal.collection_registry.reset() diff --git a/ckanext/collection/tests/test_dive.py b/ckanext/collection/tests/test_dive.py index 4802196..b40d303 100644 --- a/ckanext/collection/tests/test_dive.py +++ b/ckanext/collection/tests/test_dive.py @@ -4,7 +4,7 @@ from ckan import model -from ckanext.collection import shared +from ckanext.collection import internal from ckanext.collection.utils import * @@ -143,7 +143,7 @@ def test_configurable_attributes(self): col = Collection("name", {}) class MyData(StaticData): - i_am_real = 
shared.configurable_attribute(False) + i_am_real = internal.configurable_attribute(False) data = MyData(col, data=[], i_am_real=True) assert hasattr(data, "data") @@ -155,7 +155,7 @@ def test_configurable_attribute_default_factory(self): class MyData(StaticData): ref = 42 - i_am_real = shared.configurable_attribute( + i_am_real = internal.configurable_attribute( default_factory=lambda self: self.ref * 10, ) diff --git a/ckanext/collection/tests/test_shared.py b/ckanext/collection/tests/test_shared.py index 1a147dd..ce6ec9d 100644 --- a/ckanext/collection/tests/test_shared.py +++ b/ckanext/collection/tests/test_shared.py @@ -7,10 +7,10 @@ import ckan.plugins.toolkit as tk -from ckanext.collection import shared, utils +from ckanext.collection import internal, utils -class AttachExample(shared.AttachTrait[Any]): +class AttachExample(internal.AttachTrait[Any]): pass @@ -21,7 +21,7 @@ class AttachExample(shared.AttachTrait[Any]): def test_attached_object(obj: Any): """Anything can be attached as a collection to AttachTrait implementation.""" example = AttachExample() - example._attach(obj) + example._attach(obj) # type: ignore assert example.attached is obj @@ -29,11 +29,11 @@ def test_service_attach_updates_collection(): """Anything can be attached as a collection to AttachTrait implementation.""" collection = utils.Collection("", {}) example = AttachExample() - example._attach(collection) + example._attach(collection) # type: ignore assert collection.data is not example - data = utils.Data(None) - data._attach(collection) + data = utils.Data(None) # type: ignore + data._attach(collection) # pyright: ignore[reportPrivateUsage] assert collection.data is data @@ -44,15 +44,17 @@ def test_default_factories(self, faker: Faker): value = faker.pyint() another_value = faker.word() - class Test(shared.AttrSettingsTrait): - test = shared.configurable_attribute(default_factory=lambda self: value) + class Test(internal.AttrSettingsTrait): + test = 
internal.configurable_attribute(default_factory=lambda self: value) obj = Test() - obj._gather_settings({}) + obj._gather_settings({}) # pyright: ignore[reportPrivateUsage] assert getattr(obj, name) == value - obj._gather_settings({name: another_value}) + obj._gather_settings( # pyright: ignore[reportPrivateUsage] + {name: another_value}, + ) assert getattr(obj, name) == another_value def test_without_factories(self, faker: Faker): @@ -63,46 +65,48 @@ def test_without_factories(self, faker: Faker): name = "test" value = faker.pyint() - class Test(shared.AttrSettingsTrait): - test = shared.configurable_attribute(None) + class Test(internal.AttrSettingsTrait): + test = internal.configurable_attribute(None) obj = Test() - obj._gather_settings({}) + obj._gather_settings({}) # pyright: ignore[reportPrivateUsage] assert getattr(obj, name) is None - obj._gather_settings({name: value}) + obj._gather_settings({name: value}) # pyright: ignore[reportPrivateUsage] assert getattr(obj, name) == value def test_declaration(self, faker: Faker): first = "first" second = "second" - class Test(shared.AttrSettingsTrait): - first = shared.configurable_attribute(default_factory=lambda self: "first") + class Test(internal.AttrSettingsTrait): + first = internal.configurable_attribute( + default_factory=lambda self: "first", + ) obj = Test() - obj._gather_settings({}) + obj._gather_settings({}) # pyright: ignore[reportPrivateUsage] assert getattr(obj, first) == "first" class ChildTest(Test): - first = shared.configurable_attribute( + first = internal.configurable_attribute( default_factory=lambda self: "new first", ) obj = ChildTest() - obj._gather_settings({}) + obj._gather_settings({}) # pyright: ignore[reportPrivateUsage] assert getattr(obj, first) == "new first" class AnotherChildTest(Test): - second = shared.configurable_attribute( + second = internal.configurable_attribute( default_factory=lambda self: "second", ) obj = AnotherChildTest() - obj._gather_settings({}) + 
obj._gather_settings({}) # pyright: ignore[reportPrivateUsage] assert getattr(obj, first) == "first" assert getattr(obj, second) == "second" @@ -112,36 +116,35 @@ class AnotherChildTest(Test): def test_user_trait(user_factory: Any, faker: Faker, app_with_session: Any): fake_username = faker.name() - class Test(shared.UserTrait): + class Test(internal.UserTrait): pass obj = Test() - obj._gather_settings({}) + obj._gather_settings({}) # pyright: ignore[reportPrivateUsage] assert not obj.user with app_with_session.flask_app.test_request_context(): user = user_factory.model() tk.login_user(user) - obj._gather_settings({}) + obj._gather_settings({}) # pyright: ignore[reportPrivateUsage] assert obj.user == user.name - obj._gather_settings({"user": fake_username}) + obj._gather_settings({"user": fake_username}) # pyright: ignore[reportPrivateUsage] assert obj.user == fake_username class TestDomain: def test_attachment(self): - class Child(shared.Domain[Any]): - ... + class Child(internal.Domain[Any]): ... 
attachment = object() obj = Child(attachment) assert obj.attached is attachment def test_settings(self): - class Child(shared.Domain[Any]): - prop = shared.configurable_attribute( + class Child(internal.Domain[Any]): + prop = internal.configurable_attribute( default_factory=lambda self: self.attached, ) @@ -157,11 +160,11 @@ def test_with_attributes(self): """ - class Child(shared.Domain[Any]): - prop = shared.configurable_attribute(None) + class Child(internal.Domain[Any]): + prop = internal.configurable_attribute(None) default = object() - derived = Child.with_attributes(prop=shared.configurable_attribute(default)) + derived = Child.with_attributes(prop=internal.configurable_attribute(default)) obj = derived(None) assert obj.prop is default diff --git a/ckanext/collection/tests/utils/test_data.py b/ckanext/collection/tests/utils/test_data.py index 3df7c39..81be09a 100644 --- a/ckanext/collection/tests/utils/test_data.py +++ b/ckanext/collection/tests/utils/test_data.py @@ -44,6 +44,16 @@ def test_data(self, stub: mock.Mock): stub.assert_called_once() + @mock.patch.object(data.Data, "compute_data", return_value=range(25)) + def test_range(self, stub: mock.Mock): + obj = data.Data(None) + for i in range(25): + assert obj[i] == i + + assert list(obj.range(1, 5)) == [1, 2, 3, 4] + + assert list(obj[10:13]) == [10, 11, 12] + @mock.patch.object(data.Data, "compute_total", return_value=0) def test_total(self, stub: mock.Mock): obj = data.Data(None) diff --git a/ckanext/collection/utils/collection/base.py b/ckanext/collection/utils/collection/base.py index 6d3a278..e013fab 100644 --- a/ckanext/collection/utils/collection/base.py +++ b/ckanext/collection/utils/collection/base.py @@ -4,7 +4,7 @@ from typing_extensions import Self -from ckanext.collection import shared, types +from ckanext.collection import internal, types from ckanext.collection.utils.columns import Columns from ckanext.collection.utils.data import Data from ckanext.collection.utils.filters import Filters 
@@ -85,7 +85,13 @@ class Collection(types.BaseCollection): "serializer", ) - def __init__(self, name: str, params: dict[str, Any], /, **kwargs: Any): + def __init__( + self, + name: str = "", + params: dict[str, Any] | None = None, + /, + **kwargs: Any, + ): """Use name to pick only relevant parameters. When multiple collections rendered on the same page, the use format @@ -93,6 +99,9 @@ def __init__(self, name: str, params: dict[str, Any], /, **kwargs: Any): collections. """ + if params is None: + params = {} + self.name = name if self.name: @@ -104,14 +113,14 @@ def __init__(self, name: str, params: dict[str, Any], /, **kwargs: Any): self.params = params for service in self._service_names: - self.replace_service(self._instantiate(service, kwargs)) + self._instantiate(service, kwargs) def _instantiate(self, name: str, kwargs: dict[str, Any]) -> Any: if factory := kwargs.get(f"{name}_factory"): fn = "".join(p.capitalize() for p in name.split("_")) setattr(self, fn + "Factory", factory) - value: shared.Domain[Any] | None = kwargs.get(f"{name}_instance") + value: internal.Domain[Any] | None = kwargs.get(f"{name}_instance") if value is None: maker = getattr(self, f"make_{name}") value = maker(**kwargs.get(f"{name}_settings", {})) @@ -122,34 +131,34 @@ def _instantiate(self, name: str, kwargs: dict[str, Any]) -> Any: return value @overload - def replace_service(self, service: types.BaseColumns) -> types.BaseColumns | None: - ... + def replace_service( + self, + service: types.BaseColumns, + ) -> types.BaseColumns | None: ... @overload - def replace_service(self, service: types.BaseData) -> types.BaseData | None: - ... + def replace_service(self, service: types.BaseData) -> types.BaseData | None: ... @overload - def replace_service(self, service: types.BasePager) -> types.BasePager | None: - ... + def replace_service(self, service: types.BasePager) -> types.BasePager | None: ... 
@overload - def replace_service(self, service: types.BaseFilters) -> types.BaseFilters | None: - ... + def replace_service( + self, + service: types.BaseFilters, + ) -> types.BaseFilters | None: ... @overload def replace_service( self, service: types.BaseSerializer, - ) -> types.BaseSerializer | None: - ... + ) -> types.BaseSerializer | None: ... @overload def replace_service( self, service: types.BaseDbConnection, - ) -> types.BaseDbConnection | None: - ... + ) -> types.BaseDbConnection | None: ... def replace_service(self, service: types.Service) -> types.Service | None: """Attach service to collection""" diff --git a/ckanext/collection/utils/collection/explorer.py b/ckanext/collection/utils/collection/explorer.py index a0d56df..b3b6b75 100644 --- a/ckanext/collection/utils/collection/explorer.py +++ b/ckanext/collection/utils/collection/explorer.py @@ -6,7 +6,7 @@ from ckan.authz import is_authorized_boolean from ckan.logic import parse_params -from ckanext.collection import shared, types +from ckanext.collection import internal, types from ckanext.collection.utils.columns import TableColumns from ckanext.collection.utils.data import Data from ckanext.collection.utils.data.db import TableData @@ -18,11 +18,11 @@ class ExplorerSerializer(HtmlSerializer[types.TDataCollection]): - extend_page_template: bool = shared.configurable_attribute( + extend_page_template: bool = internal.configurable_attribute( default_factory=lambda self: tk.request and not tk.request.headers.get("hx-request"), ) - main_template: str = shared.configurable_attribute( + main_template: str = internal.configurable_attribute( "collection/serialize/explorer/main.html", ) @@ -49,7 +49,7 @@ def compute_data(self) -> Iterable[Any]: names = [names] return [ - shared.get_collection( + internal.get_collection( name, params, serializer_settings={"extend_page_template": False}, @@ -57,10 +57,10 @@ def compute_data(self) -> Iterable[Any]: for name in names ] - class 
FiltersFactory(Filters["CollectionExplorer"], shared.UserTrait): - static_collections: Iterable[str] = shared.configurable_attribute( + class FiltersFactory(Filters["CollectionExplorer"], internal.UserTrait): + static_collections: Iterable[str] = internal.configurable_attribute( default_factory=lambda self: list( - map(str, shared.collection_registry.members), + map(str, internal.collection_registry.members), ), ) @@ -160,7 +160,7 @@ def compute_data(self) -> Iterable[Any]: ] class FiltersFactory(DbFilters["DbExplorer"]): - static_tables: Iterable[str] = shared.configurable_attribute( + static_tables: Iterable[str] = internal.configurable_attribute( default_factory=lambda self: [], ) diff --git a/ckanext/collection/utils/columns.py b/ckanext/collection/utils/columns.py index d690aa4..0826406 100644 --- a/ckanext/collection/utils/columns.py +++ b/ckanext/collection/utils/columns.py @@ -3,12 +3,12 @@ import enum from typing import Any, cast -from ckanext.collection import shared, types +from ckanext.collection import internal, types class Columns( types.BaseColumns, - shared.Domain[types.TDataCollection], + internal.Domain[types.TDataCollection], ): """Collection of columns details for filtering/rendering. 
@@ -19,23 +19,25 @@ class Columns( labels: UI labels for columns """ - class Default(shared.Sentinel, enum.Enum): + class Default(internal.Sentinel, enum.Enum): ALL = enum.auto() NOT_HIDDEN = enum.auto() NONE = enum.auto() - names: list[str] = shared.configurable_attribute(default_factory=lambda self: []) - hidden: set[str] = shared.configurable_attribute(default_factory=lambda self: set()) - visible: set[str] = shared.configurable_attribute(Default.NOT_HIDDEN) - sortable: set[str] = shared.configurable_attribute(Default.NONE) - filterable: set[str] = shared.configurable_attribute(Default.NONE) - searchable: set[str] = shared.configurable_attribute(Default.NONE) - labels: dict[str, str] = shared.configurable_attribute(Default.ALL) + names: list[str] = internal.configurable_attribute(default_factory=lambda self: []) + hidden: set[str] = internal.configurable_attribute( + default_factory=lambda self: set(), + ) + visible: set[str] = internal.configurable_attribute(Default.NOT_HIDDEN) + sortable: set[str] = internal.configurable_attribute(Default.NONE) + filterable: set[str] = internal.configurable_attribute(Default.NONE) + searchable: set[str] = internal.configurable_attribute(Default.NONE) + labels: dict[str, str] = internal.configurable_attribute(Default.ALL) serializers: dict[ str, list[tuple[str, dict[str, Any]]], - ] = shared.configurable_attribute( + ] = internal.configurable_attribute( default_factory=lambda self: {}, ) @@ -115,8 +117,8 @@ class DbColumns(Columns[types.TDbCollection]): class TableColumns(DbColumns[types.TDbCollection]): - table: str = shared.configurable_attribute() - filterable: set[str] = shared.configurable_attribute( + table: str = internal.configurable_attribute() + filterable: set[str] = internal.configurable_attribute( default_factory=lambda self: self.Default.NONE, ) diff --git a/ckanext/collection/utils/data/__init__.py b/ckanext/collection/utils/data/__init__.py index 81bdfac..5ee8905 100644 --- 
a/ckanext/collection/utils/data/__init__.py +++ b/ckanext/collection/utils/data/__init__.py @@ -3,7 +3,7 @@ import logging from typing import Iterable -from ckanext.collection import shared, types +from ckanext.collection import internal, types from .api import ApiData, ApiListData, ApiSearchData from .base import Data @@ -41,7 +41,7 @@ class StaticData(Data[types.TData, types.TDataCollection]): This class turns existing iterable into a data source. """ - data: Iterable[types.TData] = shared.configurable_attribute( + data: Iterable[types.TData] = internal.configurable_attribute( default_factory=lambda self: [], ) diff --git a/ckanext/collection/utils/data/api.py b/ckanext/collection/utils/data/api.py index 2208ecb..6cb2853 100644 --- a/ckanext/collection/utils/data/api.py +++ b/ckanext/collection/utils/data/api.py @@ -7,14 +7,14 @@ import ckan.plugins.toolkit as tk from ckan.types import Context -from ckanext.collection import shared, types +from ckanext.collection import internal, types from .base import Data log = logging.getLogger(__name__) -class ApiData(Data[types.TData, types.TDataCollection], shared.UserTrait): +class ApiData(Data[types.TData, types.TDataCollection], internal.UserTrait): """API data source. This base class is suitable for building API calls. 
@@ -23,11 +23,11 @@ class ApiData(Data[types.TData, types.TDataCollection], shared.UserTrait): action: API action that returns the data """ - action: str = shared.configurable_attribute() - payload: dict[str, Any] = shared.configurable_attribute( + action: str = internal.configurable_attribute() + payload: dict[str, Any] = internal.configurable_attribute( default_factory=lambda self: {}, ) - ignore_auth: bool = shared.configurable_attribute(False) + ignore_auth: bool = internal.configurable_attribute(False) def make_context(self): return Context(user=self.user, ignore_auth=self.ignore_auth) @@ -59,7 +59,7 @@ def get_sort(self) -> dict[str, str]: if not sort: return {} - column, desc = shared.parse_sort(sort) + column, desc = internal.parse_sort(sort) if column not in self.attached.columns.sortable: log.warning("Unexpected sort value: %s", sort) diff --git a/ckanext/collection/utils/data/base.py b/ckanext/collection/utils/data/base.py index 0354df5..619dc5d 100644 --- a/ckanext/collection/utils/data/base.py +++ b/ckanext/collection/utils/data/base.py @@ -3,12 +3,12 @@ from functools import cached_property from typing import Any, Generic, Iterable, Iterator -from ckanext.collection import shared, types +from ckanext.collection import internal, types class Data( types.BaseData, - shared.Domain[types.TDataCollection], + internal.Domain[types.TDataCollection], Generic[types.TData, types.TDataCollection], ): """Data source for collection. 
@@ -37,6 +37,11 @@ def refresh_data(self): self._data = self.compute_data() self._total = self.compute_total(self._data) + def __getitem__(self, key: Any): + if isinstance(key, slice): + return self.range(key.start, key.stop) + return self.at(key) + def compute_data(self) -> Any: """Produce data.""" return [] @@ -49,6 +54,10 @@ def range(self, start: Any, end: Any) -> Iterable[types.TData]: """Slice data.""" return self._data[start:end] + def at(self, index: Any) -> types.TData: + """Slice data.""" + return self._data[index] + @property def total(self): return self._total diff --git a/ckanext/collection/utils/data/db.py b/ckanext/collection/utils/data/db.py index 518377b..ff74210 100644 --- a/ckanext/collection/utils/data/db.py +++ b/ckanext/collection/utils/data/db.py @@ -6,7 +6,7 @@ import sqlalchemy as sa from sqlalchemy.sql.selectable import GenerativeSelect, Select -from ckanext.collection import shared, types +from ckanext.collection import internal, types from .model import BaseSaData @@ -20,8 +20,8 @@ def _execute(self, stmt: GenerativeSelect): class TableData(DbData[types.TData, types.TDbCollection]): - table: str = shared.configurable_attribute() - static_columns: Iterable[Any] = shared.configurable_attribute( + table: str = internal.configurable_attribute() + static_columns: Iterable[Any] = internal.configurable_attribute( default_factory=lambda self: [], ) diff --git a/ckanext/collection/utils/data/model.py b/ckanext/collection/utils/data/model.py index be52920..fdf402d 100644 --- a/ckanext/collection/utils/data/model.py +++ b/ckanext/collection/utils/data/model.py @@ -14,7 +14,7 @@ from ckan import model from ckan.types import AlchemySession -from ckanext.collection import shared, types +from ckanext.collection import internal, types from .base import Data @@ -29,9 +29,9 @@ class BaseSaData( """Data source for custom SQL statement.""" _data: cached_property[TStatement] - use_naive_filters: bool = shared.configurable_attribute(False) - use_naive_search: 
bool = shared.configurable_attribute(False) - session: AlchemySession = shared.configurable_attribute( + use_naive_filters: bool = internal.configurable_attribute(False) + use_naive_search: bool = internal.configurable_attribute(False) + session: AlchemySession = internal.configurable_attribute( default_factory=lambda self: model.Session, ) @@ -125,7 +125,7 @@ def statement_with_sorting(self, stmt: TStatement): if not sort: return stmt - column, desc = shared.parse_sort(sort) + column, desc = internal.parse_sort(sort) if column not in self.attached.columns.sortable: log.warning("Unexpected sort value: %s", column) @@ -145,7 +145,7 @@ def statement_with_sorting(self, stmt: TStatement): class StatementSaData(BaseSaData[Select, types.TData, types.TDataCollection]): """Data source for custom SQL statement.""" - statement: Any = shared.configurable_attribute(None) + statement: Any = internal.configurable_attribute(None) def get_base_statement(self): """Return statement with minimal amount of columns and filters.""" @@ -155,7 +155,7 @@ def get_base_statement(self): class UnionSaData(BaseSaData[Select, types.TData, types.TDataCollection]): """Data source for custom SQL statement.""" - statements: Iterable[GenerativeSelect] = shared.configurable_attribute( + statements: Iterable[GenerativeSelect] = internal.configurable_attribute( default_factory=lambda self: [], ) @@ -174,19 +174,19 @@ class ModelData(BaseSaData[Select, types.TData, types.TDataCollection]): is_scalar: return model instance instead of columns set. 
""" - model: Any = shared.configurable_attribute(None) - is_scalar: bool = shared.configurable_attribute(False) + model: Any = internal.configurable_attribute(None) + is_scalar: bool = internal.configurable_attribute(False) - static_columns: list[sa.Column[Any] | Label[Any]] = shared.configurable_attribute( + static_columns: list[sa.Column[Any] | Label[Any]] = internal.configurable_attribute( default_factory=lambda self: [], ) - static_filters: list[Any] = shared.configurable_attribute( + static_filters: list[Any] = internal.configurable_attribute( default_factory=lambda self: [], ) - static_sources: dict[str, Any] = shared.configurable_attribute( + static_sources: dict[str, Any] = internal.configurable_attribute( default_factory=lambda self: {}, ) - static_joins: list[tuple[str, Any, bool]] = shared.configurable_attribute( + static_joins: list[tuple[str, Any, bool]] = internal.configurable_attribute( default_factory=lambda self: [], ) diff --git a/ckanext/collection/utils/data/stream.py b/ckanext/collection/utils/data/stream.py index c6d8b6a..08adf96 100644 --- a/ckanext/collection/utils/data/stream.py +++ b/ckanext/collection/utils/data/stream.py @@ -3,7 +3,7 @@ import csv import logging -from ckanext.collection import shared, types +from ckanext.collection import internal, types from .base import Data @@ -11,7 +11,7 @@ class CsvFileData(Data[types.TData, types.TDataCollection]): - source = shared.configurable_attribute() + source = internal.configurable_attribute() def compute_data(self): with open(self.source) as src: diff --git a/ckanext/collection/utils/db_connection.py b/ckanext/collection/utils/db_connection.py index dcc8465..49228f9 100644 --- a/ckanext/collection/utils/db_connection.py +++ b/ckanext/collection/utils/db_connection.py @@ -9,11 +9,11 @@ from ckanext.datastore.backend.postgres import get_read_engine -from ckanext.collection import shared, types +from ckanext.collection import internal, types -class DbConnection(types.BaseDbConnection, 
shared.Domain[types.TDbCollection]): - engine: Engine = shared.configurable_attribute() +class DbConnection(types.BaseDbConnection, internal.Domain[types.TDbCollection]): + engine: Engine = internal.configurable_attribute() @property def inspector(self): @@ -21,8 +21,8 @@ def inspector(self): class UrlDbConnection(DbConnection[types.TDbCollection]): - url: str = shared.configurable_attribute() - engine_options: dict[str, Any] = shared.configurable_attribute( + url: str = internal.configurable_attribute() + engine_options: dict[str, Any] = internal.configurable_attribute( default_factory=lambda self: {}, ) @@ -32,12 +32,12 @@ def __init__(self, obj: types.TDbCollection, /, **kwargs: Any): class CkanDbConnection(DbConnection[types.TDbCollection]): - engine: Engine = shared.configurable_attribute( + engine: Engine = internal.configurable_attribute( default_factory=lambda self: cast(Engine, model.meta.engine), ) class DatastoreDbConnection(DbConnection[types.TDbCollection]): - engine: Engine = shared.configurable_attribute( + engine: Engine = internal.configurable_attribute( default_factory=lambda self: get_read_engine(), ) diff --git a/ckanext/collection/utils/filters.py b/ckanext/collection/utils/filters.py index 4da9c76..f90a11b 100644 --- a/ckanext/collection/utils/filters.py +++ b/ckanext/collection/utils/filters.py @@ -4,12 +4,12 @@ import sqlalchemy as sa -from ckanext.collection import shared, types +from ckanext.collection import internal, types class Filters( types.BaseFilters, - shared.Domain[types.TDataCollection], + internal.Domain[types.TDataCollection], ): """Information about UI filters. 
@@ -24,10 +24,10 @@ def make_filters(self) -> Iterable[types.Filter[Any]]: def make_actions(self) -> Iterable[types.Filter[Any]]: return [] - static_filters: Iterable[types.Filter[Any]] = shared.configurable_attribute( + static_filters: Iterable[types.Filter[Any]] = internal.configurable_attribute( default_factory=lambda self: [], ) - static_actions: Iterable[types.Filter[Any]] = shared.configurable_attribute( + static_actions: Iterable[types.Filter[Any]] = internal.configurable_attribute( default_factory=lambda self: [], ) @@ -42,7 +42,7 @@ class DbFilters(Filters[types.TDbCollection]): class TableFilters(DbFilters[types.TDbCollection]): - table: str = shared.configurable_attribute() + table: str = internal.configurable_attribute() def make_filters(self) -> Iterable[types.Filter[Any]]: return [ diff --git a/ckanext/collection/utils/pager.py b/ckanext/collection/utils/pager.py index 40096ac..7d7ba68 100644 --- a/ckanext/collection/utils/pager.py +++ b/ckanext/collection/utils/pager.py @@ -2,12 +2,10 @@ from typing import Any -import ckan.plugins.toolkit as tk +from ckanext.collection import internal, types -from ckanext.collection import shared, types - -class Pager(types.BasePager, shared.Domain[types.TDataCollection]): +class Pager(types.BasePager, internal.Domain[types.TDataCollection]): """Pagination logic for collections. 
This class must be abstract enough to fit into majority of pager @@ -24,18 +22,26 @@ class ClassicPager(Pager[types.TDataCollection]): rows_per_page: max number of items per page """ - page: int = shared.configurable_attribute(1) - rows_per_page: int = shared.configurable_attribute(10) + prioritize_params: int = internal.configurable_attribute(True) + page: int = internal.configurable_attribute(1) + rows_per_page: int = internal.configurable_attribute(10) def __init__(self, obj: types.TDataCollection, /, **kwargs: Any): """Use `page` and `rows_per_page` parameters.""" super().__init__(obj, **kwargs) - self.page = tk.h.get_page_number(self.attached.params, "page", self.page) - self.rows_per_page = tk.h.get_page_number( - self.attached.params, - "rows_per_page", - self.rows_per_page, - ) + + if self.prioritize_params: + self.page = max(int(self.attached.params.get("page", self.page)), 1) + + self.rows_per_page = max( + int( + self.attached.params.get( + "rows_per_page", + self.rows_per_page, + ), + ), + 0, + ) @property def start(self) -> int: diff --git a/ckanext/collection/utils/serialize/__init__.py b/ckanext/collection/utils/serialize/__init__.py index 66a4ae3..ac7ff13 100644 --- a/ckanext/collection/utils/serialize/__init__.py +++ b/ckanext/collection/utils/serialize/__init__.py @@ -3,6 +3,7 @@ import abc import csv import io +import itertools import json import operator from functools import reduce @@ -15,7 +16,7 @@ import ckan.plugins.toolkit as tk -from ckanext.collection import shared, types +from ckanext.collection import internal, types __all__ = [ "Serializer", @@ -54,7 +55,7 @@ def basic_row_dictizer(row: Any) -> dict[str, Any]: class Serializer( types.BaseSerializer, - shared.Domain[types.TDataCollection], + internal.Domain[types.TDataCollection], Generic[types.TSerialized, types.TDataCollection], ): """Base collection serializer. 
@@ -71,10 +72,12 @@ class Serializer( """ - value_serializers: dict[str, types.ValueSerializer] = shared.configurable_attribute( - default_factory=lambda self: {}, + value_serializers: dict[str, types.ValueSerializer] = ( + internal.configurable_attribute( + default_factory=lambda self: {}, + ) ) - row_dictizer: Callable[[Any], dict[str, Any]] = shared.configurable_attribute( + row_dictizer: Callable[[Any], dict[str, Any]] = internal.configurable_attribute( basic_row_dictizer, ) @@ -100,11 +103,16 @@ def serialize_value(self, value: Any, name: str, record: Any): def dictize_row(self, row: Any) -> dict[str, Any]: """Transform single data record into serializable dictionary.""" result = self.row_dictizer(row) + if fields := self.attached.columns.names: + visible = self.attached.columns.visible + else: + fields = list(result) + visible = set(fields) return { field: self.serialize_value(result[field], field, row) - for field in self.attached.columns.visible - if field in result + for field in fields + if field in result and field in visible } @@ -150,31 +158,29 @@ def render(self) -> str: class CsvSerializer(StreamingSerializer[str, types.TDataCollection]): """Serialize collection into CSV document.""" - def get_writer(self, buff: io.StringIO): - return csv.DictWriter( - buff, - fieldnames=self.get_fieldnames(), - extrasaction="ignore", - ) - - def get_fieldnames(self) -> list[str]: - return [ - name - for name in self.attached.columns.names - if name in self.attached.columns.visible - ] + def get_writer(self, buff: io.StringIO, fieldnames: list[str]): + return csv.DictWriter(buff, fieldnames, extrasaction="ignore") def get_header_row(self, writer: csv.DictWriter[str]) -> dict[str, str]: return { col: self.attached.columns.labels.get(col, col) for col in writer.fieldnames } - def prepare_row(self, row: Any, writer: csv.DictWriter[str]) -> dict[str, Any]: + def prepare_row(self, row: Any) -> dict[str, Any]: return self.dictize_row(row) def stream(self) -> Iterable[str]: 
buff = io.StringIO() - writer = self.get_writer(buff) + data = iter(self.attached.data) + fieldnames = self.attached.columns.names + if not fieldnames: + empty = object() + row = next(data, empty) + if row is not empty: + data = itertools.chain([row], data) + fieldnames = list(self.prepare_row(row)) + + writer = self.get_writer(buff, fieldnames) writer.writerow(self.get_header_row(writer)) @@ -182,8 +188,8 @@ def stream(self) -> Iterable[str]: buff.seek(0) buff.truncate() - for row in self.attached.data: - writer.writerow(self.prepare_row(row, writer)) + for row in data: + writer.writerow(self.prepare_row(row)) yield buff.getvalue() buff.seek(0) buff.truncate() @@ -192,10 +198,12 @@ def stream(self) -> Iterable[str]: class JsonlSerializer(StreamingSerializer[str, types.TDataCollection]): """Serialize collection into JSONL lines.""" + encoder = internal.configurable_attribute(json.JSONEncoder(default=str)) + def stream(self) -> Iterable[str]: buff = io.StringIO() for row in map(self.dictize_row, self.attached.data): - json.dump(row, buff) + buff.write(self.encoder.encode(row)) yield buff.getvalue() yield "\n" buff.seek(0) @@ -205,8 +213,10 @@ def stream(self) -> Iterable[str]: class JsonSerializer(StreamingSerializer[str, types.TDataCollection]): """Serialize collection into single JSON document.""" + encoder = internal.configurable_attribute(json.JSONEncoder(default=str)) + def stream(self): - yield json.dumps( + yield self.encoder.encode( [self.dictize_row(row) for row in self.attached.data], ) @@ -217,14 +227,14 @@ class ChartJsSerializer(StreamingSerializer[str, types.TDataCollection]): """ - label_column: str = shared.configurable_attribute("") - dataset_columns: list[str] = shared.configurable_attribute( + label_column: str = internal.configurable_attribute("") + dataset_columns: list[str] = internal.configurable_attribute( default_factory=lambda self: [], ) - dataset_labels: dict[str, str] = shared.configurable_attribute( + dataset_labels: dict[str, str] = 
internal.configurable_attribute( default_factory=lambda self: {}, ) - colors: dict[str, str] = shared.configurable_attribute( + colors: dict[str, str] = internal.configurable_attribute( default_factory=lambda self: {}, ) @@ -257,12 +267,12 @@ def stream(self): class HtmlSerializer(RenderableSerializer[types.TDataCollection]): """Serialize collection into HTML document.""" - ensure_dictized: bool = shared.configurable_attribute(False) + ensure_dictized: bool = internal.configurable_attribute(False) - main_template: str = shared.configurable_attribute( + main_template: str = internal.configurable_attribute( "collection/serialize/html/main.html", ) - record_template: str = shared.configurable_attribute( + record_template: str = internal.configurable_attribute( "collection/serialize/html/record.html", ) @@ -281,30 +291,30 @@ def stream(self): class TableSerializer(HtmlSerializer[types.TDataCollection]): """Serialize collection into HTML table.""" - main_template: str = shared.configurable_attribute( + main_template: str = internal.configurable_attribute( "collection/serialize/table/main.html", ) - table_template: str = shared.configurable_attribute( + table_template: str = internal.configurable_attribute( "collection/serialize/table/table.html", ) - record_template: str = shared.configurable_attribute( + record_template: str = internal.configurable_attribute( "collection/serialize/table/record.html", ) - counter_template: str = shared.configurable_attribute( + counter_template: str = internal.configurable_attribute( "collection/serialize/table/counter.html", ) - pager_template: str = shared.configurable_attribute( + pager_template: str = internal.configurable_attribute( "collection/serialize/table/pager.html", ) - form_template: str = shared.configurable_attribute( + form_template: str = internal.configurable_attribute( "collection/serialize/table/form.html", ) - filter_template: str = shared.configurable_attribute( + filter_template: str = 
internal.configurable_attribute( "collection/serialize/table/filter.html", ) - prefix: str = shared.configurable_attribute("collection-table") - base_class: str = shared.configurable_attribute("collection") + prefix: str = internal.configurable_attribute("collection-table") + base_class: str = internal.configurable_attribute("collection") @property def form_id(self): @@ -318,33 +328,33 @@ def table_id(self): class HtmxTableSerializer(TableSerializer[types.TDataCollection]): """Serialize collection into HTML table.""" - main_template: str = shared.configurable_attribute( + main_template: str = internal.configurable_attribute( "collection/serialize/htmx_table/main.html", ) - table_template: str = shared.configurable_attribute( + table_template: str = internal.configurable_attribute( "collection/serialize/htmx_table/table.html", ) - record_template: str = shared.configurable_attribute( + record_template: str = internal.configurable_attribute( "collection/serialize/htmx_table/record.html", ) - counter_template: str = shared.configurable_attribute( + counter_template: str = internal.configurable_attribute( "collection/serialize/htmx_table/counter.html", ) - pager_template: str = shared.configurable_attribute( + pager_template: str = internal.configurable_attribute( "collection/serialize/htmx_table/pager.html", ) - form_template: str = shared.configurable_attribute( + form_template: str = internal.configurable_attribute( "collection/serialize/htmx_table/form.html", ) - filter_template: str = shared.configurable_attribute( + filter_template: str = internal.configurable_attribute( "collection/serialize/htmx_table/filter.html", ) - debug: bool = shared.configurable_attribute(False) - push_url: bool = shared.configurable_attribute(False) - base_class: str = shared.configurable_attribute("htmx-collection") + debug: bool = internal.configurable_attribute(False) + push_url: bool = internal.configurable_attribute(False) + base_class: str = 
internal.configurable_attribute("htmx-collection") - render_url: str = shared.configurable_attribute( + render_url: str = internal.configurable_attribute( default_factory=lambda self: tk.h.url_for( "ckanext-collection.render", name=self.attached.name, diff --git a/docs/detailed.md b/docs/detailed.md index ff28b95..08600c0 100644 --- a/docs/detailed.md +++ b/docs/detailed.md @@ -1,75 +1,5 @@ # Deep dive - - -## Usage - -Collections can be registered via `ckanext.collection.interfaces.ICollection` -or via CKAN signals. Registered collection can be initialized anywhere in code -using helper and can be used in a number of generic endpoints that render -collection as HTML of export it into different formats. - -Registration via interface: - -```python -from ckanext.collection.interfaces import CollectionFactory, ICollection - - -class MyPlugin(p.SingletonPlugin): - p.implements(ICollection, inherit=True) - - def get_collection_factories(self) -> dict[str, CollectionFactory]: - return { - "my-collection": MyCollection, - } - -``` - -`get_collection_factories` returns a dictionary with collection names(letters, -digits, underscores and hyphens are allowed) as keys, and collection factories -as values. In most generic case, collection factory is just a collection -class. But you can use any function with signature `(str, dict[str, Any], -**Any) -> Collection` as a factory. 
For example, the following function is a -valid collection factory and it can be returned from `get_collection_factories` - -```python -def my_factory(name: str, params: dict[str, Any], **kwargs: Any): - """Collection that shows 100 numbers per page""" - params.setdefault("rows_per_page", 100) - return MyCollection(name, params, **kwargs) -``` - -If you want to register a collection only if collection plugin is enabled, you -can use CKAN signals instead of wrapping import from ckanext-collection into -try except block: - -```python - -class MyPlugin(p.SingletonPlugin): - p.implements(p.ISignal) - - def get_signal_subscriptions(self) -> types.SignalMapping: - return { - tk.signals.ckanext.signal("collection:register_collections"): [ - self.collect_collection_factories, - ], - } - - def collect_collection_factories(self, sender: None): - return { - "my-collection": MyCollection, - } - -``` - -Data returned from the signal subscription is exactly the same as from -`ICollection.get_collection_factories`. The only difference, signal -subscription accepts `sender` argument which is always `None`, due to internal -implementation of signals. - - -## Documentation - ### Overview The goal of this plugin is to supply you with generic classes for processing @@ -206,56 +136,6 @@ for user in col: print(user["name"]) ``` -### Collection intialization - -Collection constructor has two mandatory arguments: name and parameters. - -Name is used as collection identifier and it's better to keep this value unique -accross collections. For example, name is used for computing HTML table `id` -attribute when serializing collection as an HTML table. If you render two -collections with the same name, you'll get two identical IDs on the page. - -Params are usually used by data and pager service for searching, sorting, -etc. Collection does not keep all the params. Instead, it stores only items -with key prefixed by `:`. 
I.e, if collection has name `hello`, and you -pass `{"hello:a": 1, "b": 2, "world:c": 3}`, collection will remove `b`(because -it has no collection name plus colon prefix) and `world:c` members(because it -uses `world` instead of `hello` in prefix). As for `hello:a`, collection strips -`:` prefix from it. So, in the end, collection stores `{"a": 1}`. You -can check params of the collection using `params` attribute: - -```python -col = Collection("hello", {"hello:a": 1, "b": 2, "world:c": 3}) -assert col.params == {"a": 1} - -col = Collection("world", {"hello:a": 1, "b": 2, "world:c": 3}) -assert col.params == {"c": 3} -``` - -It allows you rendering and processing multiple collections simultaneously on -the same page. Imagine that you have collection `users` and collection -`packages`. You want to see second page of `users` and fifth of -`packages`. Submit the query string `?users:page=2&packages:page=5` and -initialize collections using the following code: - -```python -from ckan.logic import parse_params -from ckan.plugins import toolkit as tk - -params = parse_params(tk.request.args) - -users = ModelCollection( - "users", params, - data_settings={"model": model.User} -) -packages = ModelCollection( - "packages", params, - data_settings={"model": model.Package} -) - -assert users.pager.page == 2 -assert packages.pager.page == 5 -``` ### Services @@ -288,147 +168,6 @@ built-in collections, `DbCollection` has additional service called `db_connection` that can communicate with DB. -When a collection is created, it creates an instance of each service using -service factories and service settings. Base collection and all collections -that extend it already have all details for initializing every service: - -```python -col = Collection("name", {}) -print(f"""Services: - {col.data=}, - {col.pager=}, - {col.serializer=}, - {col.columns=}, - {col.filters=}""") - -assert list(col) == [] -``` - -This collection has no data. 
We can initialize an instance of `StaticData` and -replace the existing data service of the collection with new `StaticData` -instance. - -Every service has one required argument: collection that owns the service. All -other arguments are used as a service settings and must be passed by -name. Remember, all the classes used in this manual are available inside -`ckanext.collection.utils`: - -```python -static_data = StaticData(col, data=[1,2,3]) -col.replace_service(static_data) - -assert list(col) == [1, 2, 3] -``` - -Look at `Colletion.replace_service`. It accepts only service instance. There is -no need to pass the name of the service that must be replaced - collection can -understand it without help. And pay attention to the first argument of service -constructor. It must be the collection that is going to use the service. Some -services may work even if you pass a random value as the first argument, but -it's an exceptional situation and one shouldn't rely on it. - -If existing collection is no longer used and you are going to create a new one, -you sometimes want to reuse a service from an existing collection. Just to -avoid creating the service and calling `Collection.replace_service`, which will -save you two lines of code. In this case, use `_instance` parameter of -the collection constructor: - -```python -another_col = Collection("another-name", {}, data_instance=col.data) -assert list(another_col) == [1, 2, 3] -``` - -If you do such thing, make sure you are not using old collection anymore. You -just transfered one of its services to another collection, so there is no -guarantees that old collection with detached service will function properly. - -It's usually better to customize service factory, instead of passing existing -customized instance of the service around. 
You can tell which class to use for -making an instance of a service using `_factory` parameter of the -collection contstructor: - -```python -col = Collection("name", {}, data_factory=StaticData) -assert list(col) == [] -``` - -But in this way we cannot specify the `data` attribute of the `data` factory! -No worries, there are multiple ways to overcome this problem. First of all, all -the settings of the service are available as its attributes. It means that -`data` setting is the same as `data` attribute of the service. If you can do -`StaticData(..., data=...)`, you can as well do `service = StaticData(...); -service.data = ...`: - -```python -col = Collection("name", {}, data_factory=StaticData) -col.data.data = [1, 2, 3] -assert list(col) == [1, 2, 3] -``` - -**Note**: `data` service caches its data. If you already accessed data property -from the `StaticData`, assigning an new value doesn't have any effect because -of the cache. You have to call `col.data.refresh_data()` after assigning to -rebuild the cache. - -But there is a better way. You can pass `_settings` dictionary to the -collection constructor and it will be passed down into corresponding service -factory: - -```python -col = Collection( - "name", {}, - data_factory=StaticData, - data_settings={"data": [1, 2, 3]} -) -assert list(col) == [1, 2, 3] -``` - - -It works well for individual scenarios, but when you are creating a lot of -collections with the static data, you want to omit some standard parameters. In -this case you should define a new class that extends Collection and declares -`Factory` attribute: - -```python -class MyCollection(Collection): - DataFactory = StaticData - -col = MyCollection( - "name", {}, - data_settings={"data": [1, 2, 3]} -) -assert list(col) == [1, 2, 3] -``` - -You still can pass `data_factory` into `MyCollection` constructor to override -data service factory. But now, by default, `StaticData` is used when it's not -specified explicitly. 
- -Finally, if you want to create a subclass of service, that has a specific value -of certain attributes, i.e something like this: - -```python -class OneTwoThreeData(StaticData): - data = [1, 2, 3] -``` - -you can use `Service.with_attributes(attr_name=attr_value)` factory method. It -produce a new service class(factory) with specified attributes bound to a -static value. For example, that's how we can define a collection, that always -contains `[1, 2, 3]`: - -```python -class MyCollection(Collection): - DataFactory = StaticData.with_attributes(data=[1, 2, 3]) - -col = MyCollection("name", {}) -assert list(col) == [1, 2, 3] -``` - -Now you don't have to specify `data_factory` or `data_settings` when creating a -collection. It will always use `StaticData` with `data` set to `[1, 2, 3]` -. Make sure you mean it, because now you cannot override the data using -`data_settings`. #### Common logic @@ -505,299 +244,3 @@ alphabetical order, but this implementation detail may change in future without notice. TODO: with_attributes - -#### Data service - -This service produces the data for collection. Every data service must: - -* be Iterable and iterate over all available records by default -* define `total` property, that reflects number of available records so that - `len(list(data)) == data.total` -* define `range(start: Any, end: Any)` method that returns slice of the data - -Base class for data services - `Data` - already contains a simple version of -this logic. You need to define only one method to make you custom -implementations: `compute_data()`. When data if accessed for the first time, -`compute_data` is called. Its result cached and used for iteration in -for-loops, slicing via `range` method and size measurement via `total` -property. 
- - -```python -class CustomData(Data): - def compute_data(self) -> Any: - return "abcdefghijklmnopqrstuvwxyz" - -col = Collection("name", {}, data_factory=CustomData) -assert list(col) == ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j"] -assert col.data.total == 26 -assert col.data.range(-3, None) == "xyz" - -``` - -If you need more complex data source, make sure you defined `__iter__`, -`total`, and `range`: - -```python -class CustomData(Data): - names = configurable_attribute(default_factory=["Anna", "Henry", "Mary"]) - - @property - def total(self): - return len(self.names) - - def __iter__(self): - yield from sorted(self.names) - - def range(self, start: Any, end: Any): - if not isinstance(start, str) or not isinstance(end, str): - return [] - - for name in self: - if name < start: - continue - if name > end: - break - yield name - -``` - - -#### Pager service - -Pager service sets the upper and lower bounds on data used by -collection. Default pager used by collection relies on numeric `start`/`end` -values. But it's possible to define custom pager that uses alphabetical or -temporal bounds, as long as `range` method of your custom data service supports -these bounds. - -Standard pager(`ClassicPager`) has two configurable attributes: `page`(default: -1) and `rows_per_page`(default: 10). - -```python -col = StaticCollection("name", {}) -assert col.pager.page == 1 -assert col.pager.rows_per_page == 10 -``` - -Because of these values you see only first 10 records from data when iterating -the collection. Let's change pager settings: - -```python -col = StaticCollection( - "name", {}, - data_settings={"data": range(1, 100)}, - pager_settings={"page": 3, "rows_per_page": 6} -) -assert list(col) == [13, 14, 15, 16, 17, 18] -``` - -Pagination details are often passed with search parameters and have huge -implact on the required data frame. 
Because of it, if `pager_settings` are -missing, `ClassicPager` will look for settings inside collection -parameters(second argument of the collection constructor). But in this case, -pager will use only items that has `:` prefix: - -```python -col = StaticCollection( - "xxx", - {"xxx:page": 3, "xxx:rows_per_page": 6}, - data_settings={"data": range(1, 100)} -) -assert list(col) == [13, 14, 15, 16, 17, 18] - -col = StaticCollection( - "xxx", - {"page": 3, "rows_per_page": 6}, - data_settings={"data": range(1, 100)} -) -assert list(col) == [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] - -``` - -#### Serializer service - -Serializer converts data into textual, binary or any other alternative -representation. For example, if you want to compute records produced by the -`data` service of the collection into pandas' DataFrame, you should probably -use serializer. - -Serializers are main users of columns service, because it contains details -about specific data columns. And serializers often iterate data service -directly(ignoring `range` method), to serialize all available records. - -The only required method for serializer is `serialize`. This method must return -an data from `data` service transformed into format provided by serializer. For -example, `JsonSerializer` returns string with JSON-encoded data. - -You are not restricted by textual or binary formats. Serializer that transforms -data into pandas' DataFrame is completely valid version of the serializer. - -```python -class NewLineSerializer(Serializer): - def serialize(self): - result = "" - for item in self.attached.data: - result += str(item) + "\n" - - return result - -col = StaticCollection( - "name", {}, - serializer_factory=NewLineSerializer, - data_settings={"data": [1, 2, 3]} -) -assert "".join(col.serializer.serialize()) == "1\n2\n3\n" -``` - -#### Columns service - -This service contains additional information about separate columns of data -records. 
It defines following settings: - -* names: all available column names. Used by other settings of columns service -* hidden: columns that should not be shown by serializer. Used by serializer - services -* visible: columns that must be shown by serializer. Used by serializer - services -* sortable: columns that support sorting. Used by data services -* filterable: columns that support filtration/facetting. Used by data services -* searchable: columns that support search by partial match. Used by data - services -* labels: human readable labels for columns. Used by serializer services - -This service contains information used by other service, so defining additional -attributes here is completely normal. For example, some custom serializer, that -serializes data into ORC, can expect `orc_format` attribute in the `columns` -service to be available. So you can add as much additional column related -details as required into this service. - -#### Filters service - -This service used only by HTML table serializers at the moment. It has two -configurable attributes `static_filters` and `static_actions`. `static_filters` -are used for building search form for the data table. `static_actions` are not -used, but you can put into it details about batch or record-level actions and -use these details to extend one of standard serializers. For example, -ckanext-admin-panel defines allowed actions (remove, restore, hide) for content -and creates custom templates that are referring these actions. 
- - -### Core classes and usage examples - -TBA - -#### Data -TBA - -#### StaticData -TBA - -#### BaseSaData -TBA - -#### StatementSaData -TBA - -#### UnionSaData -TBA - -#### ModelData -TBA - -#### ApiData -TBA - -#### ApiSearchData -TBA - -#### ApiListData -TBA - -#### Pager -TBA - -#### ClassicPager -TBA - -#### Columns -TBA - -#### Filters -TBA - -#### Serializer -TBA - -#### CsvSerializer -TBA - -#### JsonlSerializer -TBA - -#### JsonSerializer -TBA - -#### HtmlSerializer -TBA - -#### TableSerializer -TBA - -#### HtmxTableSerializer -TBA - -## Config settings - -```ini -# Names of registered collections that are viewable by any visitor, including -# anonymous. -# (optional, default: ) -ckanext.collection.auth.anonymous_collections = - -# Names of registered collections that are viewable by any authenticated -# user. -# (optional, default: ) -ckanext.collection.auth.authenticated_collections = - -# Add HTMX asset to pages. Enable this option if you are using CKAN v2.10 -# (optional, default: false) -ckanext.collection.include_htmx_asset = false - -# Initialize CKAN JS modules every time HTMX fetches HTML from the server. -# (optional, default: false) -ckanext.collection.htmx_init_modules = false - -# Import path for serializer used by CSV export endpoint. -# (optional, default: ckanext.collection.utils.serialize:CsvSerializer) -ckanext.collection.export.csv.serializer = ckanext.collection.utils.serialize:CsvSerializer - -# Import path for serializer used by JSON export endpoint. -# (optional, default: ckanext.collection.utils.serialize:JsonSerializer) -ckanext.collection.export.json.serializer = ckanext.collection.utils.serialize:JsonSerializer - -# Import path for serializer used by JSONl export endpoint. -# (optional, default: ckanext.collection.utils.serialize:JsonlSerializer) -ckanext.collection.export.jsonl.serializer = ckanext.collection.utils.serialize:JsonlSerializer - -# Import path for serializer used by `format`-export endpoint. 
-# (optional, default: ) -ckanext.collection.export..serializer = - -``` - -## Integrations - -### [ckanext-admin-panel](https://github.com/mutantsan/ckanext-admin-panel) - -To enable configuration form of ckanext-collection in the admin panel, enable -the following arbitrary schema - -```ini -scheming.arbitrary_schemas = - ckanext.collection:ap_config.yaml -``` - -## License - -[AGPL](https://www.gnu.org/licenses/agpl-3.0.en.html) diff --git a/docs/structure/collection.md b/docs/structure/collection.md index e9e3a5f..b481f2f 100644 --- a/docs/structure/collection.md +++ b/docs/structure/collection.md @@ -1 +1,197 @@ # Collection + + +Collection constructor has two mandatory arguments: name and parameters. + +Name is used as collection identifier and it's better to keep this value unique +across collections. For example, name is used for computing HTML table `id` +attribute when serializing collection as an HTML table. If you render two +collections with the same name, you'll get two identical IDs on the page. + +Params are usually used by data and pager service for searching, sorting, +etc. Collection does not keep all the params. Instead, it stores only items +with key prefixed by `<name>:`. I.e, if collection has name `hello`, and you +pass `{"hello:a": 1, "b": 2, "world:c": 3}`, collection will remove `b`(because +it has no collection name plus colon prefix) and `world:c` members(because it +uses `world` instead of `hello` in prefix). As for `hello:a`, collection strips +`<name>:` prefix from it. So, in the end, collection stores `{"a": 1}`. You +can check params of the collection using `params` attribute: + +```python +col = Collection("hello", {"hello:a": 1, "b": 2, "world:c": 3}) +assert col.params == {"a": 1} + +col = Collection("world", {"hello:a": 1, "b": 2, "world:c": 3}) +assert col.params == {"c": 3} +``` + +It allows you to render and process multiple collections simultaneously on +the same page.
Imagine that you have collection `users` and collection +`packages`. You want to see second page of `users` and fifth of +`packages`. Submit the query string `?users:page=2&packages:page=5` and +initialize collections using the following code: + +```python +from ckan.logic import parse_params +from ckan.plugins import toolkit as tk + +params = parse_params(tk.request.args) + +users = ModelCollection( + "users", params, + data_settings={"model": model.User} +) +packages = ModelCollection( + "packages", params, + data_settings={"model": model.Package} +) + +assert users.pager.page == 2 +assert packages.pager.page == 5 +``` + +--- + +When a collection is created, it creates an instance of each service using +service factories and service settings. Base collection and all collections +that extend it already have all details for initializing every service: + +```python +col = Collection("name", {}) +print(f"""Services: + {col.data=}, + {col.pager=}, + {col.serializer=}, + {col.columns=}, + {col.filters=}""") + +assert list(col) == [] +``` + +This collection has no data. We can initialize an instance of `StaticData` and +replace the existing data service of the collection with new `StaticData` +instance. + +Every service has one required argument: collection that owns the service. All +other arguments are used as service settings and must be passed by +name. Remember, all the classes used in this manual are available inside +`ckanext.collection.utils`: + +```python +static_data = StaticData(col, data=[1,2,3]) +col.replace_service(static_data) + +assert list(col) == [1, 2, 3] +``` + +Look at `Collection.replace_service`. It accepts only service instance. There is +no need to pass the name of the service that must be replaced - collection can +understand it without help. And pay attention to the first argument of service +constructor. It must be the collection that is going to use the service.
Some +services may work even if you pass a random value as the first argument, but +it's an exceptional situation and one shouldn't rely on it. + +If existing collection is no longer used and you are going to create a new one, +you sometimes want to reuse a service from an existing collection. Just to +avoid creating the service and calling `Collection.replace_service`, which will +save you two lines of code. In this case, use `<service>_instance` parameter of +the collection constructor: + +```python +another_col = Collection("another-name", {}, data_instance=col.data) +assert list(another_col) == [1, 2, 3] +``` + +If you do such thing, make sure you are not using old collection anymore. You +just transferred one of its services to another collection, so there are no +guarantees that old collection with detached service will function properly. + +It's usually better to customize service factory, instead of passing existing +customized instance of the service around. You can tell which class to use for +making an instance of a service using `<service>_factory` parameter of the +collection constructor: + +```python +col = Collection("name", {}, data_factory=StaticData) +assert list(col) == [] +``` + +But in this way we cannot specify the `data` attribute of the `data` factory! +No worries, there are multiple ways to overcome this problem. First of all, all +the settings of the service are available as its attributes. It means that +`data` setting is the same as `data` attribute of the service. If you can do +`StaticData(..., data=...)`, you can as well do `service = StaticData(...); +service.data = ...`: + +```python +col = Collection("name", {}, data_factory=StaticData) +col.data.data = [1, 2, 3] +assert list(col) == [1, 2, 3] +``` + +**Note**: `data` service caches its data. If you already accessed data property +from the `StaticData`, assigning a new value doesn't have any effect because +of the cache.
You have to call `col.data.refresh_data()` after assigning to +rebuild the cache. + +But there is a better way. You can pass `<service>_settings` dictionary to the +collection constructor and it will be passed down into corresponding service +factory: + +```python +col = Collection( + "name", {}, + data_factory=StaticData, + data_settings={"data": [1, 2, 3]} +) +assert list(col) == [1, 2, 3] +``` + + +It works well for individual scenarios, but when you are creating a lot of +collections with the static data, you want to omit some standard parameters. In +this case you should define a new class that extends Collection and declares +`<Service>Factory` attribute: + +```python +class MyCollection(Collection): + DataFactory = StaticData + +col = MyCollection( + "name", {}, + data_settings={"data": [1, 2, 3]} +) +assert list(col) == [1, 2, 3] +``` + +You still can pass `data_factory` into `MyCollection` constructor to override +data service factory. But now, by default, `StaticData` is used when it's not +specified explicitly. + +Finally, if you want to create a subclass of service, that has a specific value +of certain attributes, i.e something like this: + +```python +class OneTwoThreeData(StaticData): + data = [1, 2, 3] +``` + +you can use `Service.with_attributes(attr_name=attr_value)` factory method. It +produces a new service class(factory) with specified attributes bound to a +static value. For example, that's how we can define a collection, that always +contains `[1, 2, 3]`: + +```python +class MyCollection(Collection): + DataFactory = StaticData.with_attributes(data=[1, 2, 3]) + +col = MyCollection("name", {}) +assert list(col) == [1, 2, 3] +``` + +Now you don't have to specify `data_factory` or `data_settings` when creating a +collection. It will always use `StaticData` with `data` set to `[1, 2, 3]`. +Make sure you mean it, because now you cannot override the data using +`data_settings`.
+ +--- diff --git a/docs/structure/columns.md b/docs/structure/columns.md index 1be3abf..fb15ed5 100644 --- a/docs/structure/columns.md +++ b/docs/structure/columns.md @@ -1 +1,22 @@ # Columns + + +This service contains additional information about separate columns of data +records. It defines the following settings: + +* names: all available column names. Used by other settings of columns service +* hidden: columns that should not be shown by serializer. Used by serializer + services +* visible: columns that must be shown by serializer. Used by serializer + services +* sortable: columns that support sorting. Used by data services +* filterable: columns that support filtration/faceting. Used by data services +* searchable: columns that support search by partial match. Used by data + services +* labels: human readable labels for columns. Used by serializer services + +This service contains information used by other services, so defining additional +attributes here is completely normal. For example, some custom serializer, that +serializes data into ORC, can expect `orc_format` attribute in the `columns` +service to be available. So you can add as much additional column related +details as required into this service. diff --git a/docs/structure/data.md b/docs/structure/data.md index 599c41b..6336292 100644 --- a/docs/structure/data.md +++ b/docs/structure/data.md @@ -1 +1,56 @@ # Data + + +This service produces the data for collection. Every data service must: + +* be Iterable and iterate over all available records by default +* define `total` property, that reflects number of available records so that + `len(list(data)) == data.total` +* define `range(start: Any, end: Any)` method that returns slice of the data + +Base class for data services - `Data` - already contains a simple version of +this logic. You need to define only one method to make your custom +implementations: `compute_data()`. When data is accessed for the first time, +`compute_data` is called.
Its result is cached and used for iteration in +for-loops, slicing via `range` method and size measurement via `total` +property. + + +```python +class CustomData(Data): + def compute_data(self) -> Any: + return "abcdefghijklmnopqrstuvwxyz" + +col = Collection("name", {}, data_factory=CustomData) +assert list(col) == ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j"] +assert col.data.total == 26 +assert col.data.range(-3, None) == "xyz" + +``` + +If you need a more complex data source, make sure you defined `__iter__`, +`total`, and `range`: + +```python +class CustomData(Data): + names = configurable_attribute(default_factory=["Anna", "Henry", "Mary"]) + + @property + def total(self): + return len(self.names) + + def __iter__(self): + yield from sorted(self.names) + + def range(self, start: Any, end: Any): + if not isinstance(start, str) or not isinstance(end, str): + return [] + + for name in self: + if name < start: + continue + if name > end: + break + yield name + +``` diff --git a/docs/structure/filters.md b/docs/structure/filters.md index c90dfe0..9c9ce84 100644 --- a/docs/structure/filters.md +++ b/docs/structure/filters.md @@ -1 +1,10 @@ # Filters + + +This service is used only by HTML table serializers at the moment. It has two +configurable attributes `static_filters` and `static_actions`. `static_filters` +are used for building search form for the data table. `static_actions` are not +used, but you can put into it details about batch or record-level actions and +use these details to extend one of standard serializers. For example, +ckanext-admin-panel defines allowed actions (remove, restore, hide) for content +and creates custom templates that are referring to these actions. diff --git a/docs/structure/index.md b/docs/structure/index.md index 9a4fde5..2a92b26 100644 --- a/docs/structure/index.md +++ b/docs/structure/index.md @@ -1 +1,79 @@ -# Collection structure +# Structure of the collection + +## Introduction + +Collections are designed to describe the data.
Most common logic often can be +defined declaratively and all imperative commands are either hidden deep inside +different parts of collection or injected as tiny lambda-functions. + +But describing the data is not simple, especially if data needs to be +interactive. As a result, collections have complex internal structure. Good news, +you don't need to know everything in order to use the collections. As long as +you need something simple, you can use the minimum of knowledge. + +Look how collection with all the users from DB can be created: + +/// tab | Using anonymous classes and verbose initialization +```python +from ckan import model +from ckanext.collection.shared import collection + +users = collection.ModelCollection(data_settings={"model": model.User}) + +``` +/// + +/// tab | Using dedicated class and simple initialization +```python +from ckan import model +from ckanext.collection.shared import collection, data + +class Users(collection.Collection): + DataFactory = data.ModelData.with_attributes(model=model.User) + +users = Users() +``` +/// + +For the standard scenarios, ckanext-collection already contains a number of +classes that do the heavy lifting. And in future, as more popular scenarios +are discovered, the number of classes will grow. + +Still, custom requirements often appear in the project. Because of it, +understanding how collection works and how it can be customized is the key +point in building the perfect collection. + +## Services + +Collection itself contains just a bare minimum of logic, and real magic happens +inside its *services*. Collection knows how to initialize services and usually +the only difference between all your collections, is the way all their services +are configured. + +Collection contains the following standard services: + +* `data`: controls the exact data that can be received from + collection. Contains logic for searching, filters, sorting, etc. +* `pager`: defines restrictions for data iteration.
Exactly this service limits + results to 10 records when you are iterating over collection. +* `serializer`: specifies how collection can be transformed into specific + format. Using correct serializer you'll be able to dump the whole collection + as CSV, JSON, YAML or render it as HTML table. +* `columns`: contains configuration of specific data columns used by other + services. It may define model attributes that are dumped into CSV, names of + the transformation functions that are applied to a certain attribute, names + of the columns that are available for sorting in HTML representation of data. +* `filters`: contains configuration of additional widgets produced during data + serialization. For example, when data is serialized into an HTML table, + filters can define configuration of dropdowns and input fields from the data + search form. + +/// tip + +You can define more services in custom collections. The list above only +enumerates all the services that are available in the base collection and in +all collections shipped with the current extension. For example, one of +built-in collections, `DbCollection` has additional service called +`db_connection` that can communicate with DB. + +/// diff --git a/docs/structure/pager.md b/docs/structure/pager.md index 95e5d4c..e307fd6 100644 --- a/docs/structure/pager.md +++ b/docs/structure/pager.md @@ -1 +1,51 @@ # Pager + +Pager service sets the upper and lower bounds on data used by +collection. Default pager used by collection relies on numeric `start`/`end` +values. But it's possible to define custom pager that uses alphabetical or +temporal bounds, as long as `range` method of your custom data service supports +these bounds. + +Standard pager(`ClassicPager`) has two configurable attributes: `page`(default: +1) and `rows_per_page`(default: 10).
+ +```python +col = StaticCollection("name", {}) +assert col.pager.page == 1 +assert col.pager.rows_per_page == 10 +``` + +Because of these values you see only first 10 records from data when iterating +the collection. Let's change pager settings: + +```python +col = StaticCollection( + "name", {}, + data_settings={"data": range(1, 100)}, + pager_settings={"page": 3, "rows_per_page": 6} +) +assert list(col) == [13, 14, 15, 16, 17, 18] +``` + +Pagination details are often passed with search parameters and have a huge +impact on the required data frame. Because of it, if `pager_settings` are +missing, `ClassicPager` will look for settings inside collection +parameters(second argument of the collection constructor). But in this case, +pager will use only items that have the `<name>:` prefix: + +```python +col = StaticCollection( + "xxx", + {"xxx:page": 3, "xxx:rows_per_page": 6}, + data_settings={"data": range(1, 100)} +) +assert list(col) == [13, 14, 15, 16, 17, 18] + +col = StaticCollection( + "xxx", + {"page": 3, "rows_per_page": 6}, + data_settings={"data": range(1, 100)} +) +assert list(col) == [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + +``` diff --git a/docs/structure/serializer.md b/docs/structure/serializer.md index ab45ae6..a8b0b3a 100644 --- a/docs/structure/serializer.md +++ b/docs/structure/serializer.md @@ -1 +1,35 @@ # Serializer + + +Serializer converts data into textual, binary or any other alternative +representation. For example, if you want to convert records produced by the +`data` service of the collection into pandas' DataFrame, you should probably +use serializer. + +Serializers are main users of columns service, because it contains details +about specific data columns. And serializers often iterate data service +directly(ignoring `range` method), to serialize all available records. + +The only required method for serializer is `serialize`. This method must return +the data from `data` service transformed into format provided by serializer.
For +example, `JsonSerializer` returns string with JSON-encoded data. + +You are not restricted by textual or binary formats. Serializer that transforms +data into pandas' DataFrame is completely valid version of the serializer. + +```python +class NewLineSerializer(Serializer): + def serialize(self): + result = "" + for item in self.attached.data: + result += str(item) + "\n" + + return result + +col = StaticCollection( + "name", {}, + serializer_factory=NewLineSerializer, + data_settings={"data": [1, 2, 3]} +) +assert "".join(col.serializer.serialize()) == "1\n2\n3\n" +``` diff --git a/docs/usage.md b/docs/usage.md index 686c038..ee6acbd 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -1,5 +1,7 @@ # Usage +## Register collection + Collection can be initialized anywhere in code /// admonition @@ -8,48 +10,77 @@ Collection can be initialized anywhere in code ```python from my.module import MyCollection -col = Collection("", {}) +col = MyCollection() ``` /// But it's recommended to register collections globally. -Collections are registered via [ICollection interface](interfaces.md#icollection) or -via CKAN signals. Registered collection can be initialized anywhere in code -using helper and can be used in a number of generic endpoints that render -collection as HTML of export it into different formats. +Collections are registered via [ICollection +interface](interfaces.md#icollection) or via CKAN signals. Registered +collection can be initialized anywhere in code using helper. It can also be +used in a number of generic endpoints that render collection as HTML or export +it into different formats. 
/// tab | Register via interface -hello -/// +```python +import ckan.plugins as p +from ckanext.collection import shared -/// tab | Register via signal -world +class MyPlugin(p.SingletonPlugin): + p.implements(shared.ICollection, inherit=True) + + def get_collection_factories( + self, + ) -> dict[str, shared.types.CollectionFactory]: + return { + "my-collection": MyCollection, + } + +``` /// -Registration via interface: +/// tab | Register via signal ```python -from ckanext.collection.interfaces import CollectionFactory, ICollection - +import ckan.plugins as p class MyPlugin(p.SingletonPlugin): - p.implements(ICollection, inherit=True) + p.implements(p.ISignal) - def get_collection_factories(self) -> dict[str, CollectionFactory]: + def get_signal_subscriptions(self) -> types.SignalMapping: return { - "my-collection": MyCollection, + tk.signals.ckanext.signal("collection:register_collections"): [ + get_collection_factories, + ], } +def get_collection_factories(sender: None): # (1)! + return { + "my-collection": MyCollection, + } + ``` +1. Signal listeners must receive at least one argument containing the sender + of the signal. Signal that registers collections always sets `None` as a + sender. + +/// + + `get_collection_factories` returns a dictionary with collection names(letters, digits, underscores and hyphens are allowed) as keys, and collection factories -as values. In most generic case, collection factory is just a collection -class. But you can use any function with signature `(str, dict[str, Any], -**Any) -> Collection` as a factory. For example, the following function is a -valid collection factory and it can be returned from `get_collection_factories` +as values. In most generic case, collection factory is just a collection's +class itself. But you can use any function with signature `(str, dict[str, +Any], **Any) -> Collection` as a factory.
+ +/// admonition + type: example + +The following function is a valid collection factory and it can be returned +from `get_collection_factories` ```python def my_factory(name: str, params: dict[str, Any], **kwargs: Any): @@ -57,31 +88,209 @@ def my_factory(name: str, params: dict[str, Any], **kwargs: Any): params.setdefault("rows_per_page", 100) return MyCollection(name, params, **kwargs) ``` +/// + +## Initialize collection + +Collection class defines the data source of collection and different aspects of +its behavior. But collection class itself does not contain any data and +collection instance must be created to work with data. -If you want to register a collection only if collection plugin is enabled, you -can use CKAN signals instead of wrapping import from ckanext-collection into -try except block: +Any collection can be initialized directly, using collection class. And every +[registered collection](#register-collection) can be initialized via +`get_collection` function. Arguments are the same in both cases. Collection +requires the name, parameters and accepts arbitrary number of keyword-only +arguments, that are passed to underlying services. +/// tab | Initialize registered collection ```python +col = get_collection( + "my-collection", + {}, + pager_settings={"rows_per_page": 100} +) +``` +/// tip -class MyPlugin(p.SingletonPlugin): - p.implements(p.ISignal) +Second argument of `get_collection` expects parameters prefixed by collection +name. In example above, to choose the second page, you need to pass +`{"my-collection:page": 2}` as parameters.
- def get_signal_subscriptions(self) -> types.SignalMapping: - return { - tk.signals.ckanext.signal("collection:register_collections"): [ - self.collect_collection_factories, - ], - } +If you are using unprefixed parameters, like `{"page": 2}` and don't want to +adapt them to expected form, pass `True` as the third argument to +`get_collection`, and every key inside parameters will get required prefix +automatically. - def collect_collection_factories(self, sender: None): - return { - "my-collection": MyCollection, - } +```python +col = get_collection( + "my-collection", + {"page": 2}, + True, + pager_settings={"rows_per_page": 100} +) +``` +/// +/// +/// tab | Initialize collection using class +```python +col = MyCollection( + "", + {}, + pager_settings={"rows_per_page": 100}, +) ``` +/// + +## Use collection data -Data returned from the signal subscription is exactly the same as from -`ICollection.get_collection_factories`. The only difference, signal -subscription accepts `sender` argument which is always `None`, due to internal -implementation of signals. +/// tip + +If you want to try examples below, but you haven't defined any collection yet, +you can use the following definition for collection of numbers from 1 to 25: + +```python +from ckanext.collection.shared import collection, data +class MyCollection(collection.Collection): + DataFactory = data.StaticData.with_attributes(data=[ + {"number": num, "index": idx} + for idx, num in enumerate(range(1,26)) + ]) + +``` +/// + +Intended way to access the data is iteration over collection instance. In this +way, you access only specific chunk of data, limited by collection's pager. 
+ +```pycon +>>> col = MyCollection() +>>> list(col) +[{'number': 1, 'index': 0}, + {'number': 2, 'index': 1}, + {'number': 3, 'index': 2}, + {'number': 4, 'index': 3}, + {'number': 5, 'index': 4}, + {'number': 6, 'index': 5}, + {'number': 7, 'index': 6}, + {'number': 8, 'index': 7}, + {'number': 9, 'index': 8}, + {'number': 10, 'index': 9}] +``` + +Different page can be accessed by passing `page` inside params to collection's +constructor + +```pycon +>>> col = MyCollection("", {"page": 3}) # (1)! +>>> list(col) +[{'number': 21, 'index': 20}, + {'number': 22, 'index': 21}, + {'number': 23, 'index': 22}, + {'number': 24, 'index': 23}, + {'number': 25, 'index': 24}] +``` + +1. More idiomatic form of this initialization is `#!py3 + MyCollection(pager_settings={"page": 3})`. But this form is longer and + requires deeper knowledge of collections. + +If you need to iterate over all collection items, without pagination, you can +use `data` attribute of the collection. + +/// warning + +Using data directly can result in enormous memory consumption. Avoid +transforming data into list(`#!py3 list(col.data)`) or processing it as single object +in any other way. Instead, iterate over collection items using loops or similar +tools. + +/// + +/// admonition + type: example + +```pycon +>>> sum = 0 +>>> for item in col.data: +>>> sum += item["number"] +>>> +>>> print(sum) +325 +``` +/// + +## Serialize collection + +The ultimate goal of every collection is serialization. It may be serialization +as HTML to show collection on one of application web-pages. Or serialization as +JSON to send collection to the external API. Or serialization as CSV to allow +user downloading the collection. Or even serialization as pandas' DataFrame to +process data from the collection using more advanced tools. + +`serializer` service of collection is responsible for serialization. If +required format of collection's output is known in advance, `SerializerFactory` +can be defined on collection level.
+ +If the format of serialization can vary, `serializer` can be initialized +separately. + +/// tab | Using SerializerFactory + +```python +from ckanext.collection.shared import collection, serialize, data + +class MyCollection(collection.Collection): + DataFactory = data.StaticData.with_attributes(data=[ + {"number": num, "index": idx} + for idx, num in enumerate(range(1,26)) + ]) + SerializerFactory = serialize.CsvSerializer + +col = MyCollection() +print(col.serializer.serialize()) +``` +/// +/// tab | Creating serializers on demand + +```python +from ckanext.collection.shared import collection, serialize, data + +class MyCollection(collection.Collection): + DataFactory = data.StaticData.with_attributes(data=[ + {"number": num, "index": idx} + for idx, num in enumerate(range(1,26)) + ]) + + +col = MyCollection() + +json = serialize.JsonSerializer(col) +print(json.serialize()) + +csv = serialize.CsvSerializer(col) +print(csv.serialize()) + +``` +/// + + +/// admonition + type: warning + + +Keep in mind, as any other collection's service, serializer attaches itself to +collection when initialized and replaces the previous serializer. 
+ +```pycon +>>> col = MyCollection() +>>> isinstance(col.serializer, serialize.Serializer) +True +>>> serialize.JsonSerializer(col) +>>> isinstance(col.serializer, serialize.JsonSerializer) +True +>>> serialize.CsvSerializer(col) +>>> isinstance(col.serializer, serialize.CsvSerializer) +True +``` +/// diff --git a/mkdocs.yml b/mkdocs.yml index 138766f..a841e5b 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -5,6 +5,7 @@ repo_url: https://github.com/DataShades/ckanext-collection markdown_extensions: - pymdownx.snippets + - pymdownx.inlinehilite - pymdownx.blocks.admonition - pymdownx.blocks.details - pymdownx.blocks.tab: @@ -56,6 +57,7 @@ nav: - index.md - installation.md - usage.md + - interfaces.md - Structure: - structure/index.md - structure/collection.md @@ -64,7 +66,8 @@ nav: - structure/filters.md - structure/serializer.md - structure/pager.md - - interfaces.md - API: - api/index.md + - configuration.md + - integrations.md - detailed.md