diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index f66505c5..4e903ac4 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -7,7 +7,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [ 3.9 ] + python-version: [ "3.10" ] steps: - uses: actions/checkout@v2 - name: Set up Python ${{ matrix.python-version }} @@ -34,10 +34,10 @@ jobs: - name: Install dependencies run: | - python -m pip install --upgrade pip==20.2.4 wheel setuptools + python -m pip install --upgrade pip wheel setuptools pip install poetry poetry config virtualenvs.create false - poetry install + poetry install --extras "pyyaml json5" cat poetry.lock - name: Test with pytest diff --git a/README.md b/README.md index fff7e2b8..6d24d0de 100644 --- a/README.md +++ b/README.md @@ -1,379 +1,343 @@ # OARepo model builder -An utility library that generates OARepo required data model files from a JSON specification file. +A library and command-line tool to generate invenio model from a single model file. -## Installation +## CLI Usage -```shell -poetry install oarepo-model-builder +```bash +oarepo-compile-model model.yaml ``` -## Data model specification file - -Data model specification should be a JSON5 formatted file based on JSON Schema with the following OArepo specific -extension keywords: - -### oarepo:use +will compile the model.yaml into the current directory. Options: + +```bash + --output-directory Output directory where the generated files will be + placed. Defaults to "." + --package Package into which the model is generated. If not + passed, the name of the current directory, + converted into python package name, is used. + --set Overwrite option in the model file. + Example --set settings.elasticsearch.keyword-ignore-above=20 + -v Increase the verbosity. This option can be used + multiple times. + --config Load a config file and replace parts of the model + with it. The config file can be a json, yaml or a + python file. 
If it is a python file, it is + evaluated with the current model stored in the + "oarepo_model" global variable and after the + evaluation all globals are set on the model. + --isort / --skip-isort Call isort on generated sources (default: yes) + --black / --skip-black Call black on generated sources (default: yes) +``` -- Defines a reference to another datamodel. Contents of referenced datamodel will be used in place of this directive. -- Can be used anywhere in the specification file. -- This directive will be replaced by a referred datamodel type contents. -- Only new properties will be added to current property/object. -- All referenced datamodels must be registered under *datamodel* entrypoints (see **Entrypoints**) +## Model file structure -#### Syntax +A model is a json/yaml file with the following structure: -```json -"oarepo:use": List[string] | string // list of datamodel type references or single string reference +```yaml +settings: + python: + elasticsearch: +model: + properties: + title: { type: 'fulltext' } ``` -#### Example Usage: - -The following source specification: - -```json5 -// datamodel.json5 -{ - "title": "Test record v1.0.0", - "type": "object", - "additionalProperties": false, - "oarepo:use": [ - "include1" - ], - "properties": { - "field0": { - "oarepo:use": "include2" - } - } -} +There might be more sections (documentation etc.), but only the ``settings`` and ``model`` are currently processed. 
+ +### settings section + +The settings section might contain the following keys +(default values below): + +```yaml +settings: + package: basename(output dir) with '-' converted to '_' + kebap-package: to_kebap(package) + package-path: path to package as python Path instance + schema-version: 1.0.0 + schema-name: { kebap-package }-{schema-version}.json + schema-file: full path to generated json schema + mapping-file: full path to generated mapping + collection-url: camel_case(last component of package) + + processing-order: [ 'settings', '*', 'model' ] + + python: + record-prefix: camel_case(last component of package) + templates: { } # overridden templates + marshmallow: + top-level-metadata: true + mapping: { } + + record-prefix-snake: snake_case(record_prefix) + + record-class: { settings.package }.record.{record_prefix}Record + # full record class name with package + record-schema-class: { settings.package }.schema.{record_prefix}Schema + # full record schema class name (apart from invenio stuff, contains only metadata field) + record-schema-metadata-class: { settings.package }.schema.{record_prefix}MetadataSchema + # full record schema metadata class name (contains model schema as marshmallow) + record-schema-metadata-alembic: { settings.package_base } + # name of key in pyproject.toml invenio_db.alembic entry point + record-metadata-class: { settings.package }.metadata.{record_prefix}Metadata + # db class to store record's metadata + record-metadata-table-name: { record_prefix.lower() }_metadata + # name of database table for storing metadata + record-permissions-class: { settings.package }.permissions.{record_prefix}PermissionPolicy + # class containing permissions for the record + record-dumper-class: { settings.package }.dumper.{record_prefix}Dumper + # record dumper class for elasticsearch + record-search-options-class: { settings.package }.search_options.{record_prefix}SearchOptions + # search options for the record + record-service-config-class: { 
settings.package }.service_config.{record_prefix}ServiceConfig + # configuration of record's service + record-resource-config-class: { settings.package }.resource.{record_prefix}ResourceConfig + # configuration of record's resource + record-resource-class: { settings.package }.resource.{record_prefix}Resource + # record resource + record-resource-blueprint-name: { record_prefix } + # blueprint name of the resource + register-blueprint-function: { settings.package }.blueprint.register_blueprint' + # name of the blueprint registration function + + elasticsearch: + keyword-ignore-above: 50 + + plugins: + packages: [ ] + # list of extra packages that should be installed in compiler's venv + output|builder|model|property: + # plugin types - file outputs, builders, model preprocessors, property preprocessors + disabled: [ ] + # list of plugin names to disable + # string "__all__" to disable all plugins in this category + enabled: + # list of plugin names to enable. The plugins will be used + # in the order defined. Use with disabled: __all__ + # list of "module:className" that will be added at the end of + # plugin list ``` -```json5 -// datamodels/include1.json5 -{ - "title": "Included properties v1.0.0", - "type": "object", - "additionalProperties": true, - "properties": { - "includedField1": { - "type": "string", - } - } -} +### model section + +The model section is a json schema that might be annotated with extra sections. For example: + +```yaml +model: + properties: + title: + type: multilingual + oarepo:ui: + label: Title + class: bold-text + oarepo:documentation: | + Lorem ipsum ... + Dolor sit ... ``` -```json5 -// datamodels/include2.json5 -{ - "type": "number" -} -``` +**Note**: ``multilingual`` is a special type (not defined in this library) that is translated to the correct schema, +mapping and marshmallow files with a custom ``PropertyPreprocessor``. 
-```python -# setup.py -entry_points = { - 'oarepo_model_builder.datamodels': [ - 'includes = mypkg.datamodels' - ], - ... -} -``` +``oarepo:ui`` gives information for the ui output + +``oarepo:documentation`` is a section that is currently ignored + +## Referencing a model -Will be compiled into resulting JSON Schema: - -```json -{ - "title": "Test record v1.0.0", - "type": "object", - "additionalProperties": false, - "properties": { - "field0": { - "type": "number" - }, - "includedField1": { - "type": "string" +## API Usage + +To generate invenio model from a model file, perform the following steps: + +1. Load the model into a ``ModelSchema`` instance + ```python + from oarepo_model_builder.schema import ModelSchema + from oarepo_model_builder.loaders import yaml_loader + + included_models = { + 'my_model': lambda parent_model: {'test': 'abc'} } - } -} -``` + loaders = {'yaml': yaml_loader} + + model = ModelSchema(file_path='test.yaml', + included_models=included_models, + loaders=loaders) + ``` -### oarepo:search + You can also pass the content of the file directly in the ``content`` attribute -- Specifies how the current field/object should be indexed and handled for search, filtering and aggregations -- Used by the ElasticSearch Mapping generator to generate property mappings. -- If not present on a property, a default mapping type of **keyword** or **object** (for object-type properties) is assumed in resulting mapping output -- Value can be either string or object. -- This directive is omitted from the JSON Schema builder output + The ``included_models`` is a mapping between model key and its accessor. It is used to replace any ``oarepo:use`` + element. See the Referencing a model section above.
-#### Syntax + The ``loaders`` handle loading of files - the key is lowercased file extension, value a function taking (schema, + path) and returning loaded content -```json5 -mapping_config: string // string value represents an ES mapping type of property -// or -mapping_config: object // you can also pass object for more complex property mapping configurations -// or -mapping_config: false // parent field should be omitted from the generated ES mapping output +2. Create an instance of ``ModelBuilder`` -"oarepo:search": { - "mapping": mapping_config // "oarepo:search" block will be substituted by mapping_object configuration ES mapping output -} -// or -"oarepo:search": string | false // "string" represents an ES mapping type of the parent property -``` + To use the pre-installed set of builders and preprocessors, invoke: -#### Example usage - -The following source specification: - -```json5 -{ - "properties": { - "testNoMapping": { - "type": "string", - "oarepo:search": false - }, - "testDefault": { - "type": "string" - }, - "testExplicit": { - "type": "string", - "oarepo:search": { - "mapping": "text" - } - }, - "testShorthand": { - "type": "string", - "oarepo:search": "date" - }, - "testObject": { - "type": "string", - "oarepo:search": { - "mapping": { - "type": "text", - "index": "false" - } - } - }, - "testArray": { - "type": "array", - "items": { - "type": "string", - "oarepo:search": "date" - } - } - } -} -``` + ```python + from oarepo_model_builder.entrypoints \ + import create_builder_from_entrypoints + + builder = create_builder_from_entrypoints() + ``` -Will result in the following files: - -```json5 -// mappings/v7/mymodel-v1.0.0.json -{ - "mappings": { - "properties": { - "testDefault": {"type": "keyword"}, - "testExplicit": {"type": "text"}, - "testShorthand": {"type": "date"}, - "testObject": { - "type": "text", - "index": "false" - }, - "testArray": {"type": "date"} - } - } -} -``` + To have a complete control of builders and preprocessors, invoke: 
-```json5 -// jsonschemas/.../mymodel-v1.0.0.json -{ - "properties": { - "testNoMapping": {"type": "string"}, - "testDefault": {"type": "string"}, - "testExplicit": {"type": "string"}, - "testShorthand": {"type": "string"}, - "testObject": {"type": "string"}, - "testArray": { - "type": "array", - "items": { - "type": "string" - } - } - } -} -``` + ```python + from oarepo_model_builder.builder import ModelBuilder + from oarepo_model_builder.builders.jsonschema import JSONSchemaBuilder + from oarepo_model_builder.builders.mapping import MappingBuilder + from oarepo_model_builder.outputs.jsonschema import JSONSchemaOutput + from oarepo_model_builder.outputs.mapping import MappingOutput + from oarepo_model_builder.outputs.python import PythonOutput + from oarepo_model_builder.property_preprocessors.text_keyword import TextKeywordPreprocessor + from oarepo_model_builder.model_preprocessors.default_values import DefaultValuesModelPreprocessor + from oarepo_model_builder.model_preprocessors.elasticsearch import ElasticsearchModelPreprocessor -### oarepo:ui + builder = ModelBuilder( + output_builders=[JSONSchemaBuilder, MappingBuilder], + outputs=[JSONSchemaOutput, MappingOutput, PythonOutput], + model_preprocessors=[DefaultValuesModelPreprocessor, ElasticsearchModelPreprocessor], + property_preprocessors=[TextKeywordPreprocessor] + ) + ``` -- Directive used to specify a field metadata to be used by UI representations of the data model -#### Syntax +3. 
Invoke -```json5 -multilingual_string: {lang_code: value, ...} + ```python + builder.build(schema, output_directory) + ``` -"oarepo:ui": { - "title": multilingual_string, // Property or object title - "label": multilingual_string, // Label to be displayed on property input fields - "hint": multilingual_string // Additional hint to be displayed on property input fields -} -``` +## Extending the builder -### Example usage - -The following source specification: -```json5 - -{ - "title": "Test record v1.0.0", - "type": "object", - "oarepo:ui": { - "title": { - "cs": "Testovaci zaznam", - "en": "Test record" - }, - }, - "properties": { - "field1": { - "type": "string", - "oarepo:ui": { - "label": { - "en": "Test field 1" - }, - "hint": { - "cs": "Vyplnte testovaci field", - "en": "Please provide test field input" - } - } - } - } -} -``` +### Builder pipeline -Will result in the following files: - -##### TODO(@mesemus): - -## Customization - -### Build configuration - -You can override some build process defaults using a custom JSON configuration file, starting with configuration -from `./config/defauts.json`. 
-```json5 -// build-config.json -{ - "jsonschema": { - "type": "object", - "additionalProperties": false - }, - "search": { - "default_mapping_type": "keyword", - "mapping": { - "settings": { - "analysis": { - "char_filter": { - "configured_html_strip": { - "type": "html_strip", - "escaped_tags": [] - } - }, - "normalizer": { - "wsnormalizer": { - "type": "custom", - "filter": [ - "trim" - ] - } - }, - "filter": { - "czech_stop": { - "type": "stop", - "stopwords": "_czech_" - }, - "czech_stemmer": { - "type": "stemmer", - "language": "czech" - } - }, - "analyzer": { - "default": { - "tokenizer": "standard", - "filter": [ - "lowercase", - "czech_stop", - "czech_stemmer" - ] - } - } - } - }, - "mappings": { - "dynamic": false, - "date_detection": false, - "numeric_detection": false, - "properties": {} - } - } - } -} -``` +![Pipeline](./docs/oarepo-model-builder.png) -### Entrypoints +At first, an instance of [ModelSchema](./oarepo_model_builder/schema.py) is obtained. The schema can be either passed +the content of the schema as text, or just a path pointing to the file. The extension of the file determines +which [loader](./oarepo_model_builder/loaders/__init__.py) is used. JSON, JSON5 and YAML are supported out of the box ( +if you have json5 and pyyaml packages installed) -This package uses the following entrypoints in the build process to determine, which builders and data models -should be considered: +Then [ModelBuilder](./oarepo_model_builder/builder.py).build(schema, output_dir) is called. -#### oarepo_model_builder.source -Classes responsible for parsing the whole source data model specification file. +It begins with calling all [ModelPreprocessors](./oarepo_model_builder/model_preprocessors/__init__.py). They get the +whole schema and settings and can modify both. +See [ElasticsearchModelPreprocessor](./oarepo_model_builder/model_preprocessors/elasticsearch.py) as an example. 
The +deepmerge function does not overwrite values if they already exist in settings. -Default: -```python -datamodel = "oarepo_model_builder.handlers:DataModelBuilder" -``` +For each of the outputs (jsonschema, mapping, record, resource, ...) +the top-level properties of the transformed schema are then iterated. +The order of the top-level properties is given by ``settings.processing-order``. -#### oarepo_model_builder.elements -Classes for building the output files from elements in a source data model specification file. +For the top-level property and all its descendants (a visitor pattern, visiting property by property), +a [PropertyPreprocessor](./oarepo_model_builder/property_preprocessors/__init__.py) +is called. -Default: -```python -jsonschema = "oarepo_model_builder.builders.jsonschema_builder:JSONSchemaBuilder" -mapping = "oarepo_model_builder.builders.mapping:MappingBuilder" -``` +The preprocessor can either modify the property, decide to remove it or replace it with a new set of properties +(see [multilang in tests](./tests/multilang.py) ). -#### oarepo_model_builder.{output_type} +The property is then passed to the +[OutputBuilder](./oarepo_model_builder/builders/__init__.py) +(an example is [JSONSchemaBuilder](./oarepo_model_builder/builders/jsonschema.py)) +that serializes the tree of properties into the output. -Classes responsible for generating output files of certain type +The output builder does not create files on the filesystem explicitly but uses instances +of [OutputBase](./oarepo_model_builder/outputs/__init__.py), for +example [JSONOutput](./oarepo_model_builder/outputs/json.py) or more +specialized [JSONSchemaOutput](./oarepo_model_builder/outputs/jsonschema.py).
-Default: -```toml -[tool.poetry.plugins."oarepo_model_builder.jsonschema"] -jsonschema = "oarepo_model_builder.outputs:JsonSchemaOutput" +See [JSONBaseBuilder](./oarepo_model_builder/builders/json_base.py) for an example of how to get an output and write to +it (in this case, the json-based output). + +This way, even if more output builders access the same file, their access is coordinated. + +### Registering Preprocessors, Builders and Outputs for commandline client -[tool.poetry.plugins."oarepo_model_builder.mapping"] -mapping = "oarepo_model_builder.outputs:MappingOutput" +The model & property preprocessors, output builders and outputs are registered in entry points. In poetry, it looks as: + +```toml +[tool.poetry.plugins."oarepo_model_builder.builders"] +010-jsonschema = "oarepo_model_builder.builders.jsonschema:JSONSchemaBuilder" +020-mapping = "oarepo_model_builder.builders.mapping:MappingBuilder" +030-python_structure = "oarepo_model_builder.builders.python_structure:PythonStructureBuilder" +040-invenio_record = "oarepo_model_builder.invenio.invenio_record:InvenioRecordBuilder" + +[tool.poetry.plugins."oarepo_model_builder.ouptuts"] +jsonschema = "oarepo_model_builder.outputs.jsonschema:JSONSchemaOutput" +mapping = "oarepo_model_builder.outputs.mapping:MappingOutput" +python = "oarepo_model_builder.outputs.python:PythonOutput" + +[tool.poetry.plugins."oarepo_model_builder.property_preprocessors"] +010-text_keyword = "oarepo_model_builder.preprocessors.text_keyword:TextKeywordPreprocessor" + +[tool.poetry.plugins."oarepo_model_builder.model_preprocessors"] +01-default = "oarepo_model_builder.transformers.default_values:DefaultValuesModelPreprocessor" +10-invenio = "oarepo_model_builder.transformers.invenio:InvenioModelPreprocessor" +20-elasticsearch = "oarepo_model_builder.transformers.elasticsearch:ElasticsearchModelPreprocessor" + +[tool.poetry.plugins."oarepo_model_builder.loaders"] +json = "oarepo_model_builder.loaders:json_loader" +json5 = 
"oarepo_model_builder.loaders:json_loader" +yaml = "oarepo_model_builder.loaders:yaml_loader" +yml = "oarepo_model_builder.loaders:yaml_loader" + +[tool.poetry.plugins."oarepo_model_builder.templates"] +99-base_templates = "oarepo_model_builder.invenio.templates" ``` +### Generating python files -#### oarepo_model_builder.datamodels -Python modules containing data model specification files +The default python output is based on [libCST](https://github.com/Instagram/LibCST) that enables merging generated code +with a code that is already present in output files. The transformer provided in this package can: -## Usage +1. Add imports +2. Add a new class or function on top-level +3. Add a new method to an existing class +4. Add a new const/property to an existing class -To build a data model files from a specification file, this package provides the `models` script: +The transformer will not touch an existing function/method. Increase verbosity level to get a list of rejected patches +or add ``--set settings.python.overwrite=true`` +(use with caution, with sources stored in git and do diff afterwards). -```shell -$ models build --help -Usage: models build [OPTIONS] SOURCE +#### Overriding default templates - Build data model files from JSON5 source specification. +The default templates are written as jinja2-based templates. -Options: - --package TEXT Package name of the model (example: 'test-package') - --config PATH Path to custom build config file (example: './build-config.json') - --datamodel-version TEXT Version string of the built model: (example: '1.0.0') - --help -``` +To override a single or multiple templates, create a package containing the templates and register it +in ``oarepo_model_builder.templates``. Be sure to specify the registration key smaller than ``99-``. The template loader +iterates the sorted set of keys and your templates would be loaded before the default ones. 
Example: + ``` + my_package + +-- __init__.py + +-- templates + +-- invenio_record.py.jinja2 + ``` + + ```python + # my_package/__init__.py +TEMPLATES = { + # resolved relative to the package + "record": "templates/invenio_record.py.jinja2" +} + ``` + + ```toml + [tool.poetry.plugins."oarepo_model_builder.templates"] +20-my_templates = "my_package" + ``` + +To override a template for a single model, in your model file (or configuration file with -c option or via --set option) +, specify the relative path to the template: + +```yaml +settings: + python: + templates: + record: ./test/my_invenio_record.py.jinja2 +``` diff --git a/docs/Copy of NRP - overview.drawio.png b/docs/Copy of NRP - overview.drawio.png new file mode 100644 index 00000000..e4030660 Binary files /dev/null and b/docs/Copy of NRP - overview.drawio.png differ diff --git a/docs/model.json5 b/docs/model.json5 new file mode 100644 index 00000000..2c6fc11d --- /dev/null +++ b/docs/model.json5 @@ -0,0 +1,13 @@ +{ + settings: { + package: 'record_test', + }, + model: { + properties: { + a: { + type: 'fulltext', + minLength: 5 + } + }, + } +} \ No newline at end of file diff --git a/docs/oarepo-model-builder.drawio b/docs/oarepo-model-builder.drawio new file mode 100644 index 00000000..4b5fe107 --- /dev/null +++ b/docs/oarepo-model-builder.drawio @@ -0,0 +1 @@ 
+7V1bc6M4Fv41qdp5iAsQYHjMbbp7q3s6U0ltZvclhUG2mcaIETiO8+tXAoQByUZOADs23Q8xQoA437nrSFyAm8XrF+xE8x/Ig8GFpnivF+D2QtNUVTHJH9qyzlsUzchaZtj38rZNw4P/BlnHvHXpezCudEwQChI/qja6KAyhm1TaHIzRqtptioLqUyNnBrmGB9cJ+NYn30vm7MVMe3PiK/Rn8/zRljbOTiwc1jl/k3jueGhVagJ3F+AGI5RkvxavNzCg1GN0ya77fcvZYmAYhonMBW9fn66WP1e3/3n65/vdt+9vsZP8eZnf5cUJlvkLMwwf3DlcOPnQkzWjR7zyF4ETkqPrOHFwkiNGng2u5wj7byhMnIC0qKTBnfuB991ZoyUdYYIhZAelvo+kmZylN8Aw9t+cScCOXdrBDyF+XEcwvwNpnvpBcIMChNMhgekUmq6bDgijX7B0xhvbE4XeiKcVe3GIE/haaspp9wWiBUzwmnR5rTLkumDt7Hi14QrNzNvmJYZQ7bzRyTlxVtx6Axb5keO1D3YWB14Mk8QPZzEH22ruJ/Ahclx6vCKySiFIFgwonj47uUWaaFqVaDpPM1UR0EzvimSAo9giY/fjIJeqHxm9eA4jGsEMqABPMPk1o7+SVDivqHZdBgF51eRY6amCQxN0zBGUoxX0iAHKD2EwQau7TcN12kBOUBr4LlW0ZTpitAw96OXacz+qxmiJXdgsO0Tpz+Cu++U8Q19jJ0YYBk7iv1TtrIjg+aX3yCfvUWB7qRo1cLUaaNkb5ZfVcCvG8X4oNXOr7YwwjDByYRwjTHSx8q8Vwr/IXxRSAs4h5cA5CujfXP8ocWpuf9tlb6vWlFhf95fAnOasULaRIUqv3/R5oNcynqE2F94zLqk3/XBey8aZGPA4KaxzEDhR7G/sddUA549NB3qNsAdpo6qwJqoOiK0ibZeqVjP+dBRO4M9C8tslo6CXXkcUyRRb4/rCuKUvSd6mZO6V9F875h4oNe4S6GLNNgX2XulKd2i88rh2Yt+9oIxPRgJS2npw6iyDpNqYXsJ7BcQlTf2qOIFUI0cQ+2SoKU606X5zTPjpFTL3XBWDUfXKPANani7yyixtAkxTXkEVgvZeXw0AOa1vjLtCjrej38IXGProMo6gK0LvXKGzrSODzuaguwuIDvQFqJ0PSqplHhdMzLErwfSInTCeIrwgDpGmfL6w9l347m3nTHB0cS3gkxIdx7UF93zewFbjaNZlZLs/wY4utAV8LuCQse3HKXr44BboHEk5avUd3TKiNIe3+Vgb41vGOCcf4Oq818rBWYKnyG+nHpETzwuouEiO4XuVn1j4npcyAPOXFq8zOp8wyjL42mgCQ++K/qa3puRQDfqLUgpY5FeIEnfOnkb7fYUOfbphVPiGDtPzMXQTH9HnriANZtswo2OzHnJICmN34aLOe67EG6KM6xBakT8BXMCU4fxwk4io4ZtqvIoUCmN8QbZBHnSRAm2S9b0BssY1gEyDA0jk5oAW4HkaX82vof4//HL11/PX28cnhL4V0fwW3YhwMkczFDpBWUFWybLp8x2hKMfnb+IqrXP31VkmqIoeISBe/5Vfnx78lx6MDHZ4+1o+ebtuQqBRrxoWr1eFBFHHbevVj+FjDfhUCaIeCp+d4y6pt38//PyjFOLVfTkSoUVLqu8mSxK/EZ30gWi9wemrB/MCo1OP9hxoTYWTmKZrwcm0HS2o2TUtONZGvB4sHI1K6G50BSRvpwrsGHQ/a9DFrAd5YtHpdI0XAM3eRb/Gyz6Ecny/kjMFk1fiF1PaVnLppcR3ddalDnlyb6vvr9XcSQPUajsa+uuWUsM4G8F74wMhrYrQ6sQtpDzzdBMZ9s08BjB6YB51YJ4q8xinwTy21irz7CRqeerdiSI6mzx4fc1en6GA0VjW7SscxNadCJZI+zROhCkpyqrdtih/yME2+UKVfPY5HYKLsCcjNqazoPweTuJI5GyfnxiZmtqzGIn
hlagpG9K02dl6mlY3D56mNfk8xpCm3QBk95emFcMjyk4M0iUlXYZIO/YrXYydBukSSZcpKPboVbrGfKnH3zEKs2LYmN6bc0wmgTO/fFFHCvlP+3JoNXgXdema0BLKkYfcZcoIgtUlU23L6hJzYhotleHU6kmAIWmXuitU1zhkFlmIJYblZXy+aOmGpJ4zOkOLD4gzx34UrU+S5KZmjjRJqoOuPO+xhOc9I0o8kn//YiFjXi54UV4rKDM1YI51EWF0AV2szrhRog6IcdnKWacpKco/v+DGgaFMxjIBMQro867jLCakHJVeA/EdCWPTItnmitncItdrMTFKnKR0XI5bM6tdjlzj0IkeUZYC26u8q2CVZoZvQK07jW8MqL0ftXVVTR0MRD7XM4C4L4iC2tR+QZQovOvarpjmEdoViVj83Jjb2gL48dgVxg4Dau9B7UjsisUH6gOIn82uqMoeKMYk1A8gyzc2xIxpupHtj6KMgAk01QK2Yim6YVsWYF3yWR1lZNjEomi6CsY2IMOyWgoSmQ/LKubHchQ3u6M4n0Y5KYobtTmxI6A4nwo5Hop3k3fX6+udDw/CHrHkiYJgKAcHYY9Y8ERBMNkeSQcDQZUwuV3HcnptpSAQOCKg33k5g9fS9xgRTy2h4965f0rGqmmv4oIpRouLfPpu2EOloz1UGhhcfppYsMIc9Mt9fIL6xzJI/IDQckng3+zsUObE7cqzjd0dOqpy2i19be6oogog7Gy/B4Ofevm9WKA9oMeh17yriqr1CJ8t4ZpssYp7aiF5Y7k9SLcEnN1ZkG5LTCqeWabF3nt/mkNnWmyJDP4AouSU/8FAHKYbPgBiD6iJi+/fbVk6i7d0QTXXlnhrU9XSOmEkUjJbCLOTzh82uIIS7K4MrvBFznrGfCenHKO5FQ74rF2mdiDs0dgKB3zWDtMHITwUZoL9vk41jbKXCdxef19f3CKsYe0qkSLGkHcLTjGR0g5+6tiSArCjVIrwJSSClL79XcPiidLd/IIYWolZ4H28XbstWln6qFqjoKuCHXh6dYDVsy7X3s0+n8YFVs+6erslEA/tBKtDLPoBEA+GmsahdrOMEzojfeIO1DaruMOBat43vlfvSeVTB5K7CL4fw0+6/YXKrVGV30UGdLeLjOAraE1LiJ+f/dBPnp9PdXGkLig962z18M4dcMrlPetkntbvDCK1VaRMzToGkWKPE22vNcC3HT7dPAqNqPFTYIPwSaC3x65oXaLHe5M7N144dWNmqJLL/DszZpqgVnWQp0Z5MtTjMGZ8dqt5I5OTF6qxeOu6fuWKT1kNciUhV2NDds/2DsUKiFJVwl3bz2yvdrX+JRhBUrHXvdqBzDeHi53lioUO2767lMHCVv8AgUIz6H+hVKT/dpG5ccfUEhENARFZ2wc/vWQpWzBs+PISd6N62Xz9PtlWsm18wUkMPZ8buS8SkUcqgmleOdO6dprPRiH7GmO6cCw9zgettiCvdm3yUjST06+88rmTLzCE2EngAJkQMmAINwPrFTU2pEHLSmtZVWlSj7Jq1q6nplvTs+QQI5SUu1OvPvuaLLj7Pw== \ No newline at end of file diff --git a/docs/oarepo-model-builder.png b/docs/oarepo-model-builder.png new file mode 100644 index 00000000..6869c973 Binary files /dev/null and b/docs/oarepo-model-builder.png differ diff --git a/oarepo_model_builder/__init__.py b/oarepo_model_builder/__init__.py index 907c549f..e69de29b 100644 --- a/oarepo_model_builder/__init__.py +++ b/oarepo_model_builder/__init__.py @@ -1,90 +0,0 
@@ -# -*- coding: utf-8 -*- -# -# Copyright (C) 2021 CESNET. -# -# OARepo-Communities is free software; you can redistribute it and/or modify -# it under the terms of the MIT License; see LICENSE file for more details. -"""OArepo module that generates data model files from a JSON specification file.""" - -import os -from copy import deepcopy -from functools import cached_property - -import json5 -import pkg_resources - -from oarepo_model_builder.config import Config -from oarepo_model_builder.errors import BuildError - - -class OARepoModelBuilder: - """OARepoModelBuilder extension state.""" - - def __init__(self): - self._builders = None - self._el_builders = None - self._default_conf = None - - @cached_property - def datamodels(self): - models = {} - for entry_point in pkg_resources.iter_entry_points('oarepo_model_builder.datamodels'): - ep = entry_point.load() - directory = os.path.dirname(ep.__file__) - for file in os.listdir(directory): - file_path = os.path.join(directory, file) - - if file.lower().endswith(('.json5', '.json')): - model_name = file.rsplit('.', 1)[0] - with open(file_path) as mf: - models[model_name] = json5.load(mf) - - return models - - def get_model(self, name): - try: - return deepcopy(self.datamodels[name]) - except KeyError: - raise BuildError(f'datamodel "{name}" not in registered datamodels') - - @property - def source_builders(self) -> list: - if self._builders is None: - builders = [] - for entry_point in pkg_resources.iter_entry_points('oarepo_model_builder.source'): - cls = entry_point.load() - builders.append(cls()) - builders.sort(key=lambda opener: -getattr(opener, '_priority', 10)) - self._builders = builders - return self._builders - - @property - def element_builders(self) -> list: - if self._el_builders is None: - builders = [] - for entry_point in pkg_resources.iter_entry_points('oarepo_model_builder.elements'): - cls = entry_point.load() - builders.append(cls()) - builders.sort(key=lambda opener: -getattr(opener, 
'_priority', 10)) - self._el_builders = builders - return self._el_builders - - @cached_property - def model_config(self): - try: - import importlib.resources as resources - except ImportError: - # Try backported to PY<37 `importlib_resources`. - import importlib_resources as resources - - from . import config - config_json = resources.read_text(config, 'default.json') - return Config(json5.loads(config_json)) - - def output_builders(self, output_type) -> list: - builders = [] - for entry_point in pkg_resources.iter_entry_points(f'oarepo_model_builder.{output_type}'): - cls = entry_point.load() - builders.append(cls()) - builders.sort(key=lambda opener: -getattr(opener, '_priority', 10)) - return builders diff --git a/oarepo_model_builder/api.py b/oarepo_model_builder/api.py deleted file mode 100644 index aac73040..00000000 --- a/oarepo_model_builder/api.py +++ /dev/null @@ -1,88 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright (C) 2021 CESNET. -# -# OARepo-Communities is free software; you can redistribute it and/or modify -# it under the terms of the MIT License; see LICENSE file for more details. 
- -"""OArepo module that generates data model files from a JSON specification file.""" -from copy import deepcopy - -from deepmerge import Merger - -from oarepo_model_builder.errors import BuildError -from oarepo_model_builder.proxies import current_model_builder - -_includes_merger = Merger( - [ - (list, ["append"]), - (dict, ["merge"]), - (set, ["union"]) - ], - ["override"], - ["override"] -) - - -def resolve_includes(src, prop): - if isinstance(src, dict): - while 'oarepo:use' in src: - includes = src.pop('oarepo:use') - if not isinstance(includes, (list, tuple)): - included_datamodel = current_model_builder.get_model(includes) - elif includes: - included_datamodel = {} - - for inc in includes: - if isinstance(inc, str): - _includes_merger.merge(included_datamodel, - current_model_builder.get_model(inc)) - elif isinstance(inc, dict): - _includes_merger.merge(included_datamodel, deepcopy(inc)) - else: - included_datamodel = {} - - _includes_merger.merge(included_datamodel, src) - src.update(included_datamodel) - - if prop == 'properties': - for k, v in list(src.items()): - if isinstance(v, (str, list, tuple)): - src[k] = {'oarepo:use': v} - elif isinstance(v, dict) and 'items' in v and isinstance(v['items'], (str, list, tuple)): - v['items'] = {'oarepo:use': v['items']} - - for k, v in list(src.items()): - if k == 'properties' and isinstance(v, (str, list, tuple)): - src[k] = {'oarepo:use': v} - - resolve_includes(v, k) - - elif isinstance(src, (list, tuple)): - for v in src: - resolve_includes(v, prop) - - -def build_datamodel(src, config=None): - """Build data model files from source JSON5 specification data.""" - if config is None: - config = {} - - outputs = {} - - # Resolve includes - resolve_includes(src, None) - - # Iterate over registered model builders - for builder in current_model_builder.source_builders: - el_handlers = current_model_builder.element_builders - - builder(el=src, config=config, path=[], outputs=outputs, handlers=el_handlers) - - for 
class ModelBuilder:
    """
    Processes a model file and generates/updates sources for the model.

    The builder is configured with plugin *classes* (outputs, output builders,
    property preprocessors and model preprocessors). The classes are filtered
    by ``settings.plugins`` and instantiated on every call to :meth:`build`.
    """

    model_preprocessor_classes: List[Type[ModelPreprocessor]]
    """
    Model preprocessor classes that are called after schema is loaded and before it is processed
    """

    output_classes: List[Type[OutputBase]]
    """
    All registered output classes (before filtering by settings)
    """

    filtered_output_classes: Dict[str, Type[OutputBase]]
    """
    Mapping between output type and its handler class, filtered by settings.plugins.output
    """

    output_builder_classes: List[Type[OutputBuilder]]
    """
    A list of output builder classes to be used in build.
    """

    property_preprocessor_classes: List[Type[PropertyPreprocessor]]
    """
    Property preprocessor classes (their instances are called on each element
    before it is passed to an output builder)
    """

    output_builders: List[OutputBuilder]
    """
    A list of output_builders. Each output builder is responsible for generating one or more files
    """

    outputs: Dict[Path, OutputBase]
    """
    Mapping between concrete output (absolute file path inside the output dir) and the output instance
    """

    property_preprocessors: List[PropertyPreprocessor]
    """
    Current instances of property preprocessor classes.
    """

    def __init__(
            self,
            outputs: List[Type[OutputBase]] = (),
            output_builders: List[Type[OutputBuilder]] = (),
            property_preprocessors: List[Type[PropertyPreprocessor]] = (),
            model_preprocessors: List[Type[ModelPreprocessor]] = (),
            open=open
    ):
        """
        Initializes the builder

        :param outputs: List of output classes; each must define a non-empty ``TYPE``
        :param output_builders: A list of output builder classes to use in builds
        :param property_preprocessors: List of property preprocessor classes
        :param model_preprocessors: List of model preprocessor classes
        :param open: callable stored on the builder as ``self.open``
        """
        self.output_builder_classes = [*output_builders]
        for o in outputs:
            assert o.TYPE, f'output_type not set up on class {o}'
        self.output_classes = [*outputs]
        self.property_preprocessor_classes = [*property_preprocessors]
        self.model_preprocessor_classes = [*model_preprocessors]
        # unfiltered until build() is called and settings are known
        self.filtered_output_classes = {o.TYPE: o for o in self.output_classes}
        self.open = open

    def get_output(self, output_type: str, path: str | Path):
        """
        Given a path, instantiate an output on the path with the given output type
        and return it. If the output on the path has already been requested, return
        the same instance.

        :param output_type: the ``TYPE`` of the output class to use
        :param path: path relative to output_dir, set in build()
        :return: instance of the output for the path
        :raises KeyError: if no (enabled) output class has the given type
        """
        if not isinstance(path, Path):
            path = Path(path)
        path = self.output_dir.joinpath(path)

        output = self.outputs.get(path, None)
        # compare with None so that an output object that happens to be falsy
        # is still reused and not silently replaced
        if output is not None:
            assert output_type == output.TYPE, \
                f'Output {path} already exists with type {output.TYPE}, requested {output_type}'
        else:
            output = self.filtered_output_classes[output_type](self, path)
            output.begin()
            self.outputs[path] = output
        return output

    # main entry point
    def build(self, schema: ModelSchema, output_dir: str | Path):
        """
        compile the schema to output directory

        :param schema: the model schema
        :param output_dir: output directory where to put generated files
        :return: the outputs (self.outputs)
        """
        self.set_schema(schema)
        self.filtered_output_classes = {o.TYPE: o for o in self._filter_classes(self.output_classes, 'output')}
        self.output_dir = Path(output_dir).absolute()  # noqa
        self.outputs = {}
        self.output_builders = [e(self) for e in self._filter_classes(self.output_builder_classes, 'builder')]
        self.property_preprocessors = [e(self) for e in
                                       self._filter_classes(self.property_preprocessor_classes, 'property')]

        for model_preprocessor in self._filter_classes(self.model_preprocessor_classes, 'model'):
            model_preprocessor(self).transform(schema, schema.settings)

        # process the file
        self._iterate_schema(schema)

        # finish outputs in a stable (path-sorted) order
        for output in sorted(self.outputs.values(), key=lambda x: x.path):
            output.finish()

        return self.outputs

    def set_schema(self, schema):
        """Remember the schema and its settings on the builder."""
        self.schema = schema
        self.settings = schema.settings

    # private methods

    def _filter_classes(self, classes: List[Type[object]], plugin_type):
        """
        Filter plugin classes by ``settings.plugins[plugin_type]``.

        ``disabled`` is either a list of ``TYPE`` names or the string ``__all__``;
        ``enabled`` is a list of ``TYPE`` names and/or ``module:attribute``
        references that are imported directly. Each class is returned at most once.

        :param classes: candidate plugin classes (each must have a ``TYPE``)
        :param plugin_type: key into ``settings.plugins`` ('output', 'builder', ...)
        :return: list of classes to use
        :raises AttributeError: if ``disabled`` is neither a list nor ``__all__``
        """
        plugin_config = self.settings.plugins.get(plugin_type, None)
        if not plugin_config:
            return classes
        disabled = plugin_config.get('disabled', [])
        enabled = plugin_config.get('enabled', [])

        if disabled == '__all__':
            ret = []
        elif isinstance(disabled, (list, tuple)):
            ret = [c for c in classes if c.TYPE not in disabled]
        else:
            raise AttributeError('Value of settings.plugin.*.disabled must be either '
                                 'a list of names or string __all__ to disable all plugins.')

        # re-add explicitly enabled classes, skipping those that are already
        # present so that a plugin never runs twice
        for c in classes:
            if c.TYPE in enabled and c not in ret:
                ret.append(c)

        # add directly imported classes
        for en in enabled:
            if ':' in en:
                module_name, attribute = en.split(':', maxsplit=1)
                imported = getattr(importlib.import_module(module_name), attribute)
                if imported not in ret:
                    ret.append(imported)
        return ret

    def _iterate_schema(self, schema: ModelSchema):
        """Run every output builder, wrapped by the property preprocessors, over the schema."""
        for output_builder in self.output_builders:
            output_builder.begin(schema, schema.settings)

            for proc in self.property_preprocessors:
                proc.begin(schema, schema.settings)

            self._iterate_schema_output_builder(schema, output_builder)

            for proc in self.property_preprocessors:
                proc.finish()

            output_builder.finish()

    def _iterate_schema_output_builder(self, schema: ModelSchema, output_builder: OutputBuilder):
        """Walk the schema and pass each (preprocessed) element to ``output_builder``."""

        def call_processors(stack, output_builder):
            # preprocessors work on a copy so that the schema itself is not modified;
            # a preprocessor returning a falsy value leaves the data unchanged
            data = copy.deepcopy(stack.top.data)
            for property_preprocessor in self.property_preprocessors:
                data = property_preprocessor.process(output_builder.TYPE, data, stack) or data
            return data

        def on_element(stack):
            data = call_processors(stack, output_builder)
            if isinstance(data, ReplaceElement):
                # returned as is, without calling the output builder
                return data
            stack.top.data = data
            return output_builder.process_element(stack)

        ModelBuilderStack(schema).process(on_element)
def process(path, priority=0, condition=None):
    """
    Decorator marking a method of an OutputBuilder as a handler of a json path.

    The decorated callable behaves as the original one and additionally carries
    ``model_builder_path``, ``model_builder_priority`` and
    ``model_builder_condition`` attributes that OutputBuilder reads when it
    registers the handlers.

    :param path: json path the handler is registered on
    :param priority: handlers with higher priority are registered first
    :param condition: optional condition passed to the json path registry
    """

    def wrapper(f):
        @functools.wraps(f)
        def wrapped(*args, **kwargs):
            return f(*args, **kwargs)

        wrapped.model_builder_path = path
        wrapped.model_builder_priority = priority
        wrapped.model_builder_condition = condition
        return wrapped

    return wrapper


class OutputBuilder:
    """
    Base class of output builders. Methods decorated with ``@process`` are
    registered as handlers of model elements.
    """
    TYPE = None

    def __init__(self, builder: ModelBuilder):
        self.builder = builder
        # TODO: move this to metaclass and initialize only once per class
        self.json_paths = JSONPaths()
        handlers = [
            (name, method)
            for name, method in inspect.getmembers(self, inspect.ismethod)
            if hasattr(method, 'model_builder_priority')
        ]
        # Higher priority first, then longer paths first, then by path. The
        # member name is the last tie-breaker so that the registration order is
        # the same on every run (ids of bound methods are not).
        handlers.sort(
            key=lambda item: (
                -item[1].model_builder_priority,
                -len(item[1].model_builder_path),
                item[1].model_builder_path,
                item[0],
            )
        )
        for _name, method in handlers:
            self.json_paths.register(
                method.model_builder_path, method.model_builder_condition, method
            )

    def begin(self, schema, settings):
        """Called before the schema is processed; stores schema and settings."""
        self.schema = schema
        self.settings = settings
        log.enter(2, 'Creating %s', self.TYPE)

    def finish(self):
        """Called after the schema has been processed."""
        log.leave()

    def process_element(self, stack: ModelBuilderStack):
        """
        Normally returns a generator with a single yield:
          1. first part is called on element start
          2. yield causes that the content of the element is processed
          3. generator is called again to finish the element

        If no generator is returned, the content of the element is not processed
        """
        matching = self.json_paths.match(stack.path, stack.top.data, extra_data={
            'stack': stack
        })
        # only the first matching handler is used
        for method in matching:
            return method(stack)
        # do not skip stack top
        if stack.level > 1:
            return stack.SKIP

    @process('/model')
    def enter_model(self, stack: ModelBuilderStack):
        # do not skip /model
        yield
class JSONBaseBuilder(OutputBuilder):
    """
    Base of builders that copy model elements into a JSON-like output.

    Subclasses set the class attributes below: the settings key holding the
    output file name, the output type passed to the builder's get_output()
    and the name of the root directory of parent modules.
    """
    output_file_name: str = None
    output_file_type: str = None
    parent_module_root_name: str = None

    def model_element_enter(self, stack: ModelBuilderStack):
        """Write the element on top of the stack to the output (primitive value, list or dict)."""
        element = stack.top
        kind = stack.top_type
        if kind == stack.PRIMITIVE:
            self.output.primitive(element.key, element.data)
        elif kind == stack.LIST:
            self.output.enter(element.key, [])
        elif kind == stack.DICT:
            self.output.enter(element.key, {})

    def model_element_leave(self, stack: ModelBuilderStack):
        """Close the element opened in model_element_enter; primitives were never opened."""
        if stack.top_type == stack.PRIMITIVE:
            return
        self.output.leave()

    # NOTE(review): unlike OutputBuilder.enter_model this handler does not
    # yield - confirm that the stack still descends into /model in that case
    @process('/model')
    def enter_model(self, stack: ModelBuilderStack):
        """Look up the output file name in settings, open the output and call the subclass hook."""
        output_name = self.settings[self.output_file_name]
        self.output = self.builder.get_output(self.output_file_type, output_name)
        self.on_enter_model(output_name, stack)

    def on_enter_model(self, output_name, stack: ModelBuilderStack):
        """Hook called after the output has been opened; does nothing by default."""
a/oarepo_model_builder/builders/jsonschema.py +++ b/oarepo_model_builder/builders/jsonschema.py @@ -1,39 +1,24 @@ -from typing import List, Dict - -import click - -from oarepo_model_builder.builders import JSONBuilder -from oarepo_model_builder.config import Config -from oarepo_model_builder.outputs import JsonSchemaOutput, BaseOutput - - -class JSONSchemaBuilder(JSONBuilder): - """Handles building of jsonschema from a data model specification.""" - - def __init__(self): - super().__init__() - self.output = None - - def begin(self, config, outputs, root): - output = outputs['jsonschema'] = JsonSchemaOutput() - output.path = config.resolve_path( - 'schema_path', - 'jsonschemas/{package}/{datamodel}-v{datamodel_version}.json') - self.stack[0] = output.data - - def pre(self, el, config: Config, path: List[str], outputs: Dict[str, BaseOutput]): - path_skipped = path[-1].startswith('oarepo:') - if path_skipped: - self.push(self.IGNORED_SUBTREE, path) - elif isinstance(el, dict): - self.push({}, path) - else: - self.push(el, path) - - def post(self, el, config, path, outputs): - self.pop() - - def options(self): - return [ - click.option('schema-path') - ] +from pathlib import Path + +from oarepo_model_builder.stack import ModelBuilderStack +from . 
def _on_schema_element(current):
    """Condition for handlers: true when the current stack points at a schema element."""
    return is_schema_element(current.stack)


class JSONSchemaBuilder(JSONBaseBuilder):
    """Copies the schema elements of the model into the 'jsonschema' output."""
    TYPE = 'jsonschema'
    output_file_type = 'jsonschema'
    output_file_name = 'schema-file'
    parent_module_root_name = 'jsonschemas'

    @process('/model/**', condition=_on_schema_element)
    def model_element(self, stack: ModelBuilderStack):
        """Write the element, let its children be processed, then close it."""
        self.model_element_enter(stack)
        yield
        self.model_element_leave(stack)

    def on_enter_model(self, output_name, stack: ModelBuilderStack):
        """Request __init__.py outputs for the parent directories of the schema file, up to 'jsonschemas'."""
        schema_path = Path(output_name)
        ensure_parent_modules(
            self.builder,
            schema_path,
            ends_at=self.parent_module_root_name,
        )
- -"""OArepo module that generates data model files from a JSON specification file.""" - -import click - -from oarepo_model_builder.builders import JSONBuilder -from oarepo_model_builder.outputs import MappingOutput - - -class MappingBuilder(JSONBuilder): - """Handles building of ES mappings from source elements.""" - - def _resolve(self, el): - if isinstance(el, str): - return dict(type=el) - return el - - def _parse_oarepo_search(self, search, config): - if not search: - return False - if isinstance(search, dict) and 'mapping' in search: - return self._resolve(search['mapping']) - elif isinstance(search, str): - return self._resolve(search) - else: - return self._resolve(self.default_type(config)) - - def is_property(self, path): - return len(path) > 1 and \ - path[-1] not in ['properties', 'mappings'] and \ - path[-2] == 'properties' and \ - path[0] == 'properties' - - def begin(self, config, outputs, root): - output = outputs['mapping'] = MappingOutput() - output.path = config.resolve_path( - 'mapping_path', - 'mappings/v7/{datamodel}-v{datamodel_version}.json') - self.stack[0] = output.data - if 'oarepo:search' in root: - self.stack[-1].update(root['oarepo:search']) - self.push({}, ['mappings']) - - def default_type(self, config): - return config.search.get('default_mapping_type', 'keyword') - - def should_exclude(self, el, config): - return el is False or \ - ('oarepo:search' in el and self._parse_oarepo_search(el['oarepo:search'], config) is False) or \ - ('oarepo:search' not in el and self.default_type(config) is False) - - def update_mapping_type(self, mapping_type='object'): - """Sets a mapping type on first non-ignored element on stack""" - for idx, entry in enumerate(reversed(self.stack)): - if not self.should_ignore(entry): - self.stack[-(idx + 1)]['type'] = mapping_type - break - - def update_with_search(self, search, config): - """Update first non-ignored element on stack with search configuration.""" - for idx, entry in 
enumerate(reversed(self.stack)): - if not self.should_ignore(entry): - search_config = self._parse_oarepo_search(search, config) - if search_config is not False: - self.stack[-(idx + 1)].update(search_config) - break - - def pre(self, el, config, path, outputs): - if self.is_property(path): - if self.should_exclude(el, config): - self.push(self.IGNORED_SUBTREE, path) - return - - # Map a concrete property to ES mapping - self.push({}, path) - if 'properties' in el: - self.stack[-1]['type'] = 'object' - else: - self.stack[-1]['type'] = self.default_type(config) - elif path[-1] == 'properties': - # Map properties to ES mapping properties - self.push({}, path) - elif path[-1] == 'items': - # Map array items to certain ES property mapping - if 'properties' in el: - if not self.should_exclude(el, config): - self.update_mapping_type() - self.push(self.IGNORED_NODE, path) - else: - self.push(self.IGNORED_SUBTREE, path) - elif 'oarepo:search' in el: - self.update_with_search(el['oarepo:search'], config) - self.push(self.IGNORED_SUBTREE, path) - else: - self.push(self.IGNORED_NODE, path) - elif path[-1] == 'oarepo:search': - # Update certain element with config from oarepo:search directive - self.update_with_search(el, config) - self.push(self.IGNORED_SUBTREE, path) - elif path[0] != 'properties': - # Ignore everything not starting with properties and not handled above - self.push(self.IGNORED_SUBTREE, path) - else: - # Everything else is omitted from ES mapping output - self.push(self.IGNORED_NODE, path) - - def post(self, el, config, path, outputs): - self.pop() - - def end(self, config, outputs, root): - self.pop() - - def options(self): - return [ - click.option('mapping-path') - ] +from pathlib import Path + +from oarepo_model_builder.stack import ModelBuilderStack +from . 
class MappingBuilder(JSONBaseBuilder):
    """Generates the 'mapping' output from the schema elements of the model."""
    TYPE = 'mapping'
    output_file_type = 'mapping'
    output_file_name = 'mapping-file'
    parent_module_root_name = 'mappings'

    @process('/model/**', condition=lambda current: is_schema_element(current.stack))
    def enter_model_element(self, stack: ModelBuilderStack):
        """Write containers and "type" leaves to the mapping and merge any 'oarepo:mapping' of the element."""
        # ignore schema leaves different than "type" - for example, minLength, ...
        # that should not be present in mapping
        is_container = stack.top_type in (stack.LIST, stack.DICT)
        if is_container or stack.top.key == 'type':
            self.model_element_enter(stack)

        # process children
        yield

        element_data = stack.top.data
        if isinstance(element_data, dict) and 'oarepo:mapping' in element_data:
            self.output.merge_mapping(element_data['oarepo:mapping'])

        self.model_element_leave(stack)

    def on_enter_model(self, output_name, stack: ModelBuilderStack):
        """Prepare the mapping output: parent modules, the template for the ES version and the 'mappings' root."""
        ensure_parent_modules(
            self.builder,
            Path(output_name),
            ends_at=self.parent_module_root_name,
        )
        elasticsearch = self.settings.elasticsearch
        self.output.merge(
            self.settings.elasticsearch.templates[elasticsearch.version]
        )
        self.output.enter('mappings', {})

        if not self.settings.get('top_level_metadata'):
            return
        # wrap everything that follows in a top-level "metadata" object
        self.output.enter('properties', {})
        self.output.enter('metadata', {})
        self.output.primitive('type', 'object')

    def finish(self):
        """Finish the builder and log the command that re-creates the indices."""
        super().finish()

        # NOTE(review): the exact whitespace inside this message could not be
        # recovered from the flattened source - confirm against the repository
        log(log.INFO, f"""
    invenio index init --force
    """)


class PythonBuilder(OutputBuilder):
    """Base class of builders that generate python sources."""

    def module_to_path(self, module_name):
        """Convert a dotted module name to the relative path of its .py file."""
        *packages, module = module_name.split('.')
        return Path(*packages, module + '.py')

    def create_parent_modules(self, python_path):
        """Request __init__.py outputs for all parent directories of ``python_path``."""
        depth = len(python_path.parts)
        ensure_parent_modules(self.builder, python_path, max_depth=depth)

    def class_to_path(self, class_name):
        """Convert a dotted class name to the path of the module that contains it."""
        module_name = class_name.rsplit('.', maxsplit=1)[0]
        return self.module_to_path(module_name)


class PythonStructureBuilder(PythonBuilder):
    """Requests the __init__.py outputs of the package after the model has been processed."""
    TYPE = 'python_structure'

    @process('/model')
    def model(self, stack: ModelBuilderStack):
        """Let the model content be processed, then request the package's parent modules."""
        yield
        package_path = self.settings.package_path
        init_py = self.builder.output_dir.joinpath(package_path / '__init__.py')

        ensure_parent_modules(
            self.builder,
            init_py,
            max_depth=len(package_path.parts)
        )
- -"""OArepo module that generates data model files from a JSON specification file.""" -from typing import List - -from oarepo_model_builder.builders import ElementBuilder - - -class SourceBuilder: - """Base builder from source specification interface.""" - - def walk(self, el, config, path, outputs, handlers): - raise NotImplemented - - def __call__(self, el, config, path, outputs, handlers, *args, **kwargs): - for h in handlers: - h.begin(config, outputs, el) - - self.walk(el, config, path, outputs, handlers, *args, **kwargs) - - for h in handlers: - h.end(config, outputs, el) - - def options(self): - """returns list/tuple of click.argument or click.option options""" - return () - - -class DataModelBuilder(SourceBuilder): - """Handles building a data model from a datamodel specification.""" - - def walk(self, el, config, path, outputs, handlers: List[ElementBuilder] = None): - """Walk the source data and call element handlers on each element.""" - if handlers is None: - handlers = [] - - if path: - for h in handlers: - h.pre(el, config, path, outputs) - - if isinstance(el, dict): - for k, v in list(el.items()): - self.walk(v, config, path + [k], outputs, handlers) - - if path: - for h in handlers: - h.post(el, config, path, outputs) diff --git a/oarepo_model_builder/builders/ui.py b/oarepo_model_builder/builders/ui.py deleted file mode 100644 index 02c64dc7..00000000 --- a/oarepo_model_builder/builders/ui.py +++ /dev/null @@ -1,62 +0,0 @@ -import copy -import os - -import click - -from oarepo_model_builder.builders.json import JSONBuilder -from oarepo_model_builder.outputs.toml_output import TomlOutput -from oarepo_model_builder.outputs.ui import UIOutput - - -class UIBuilder(JSONBuilder): - """Handles building of jsonschema from a data model specification.""" - - def __init__(self): - super().__init__() - self.output = None - - def is_property(self, path): - return len(path) > 1 and path[-2] == 'properties' # TODO: tohle neni uplne spravne - - def begin(self, 
def ensure_parent_modules(builder: ModelBuilder, path: Path,
                          *,
                          ends_at: str = None, max_depth=5):
    """
    Request a 'python' output for ``__init__.py`` in the parent directories of ``path``.

    Starts at the directory containing ``path`` and walks upwards. The walk
    stops when ``max_depth`` directories have been visited, when the path has
    no more named components, when the directory contains ``.git`` or after
    the directory named ``ends_at`` has been handled.

    :param builder: object providing ``get_output(output_type, path)``
    :param path: path of the file whose parent packages should exist
    :param ends_at: name of the topmost directory to handle (inclusive)
    :param max_depth: maximum number of directories to visit
    """
    # NOTE(review): the .git check is done on the path as given (relative
    # paths are resolved against the current working directory) - confirm that
    # this is the intended directory.
    directory = path.parent
    remaining = max_depth
    # 1st sanity check - maximum depth, path must not be a UNC drive name
    while remaining > 0 and directory and directory.name:
        remaining -= 1
        # 2nd sanity check - the path must not contain .git
        if directory.joinpath('.git').exists():
            return

        # get python output so that other can write into it if required
        # and there are no disk-level conflicts
        builder.get_output('python', directory.joinpath('__init__.py'))

        if directory.name == ends_at:
            return
        directory = directory.parent
@click.command()
@click.option('--output-directory',
              default='.',
              help='Output directory where the generated files will be placed. '
                   'Defaults to "."')
@click.option('--package',
              help='Package into which the model is generated. '
                   'If not passed, the name of the current directory, '
                   'converted into python package name, is used.')
@click.option('--set', 'sets',
              help='Overwrite option in the model file. Example '
                   '--set name=value',
              multiple=True)
@click.option('-v', 'verbosity',
              help='Increase the verbosity. This option can be used multiple times.',
              count=True)
@click.option('--config', 'configs',
              help='Load a config file and replace parts of the model with it. '
                   'The config file can be a json, yaml or a python file. '
                   'If it is a python file, it is evaluated with the current '
                   'model stored in the "oarepo_model" global variable and '
                   'after the evaluation all globals are set on the model.',
              multiple=True)
@click.option('--isort/--skip-isort', default=True, help='Call isort on generated sources')
@click.option('--black/--skip-black', default=True, help='Call black on generated sources')
@click.argument('model_filename')
def run(output_directory, package, sets, configs, model_filename, verbosity, isort, black):
    """
    Compiles an oarepo model file given in MODEL_FILENAME into an Invenio repository model.
    """

    # extend system's search path to add script's path in front (so that scripts called from the compiler are taken
    # from the correct virtual environ)
    os.environ['PATH'] = str(Path(sys.argv[0]).parent.absolute()) + os.pathsep + os.environ.get('PATH', '')

    # set the logging level: logging.INFO if not verbose; each -v on the
    # command line decreases the level by one
    logging.basicConfig(
        level=logging.INFO - verbosity,
        format=''
    )

    # the log is also appended to installation.log inside the output directory
    log_path = Path(output_directory) / 'installation.log'
    handler = logging.FileHandler(log_path, 'a')
    handler.setLevel(logging.INFO)
    logging.root.addHandler(handler)

    log.enter(0, '\n\n%s\n\nProcessing model %s into output directory %s',
              datetime.datetime.now(), model_filename, output_directory)

    builder = create_builder_from_entrypoints()
    loaders = load_entry_points_dict('oarepo_model_builder.loaders')
    # the model file itself is loaded only with loaders marked as safe;
    # config files passed explicitly via --config may use any loader
    safe_loaders = {k: v for k, v in loaders.items() if getattr(v, 'safe', False)}

    schema = ModelSchema(model_filename, loaders=safe_loaders)
    for config in configs:
        load_config(schema, config, loaders)

    for s in sets:
        if '=' not in s:
            raise click.BadParameter(f'Expected name=value, got {s!r}', param_hint='--set')
        k, v = s.split('=', 1)
        # NOTE(review): the key is used as is - confirm that schema.schema
        # resolves dotted names such as settings.elasticsearch.version
        schema.schema[k] = v

    check_plugin_packages(schema.settings)

    if package:
        schema.settings['package'] = package

    if 'python' not in schema.settings:
        schema.settings.python = HyphenMunch()
    schema.settings.python.use_isort = isort
    schema.settings.python.use_black = black

    builder.build(schema, output_directory)

    log.leave('Done')

    print(f"Log saved to {Path(output_directory) / 'installation.log'}")


def load_config(schema, config, loaders):
    """
    Load a config file and merge it into the schema.

    The schema's loaders are temporarily replaced with ``loaders`` and are
    restored afterwards, even if loading fails.
    """
    old_loaders = schema.loaders
    schema.loaders = loaders
    try:
        loaded_file = schema._load(config)
        schema.merge(loaded_file)
    finally:
        schema.loaders = old_loaders


def check_plugin_packages(settings):
    """
    Check that packages listed in ``settings.plugins.packages`` are installed.

    If some are missing, the user is asked whether they should be installed
    via pip. In both cases the process exits with status 1 - the command has
    to be run again once the packages are available.
    """
    try:
        required_packages = settings.plugins.packages
    except AttributeError:
        # no plugin packages are required
        return
    import pkg_resources
    import subprocess
    known_packages = set(d.project_name for d in pkg_resources.working_set)
    unknown_packages = [rp for rp in required_packages if rp not in known_packages]
    if unknown_packages:
        if input(f'Required packages {", ".join(unknown_packages)} are missing. '
                 f'Should I install them for you via pip install? (y/n) ') == 'y':
            if subprocess.call([
                'pip', 'install', *unknown_packages
            ]):
                sys.exit(1)
            print("Installed required packages, please run this command again")
        sys.exit(1)


if __name__ == '__main__':
    run()
newline at end of file diff --git a/oarepo_model_builder/entrypoints.py b/oarepo_model_builder/entrypoints.py new file mode 100644 index 00000000..5b4f846c --- /dev/null +++ b/oarepo_model_builder/entrypoints.py @@ -0,0 +1,27 @@ +import pkg_resources + +from oarepo_model_builder.builder import ModelBuilder + + +def create_builder_from_entrypoints(): + output_classes = load_entry_points_list('oarepo_model_builder.ouptuts') + builder_classes = load_entry_points_list('oarepo_model_builder.builders') + preprocess_classes = load_entry_points_list('oarepo_model_builder.property_preprocessors') + model_preprocessor_classes = load_entry_points_list('oarepo_model_builder.model_preprocessors') + + return ModelBuilder( + output_builders=builder_classes, + outputs=output_classes, + property_preprocessors=preprocess_classes, + model_preprocessors=model_preprocessor_classes + ) + + +def load_entry_points_dict(name): + return {ep.name: ep.load() for ep in pkg_resources.iter_entry_points(group=name)} + + +def load_entry_points_list(name): + ret = [(ep.name, ep.load()) for ep in pkg_resources.iter_entry_points(group=name)] + ret.sort() + return [x[1] for x in ret] diff --git a/oarepo_model_builder/errors.py b/oarepo_model_builder/errors.py deleted file mode 100644 index 08b6ac4f..00000000 --- a/oarepo_model_builder/errors.py +++ /dev/null @@ -1,25 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright (C) 2021 CESNET. -# -# OARepo-Communities is free software; you can redistribute it and/or modify -# it under the terms of the MIT License; see LICENSE file for more details. 
- -"""OArepo module that generates data model files from a JSON specification file.""" - -import typing as t -from gettext import gettext as _ - -from click._compat import get_text_stderr -from click.exceptions import ClickException -from click.utils import echo - - -class BuildError(ClickException): - exit_code = 10 - - def show(self, file: t.Optional[t.IO] = None) -> None: - if file is None: - file = get_text_stderr() - - echo(_("Build Failed: {message}").format(message=self.format_message()), file=file) diff --git a/oarepo_model_builder/exceptions.py b/oarepo_model_builder/exceptions.py new file mode 100644 index 00000000..2bce0b27 --- /dev/null +++ b/oarepo_model_builder/exceptions.py @@ -0,0 +1,5 @@ +class IncludedFileNotFoundException(Exception): + """ + Raised when an included file has not been found + """ + pass diff --git a/oarepo_model_builder/invenio/__init__.py b/oarepo_model_builder/invenio/__init__.py new file mode 100644 index 00000000..1127def9 --- /dev/null +++ b/oarepo_model_builder/invenio/__init__.py @@ -0,0 +1,13 @@ +TEMPLATES = { + "record": "templates/invenio_record.py.jinja2", + "record-metadata": "templates/invenio_record_metadata.py.jinja2", + "record-permissions": "templates/invenio_record_permissions.py.jinja2", + "record-search-options": "templates/invenio_record_search_options.py.jinja2", + "record-service-config": "templates/invenio_record_service_config.py.jinja2", + "record-service": "templates/invenio_record_service.py.jinja2", + "record-schema": "templates/invenio_record_schema.py.jinja2", + "record-dumper": "templates/invenio_record_dumper.py.jinja2", + "record-resource-config": "templates/invenio_record_resource_config.py.jinja2", + "record-resource": "templates/invenio_record_resource.py.jinja2", + "blueprint": "templates/invenio_blueprint.py.jinja2", +} diff --git a/oarepo_model_builder/invenio/invenio_base.py b/oarepo_model_builder/invenio/invenio_base.py new file mode 100644 index 00000000..08060527 --- /dev/null +++ 
class InvenioBaseClassPythonBuilder(PythonBuilder):
    """Base for builders that render one python file from one template.

    Subclasses provide ``class_config`` and ``template``; ``finish`` does the
    rest.
    """
    # key into settings.python; the value found there is given to class_to_path()
    class_config = None
    # first argument of the python output's merge() call
    template = None

    def finish(self, **extra_kwargs):
        """Merge ``self.template`` into the python output of the configured class.

        :param extra_kwargs: additional values added to the context passed to
            the output's ``merge`` next to ``settings`` and ``python``
        """
        configured_class = self.settings.python[self.class_config]
        target_path = self.class_to_path(configured_class)
        self.create_parent_modules(target_path)

        python_output: PythonOutput = self.builder.get_output('python', target_path)

        context = HyphenMunch(
            settings=self.settings,
            python=self.settings.python,
            **extra_kwargs
        )
        python_output.merge(self.template, context)
class InvenioRecordJSONSchemasPoetryBuilder(OutputBuilder):
    """Writes the ``invenio_jsonschemas.schemas`` plugin entry to pyproject.toml."""
    TYPE = 'invenio_record_jsonschemas_poetry'

    def finish(self):
        """Set the plugin entry from ``settings`` on the ``pyproject.toml`` output."""
        super().finish()

        pyproject: TOMLOutput = self.builder.get_output('toml', 'pyproject.toml')

        # entry name comes from settings.python, its value from the top-level settings
        entry_name = self.settings.python.record_jsonschemas_poetry
        entry_value = self.settings.jsonschemas_package
        pyproject.set(
            "tool.poetry.plugins.'invenio_jsonschemas.schemas'",
            entry_name,
            entry_value
        )
'invenio_record_metadata_alembic_poetry' + + def finish(self): + super().finish() + + output: TOMLOutput = self.builder.get_output( + 'toml', + 'pyproject.toml' + ) + output.set("tool.poetry.plugins.'invenio_db.alembic'", + self.settings.python.record_schema_metadata_alembic, + f'{self.settings.package}:alembic' + ) + + python_path = self.settings.package_path / 'alembic' / '__init__.py' + # create parent modules if they do not exist + ensure_parent_modules( + self.builder, + python_path, + max_depth=len(python_path.parts) + ) + + # and create empty __init__.py + init_builder = self.builder.get_output( + 'python', + python_path + ) + if init_builder.created: + log(log.INFO, f"""Do not forget to run: + + # if the initial database does not exist yet + invenio db init + + # if the tables do not exist yet (that is, after invenio db init); you have to manually remove + # {self.settings.python.record_metadata_table_name} and its versioned counterpart, otherwise + # alembic below will not work ! + invenio db create + + # create the branch + invenio alembic revision "Create {self.settings.python.record_schema_metadata_alembic} branch." -b {self.settings.python.record_schema_metadata_alembic} -p dbdbc1b19cf2 --empty + + # apply the branch + invenio alembic upgrade heads + + # initial revision + invenio alembic revision "Initial revision." 
class InvenioRecordMetadataModelsPoetryBuilder(OutputBuilder):
    """Writes the ``invenio_db.models`` plugin entry to pyproject.toml."""
    TYPE = 'invenio_record_metadata_models_poetry'

    def finish(self):
        """Register the package part of the record metadata class as a models plugin."""
        super().finish()

        pyproject: TOMLOutput = self.builder.get_output('toml', 'pyproject.toml')

        # everything before the last dot of the dotted class name
        # (the whole value when it contains no dot)
        metadata_class = self.settings.python.record_metadata_class
        models_package = metadata_class.rsplit('.', maxsplit=1)[0]

        entry_name = self.settings.python.record_schema_metadata_poetry
        pyproject.set("tool.poetry.plugins.'invenio_db.models'", entry_name, models_package)
class InvenioRecordResourcePoetryBuilder(OutputBuilder):
    """Writes the ``invenio_base.api_blueprints`` plugin entry to pyproject.toml."""
    TYPE = 'invenio_record_resource_poetry'

    def finish(self):
        """Register the blueprint function as ``<module>:<function>``."""
        super().finish()

        pyproject: TOMLOutput = self.builder.get_output('toml', 'pyproject.toml')

        # split the dotted name once from the right; without a dot both
        # pieces below are the same single element
        pieces = self.settings.python.register_blueprint_function.rsplit('.', maxsplit=1)
        module_part = pieces[0]
        function_part = pieces[-1]

        pyproject.set(
            "tool.poetry.plugins.'invenio_base.api_blueprints'",
            self.settings.python.record_resource_blueprint_name,
            f'{module_part}:{function_part}'
        )
ModelBuilderStack +from .invenio_base import InvenioBaseClassPythonBuilder +from ..outputs.json_stack import JSONStack +from ..utils.deepmerge import deepmerge +from ..utils.schema import is_schema_element + + +class InvenioRecordSchemaBuilder(InvenioBaseClassPythonBuilder): + TYPE = 'invenio_record_schema' + class_config = 'record-schema-class' + template = 'record-schema' + + def begin(self, schema, settings): + super().begin(schema, settings) + self.stack = JSONStack() + self.imports = defaultdict(set) # import => aliases + self.imports['marshmallow'].add('ma') + self.imports['marshmallow.fields'].add('ma_fields') + self.imports['marshmallow.validate'].add('ma_valid') + + def finish(self): + # TODO: generate subschemas as well + # TODO: generate arrays as well + # TODO: handle required + super().finish( + fields=self.stack.value.get('properties', {}), + imports=self.imports, + + ) + + @process('/model/**', condition=lambda current: is_schema_element(current.stack)) + def enter_model_element(self, stack: ModelBuilderStack): + self.model_element_enter(stack) + + # process children + yield + + data = stack.top.data + if isinstance(data, dict): + set_definition = False + if 'oarepo:marshmallow' in data: + self.stack.push('oarepo:marshmallow', data['oarepo:marshmallow']) + self.stack.pop() + set_definition = True + elif stack.stack[-2].key == 'properties': + self.stack.push( + 'oarepo:marshmallow', {}) + + self.stack.pop() + set_definition = True + if set_definition: + self.set_marshmallow_definition(self.stack.real_top) + + self.model_element_leave(stack) + + def set_marshmallow_definition(self, data): + marshmallow = data['oarepo:marshmallow'] + + data_type = data.get('type', 'object') + generator = self.schema.settings.python.marshmallow.mapping.get(data_type, None) + if isinstance(generator, dict): + # a value, not a callable => merge it + marshmallow = deepmerge(generator, marshmallow) + data['marshmallow'] = marshmallow + generator = None + + # add imports if 
def create_field(field_type, options=(), validators=(), data=None):
    """Render the source text of a marshmallow field instantiation.

    :param field_type: expression naming the field class, e.g. ``ma_fields.String``
    :param options: constructor arguments, already rendered as source text
    :param validators: validator expressions, already rendered as source text
    :param data: schema element; its ``oarepo:marshmallow`` section may add
        ``options`` and ``validators`` and may set ``nested``. ``None`` or a
        missing section means "no extras".
    :return: the field expression, wrapped in ``ma_fields.Nested(...)`` when
        ``nested`` is truthy
    """
    # ``data`` defaults to None, so it must not be subscripted unconditionally
    marshmallow = (data or {}).get('oarepo:marshmallow') or {}

    opts = [*options, *marshmallow.get('options', [])]
    all_validators = [*validators, *marshmallow.get('validators', [])]
    if all_validators:
        opts.append(f'validate=[{",".join(all_validators)}]')

    ret = f'{field_type}({", ".join(opts)})'
    if marshmallow.get('nested', False):
        ret = f'ma_fields.Nested({ret})'
    return ret


def marshmallow_string_generator(data, schema, imports):
    """Render a ``ma_fields.String`` field for a string schema element.

    A ``ma_valid.Length`` validator is added when ``minLength`` or
    ``maxLength`` is present in ``data``; the absent bound is rendered as
    ``None``. ``schema`` and ``imports`` are not used here.
    """
    validators = []
    min_length = data.get('minLength', None)
    max_length = data.get('maxLength', None)
    if min_length is not None or max_length is not None:
        validators.append(f'ma_valid.Length(min={min_length}, max={max_length})')
    return create_field('ma_fields.String', [], validators, data)
class InvenioRecordSearchPoetryBuilder(OutputBuilder):
    """Writes the ``invenio_search.mappings`` plugin entry to pyproject.toml."""
    TYPE = 'invenio_record_search_poetry'

    def finish(self):
        """Set the mappings plugin entry from ``settings`` on the ``pyproject.toml`` output."""
        super().finish()

        pyproject: TOMLOutput = self.builder.get_output('toml', 'pyproject.toml')

        # entry name comes from settings.python, its value from the top-level settings
        entry_name = self.settings.python.record_mapping_poetry
        entry_value = self.settings.mapping_package
        pyproject.set(
            "tool.poetry.plugins.'invenio_search.mappings'",
            entry_name,
            entry_value
        )
--- /dev/null +++ b/oarepo_model_builder/invenio/invenio_record_service_config.py @@ -0,0 +1,7 @@ +from .invenio_base import InvenioBaseClassPythonBuilder + + +class InvenioRecordServiceConfigBuilder(InvenioBaseClassPythonBuilder): + TYPE = 'invenio_record_service_config' + class_config = 'record-service-config-class' + template = 'record-service-config' diff --git a/oarepo_model_builder/invenio/invenio_sample_app_poetry.py b/oarepo_model_builder/invenio/invenio_sample_app_poetry.py new file mode 100644 index 00000000..91852f52 --- /dev/null +++ b/oarepo_model_builder/invenio/invenio_sample_app_poetry.py @@ -0,0 +1,48 @@ +from ..builders import OutputBuilder +from ..outputs.toml import TOMLOutput +from ..utils.verbose import log + + +class InvenioSampleAppPoetryBuilder(OutputBuilder): + TYPE = 'invenio_sample_app_poetry' + + def finish(self): + super().finish() + + output: TOMLOutput = self.builder.get_output( + 'toml', + 'pyproject.toml' + ) + + output.setdefault("tool.poetry", + "name", self.settings.package_base.replace('_', '-'), + "version", "0.0.1", + "description", f"A sample application for {self.settings.package}", + "authors", []) + + output.setdefault("build-system", + "requires", ['poetry-core>=1.0.0'], + "build-backend", "poetry.core.masonry.api") + + output.setdefault("tool.poetry.dependencies", "python", "^3.9") + output.setdefault("tool.poetry.dependencies", "python", "^3.9") + + output.setdefault("tool.poetry.dependencies.invenio", + 'version', '^3.5.0a1', + 'extras', ["base", "auth", "metadata", "files", "postgresql", "elasticsearch7"], + 'optional', True, + 'allow-prereleases', True + ) + + output.setdefault("tool.poetry.dependencies.invenio-records-resources", + 'version', '^0.17.3', + 'optional', True, + 'allow-prereleases', True + ) + + output.setdefault("tool.poetry.extras", 'sample-app', ['invenio', 'invenio-records-resources']) + + if output.created: + log(log.INFO, f"""To install the sample app, run + poetry install -E sample-app + """) 
diff --git a/oarepo_model_builder/invenio/templates/invenio_blueprint.py.jinja2 b/oarepo_model_builder/invenio/templates/invenio_blueprint.py.jinja2 new file mode 100644 index 00000000..67091945 --- /dev/null +++ b/oarepo_model_builder/invenio/templates/invenio_blueprint.py.jinja2 @@ -0,0 +1,12 @@ +from {{ python.record_service_config_class|package_name }} import {{ python.record_service_config_class|base_name }} +from {{ python.record_service_class|package_name }} import {{ python.record_service_class|base_name }} +from {{ python.record_resource_config_class|package_name }} import {{ python.record_resource_config_class|base_name }} +from {{ python.record_resource_class|package_name }} import {{ python.record_resource_class|base_name }} + +def {{ python.register_blueprint_function|base_name }}(app): + service_config = {{ python.record_service_config_class|base_name }}() + resource_config = {{ python.record_resource_config_class|base_name }}() + service = {{ python.record_service_class|base_name }}(config=service_config) + resource = {{ python.record_resource_class|base_name }}(service=service, config=resource_config) + + return resource.as_blueprint() diff --git a/oarepo_model_builder/invenio/templates/invenio_record.py.jinja2 b/oarepo_model_builder/invenio/templates/invenio_record.py.jinja2 new file mode 100644 index 00000000..0172d889 --- /dev/null +++ b/oarepo_model_builder/invenio/templates/invenio_record.py.jinja2 @@ -0,0 +1,23 @@ +from invenio_records.systemfields import ConstantField +from invenio_records_resources.records.systemfields import IndexField +from invenio_records_resources.records.systemfields.pid import PIDField, PIDFieldContext +from invenio_pidstore.providers.recordid_v2 import RecordIdProviderV2 +from invenio_records_resources.records.api import Record as InvenioBaseRecord + +{% for b in python.record_bases %} +from {{ b|package_name }} import {{ b|base_name }} +{% endfor %} +from {{ python.record_metadata_class|package_name }} import {{ 
python.record_metadata_class|base_name }} +from {{ python.record_dumper_class|package_name }} import {{ python.record_dumper_class|base_name }} + + +class {{ python.record_class|base_name }}({% for b in python.record_bases %}{{ b|base_name }}, {% endfor %}InvenioBaseRecord): + model_cls = {{ python.record_metadata_class|base_name }} + schema = ConstantField("$schema", "{{ settings.schema_server }}{{ settings.schema_name }}") + index = IndexField("{{ settings.index_name }}") + pid = PIDField( + create=True, + provider=RecordIdProviderV2, + context_cls = PIDFieldContext + ) + dumper = {{ python.record_dumper_class|base_name }}() diff --git a/oarepo_model_builder/invenio/templates/invenio_record_dumper.py.jinja2 b/oarepo_model_builder/invenio/templates/invenio_record_dumper.py.jinja2 new file mode 100644 index 00000000..881fae20 --- /dev/null +++ b/oarepo_model_builder/invenio/templates/invenio_record_dumper.py.jinja2 @@ -0,0 +1,9 @@ +from invenio_records.dumpers import ElasticsearchDumper as InvenioElasticsearchDumper + + +{% for b in python.record_dumper_bases %} +from {{ b|package_name }} import {{ b|base_name }} +{% endfor %} + +class {{ python.record_dumper_class|base_name }}({% for b in python.record_dumper_bases %}{{ b|base_name }}, {% endfor %}InvenioElasticsearchDumper): + """{{ python.record_class|base_name }} elasticsearch dumper.""" diff --git a/oarepo_model_builder/invenio/templates/invenio_record_metadata.py.jinja2 b/oarepo_model_builder/invenio/templates/invenio_record_metadata.py.jinja2 new file mode 100644 index 00000000..5c6e4f14 --- /dev/null +++ b/oarepo_model_builder/invenio/templates/invenio_record_metadata.py.jinja2 @@ -0,0 +1,14 @@ +from invenio_db import db +from invenio_records.models import RecordMetadataBase + +{% for b in python.record_metadata_bases %} +from {{ b|package_name }} import {{ b|base_name }} +{% endfor %} + +class {{ python.record_metadata_class|base_name }}({% for b in python.record_metadata_bases %}{{ b|base_name }}, {% 
endfor %}db.Model, RecordMetadataBase): + """Model for {{ python.record_class|base_name }} metadata.""" + + __tablename__ = "{{ python.record_metadata_table_name }}" + + # Enables SQLAlchemy-Continuum versioning + __versioned__ = {} diff --git a/oarepo_model_builder/invenio/templates/invenio_record_permissions.py.jinja2 b/oarepo_model_builder/invenio/templates/invenio_record_permissions.py.jinja2 new file mode 100644 index 00000000..1aacd1b9 --- /dev/null +++ b/oarepo_model_builder/invenio/templates/invenio_record_permissions.py.jinja2 @@ -0,0 +1,17 @@ +from invenio_records_permissions import RecordPermissionPolicy +from invenio_records_permissions.generators import SystemProcess, AnyUser + + +{% for b in python.record_permissions_bases %} +from {{ b|package_name }} import {{ b|base_name }} +{% endfor %} + +class {{ python.record_permissions_class|base_name }}({% for b in python.record_permissions_bases %}{{ b|base_name }}, {% endfor %}RecordPermissionPolicy): + """{{ python.record_class }} permissions.""" + + can_search = [SystemProcess(), AnyUser()] + can_read = [SystemProcess(), AnyUser()] + can_create = [SystemProcess()] + can_update = [SystemProcess()] + can_delete = [SystemProcess()] + can_manage = [SystemProcess()] diff --git a/oarepo_model_builder/invenio/templates/invenio_record_resource.py.jinja2 b/oarepo_model_builder/invenio/templates/invenio_record_resource.py.jinja2 new file mode 100644 index 00000000..72ec1aac --- /dev/null +++ b/oarepo_model_builder/invenio/templates/invenio_record_resource.py.jinja2 @@ -0,0 +1,12 @@ +from invenio_records_resources.resources import RecordResource as InvenioRecordResource + + +{% for b in python.record_resource_class_bases %} +from {{ b|package_name }} import {{ b|base_name }} +{% endfor %} + + +class {{ python.record_resource_class|base_name }}({% for b in python.record_resource_class_bases %}{{ b|base_name }}, {% endfor %}InvenioRecordResource): + """{{ python.record_class|base_name }} resource.""" + # here you can 
for example redefine + # create_url_rules function to add your own rules diff --git a/oarepo_model_builder/invenio/templates/invenio_record_resource_config.py.jinja2 b/oarepo_model_builder/invenio/templates/invenio_record_resource_config.py.jinja2 new file mode 100644 index 00000000..2f6daf29 --- /dev/null +++ b/oarepo_model_builder/invenio/templates/invenio_record_resource_config.py.jinja2 @@ -0,0 +1,12 @@ +from invenio_records_resources.resources import RecordResourceConfig as InvenioRecordResourceConfig + + +{% for b in python.record_resource_config_class_bases %} +from {{ b|package_name }} import {{ b|base_name }} +{% endfor %} + +class {{ python.record_resource_config_class|base_name }}({% for b in python.record_resource_config_class_bases %}{{ b|base_name }}, {% endfor %}InvenioRecordResourceConfig): + """{{ python.record_class|base_name }} resource config.""" + + blueprint_name = '{{ python.record_resource_blueprint_name }}' + url_prefix = '{{ settings.collection_url }}' diff --git a/oarepo_model_builder/invenio/templates/invenio_record_schema.py.jinja2 b/oarepo_model_builder/invenio/templates/invenio_record_schema.py.jinja2 new file mode 100644 index 00000000..4f4160ad --- /dev/null +++ b/oarepo_model_builder/invenio/templates/invenio_record_schema.py.jinja2 @@ -0,0 +1,31 @@ +from invenio_records_resources.services.records.schema import BaseRecordSchema as InvenioBaseRecordSchema + + +{% for b in python.record_schema_bases %} +from {{ b|package_name }} import {{ b|base_name }} +{% endfor %} + +{% for b in python.record_schema_properties_bases %} +from {{ b|package_name }} import {{ b|base_name }} +{% endfor %} + +{% for imp, imp_aliases in imports.items() %} +{% for alias in imp_aliases %} +import {{ imp }} as {{ alias }} +{% endfor %} +{% endfor %} + +{% if settings.top_level_metadata %} +class {{ python.record_schema_metadata_class|base_name }}({% for b in python.record_schema_properties_bases %}{{ b|base_name }}, {% endfor %}ma.Schema): +{% else %} 
+class {{ python.record_schema_class|base_name }}({% for b in python.record_schema_bases %}{{ b|base_name }}, {% endfor %}InvenioBaseRecordSchema): +{% endif %} + """{{ python.record_class|base_name }} schema.""" + {% for fld_name, fld_def in fields.items() %} + {{ fld_name }} = {{ fld_def["oarepo:marshmallow"].field }} + {% endfor %} + +{% if settings.top_level_metadata %} +class {{ python.record_schema_class|base_name }}({% for b in python.record_schema_bases %}{{ b|base_name }}, {% endfor %}InvenioBaseRecordSchema): + metadata = ma_fields.Nested({{ python.record_schema_metadata_class|base_name }}()) +{% endif %} diff --git a/oarepo_model_builder/invenio/templates/invenio_record_search_options.py.jinja2 b/oarepo_model_builder/invenio/templates/invenio_record_search_options.py.jinja2 new file mode 100644 index 00000000..0e2bf006 --- /dev/null +++ b/oarepo_model_builder/invenio/templates/invenio_record_search_options.py.jinja2 @@ -0,0 +1,9 @@ +from invenio_records_resources.services import SearchOptions as InvenioSearchOptions + + +{% for b in python.record_search_options_bases %} +from {{ b|package_name }} import {{ b|base_name }} +{% endfor %} + +class {{ python.record_search_options_class|base_name }}({% for b in python.record_search_options_bases %}{{ b|base_name }}, {% endfor %}InvenioSearchOptions): + """{{ python.record_class|base_name }} search options.""" diff --git a/oarepo_model_builder/invenio/templates/invenio_record_service.py.jinja2 b/oarepo_model_builder/invenio/templates/invenio_record_service.py.jinja2 new file mode 100644 index 00000000..b34bf34b --- /dev/null +++ b/oarepo_model_builder/invenio/templates/invenio_record_service.py.jinja2 @@ -0,0 +1,10 @@ +from invenio_records_resources.services import RecordService as InvenioRecordService + + +{% for b in python.record_service_bases %} +from {{ b|package_name }} import {{ b|base_name }} +{% endfor %} + + +class {{ python.record_service_class|base_name }}({% for b in python.record_service_bases 
%}{{ b|base_name }}, {% endfor %}InvenioRecordService): + """{{ python.record_class|base_name }} service.""" diff --git a/oarepo_model_builder/invenio/templates/invenio_record_service_config.py.jinja2 b/oarepo_model_builder/invenio/templates/invenio_record_service_config.py.jinja2 new file mode 100644 index 00000000..1d87f4c4 --- /dev/null +++ b/oarepo_model_builder/invenio/templates/invenio_record_service_config.py.jinja2 @@ -0,0 +1,35 @@ +from invenio_records_resources.services import RecordServiceConfig as InvenioRecordServiceConfig +from invenio_records_resources.services import RecordLink, pagination_links +from invenio_records_resources.services.records.components import DataComponent, MetadataComponent + + +{% for b in python.record_service_config_bases %} +from {{ b|package_name }} import {{ b|base_name }} +{% endfor %} +from {{ python.record_class|package_name }} import {{ python.record_class|base_name }} +from {{ python.record_permissions_class|package_name }} import {{ python.record_permissions_class|base_name }} +from {{ python.record_schema_class|package_name }} import {{ python.record_schema_class|base_name }} +from {{ python.record_search_options_class|package_name }} import {{ python.record_search_options_class|base_name }} + + +class {{ python.record_service_config_class|base_name }}({% for b in python.record_service_config_bases %}{{ b|base_name }}, {% endfor %}InvenioRecordServiceConfig): + """{{ python.record_class|base_name }} service config.""" + + permission_policy_cls = {{ python.record_permissions_class|base_name }} + schema = {{ python.record_schema_class|base_name }} + search = {{ python.record_search_options_class|base_name }} + record_cls = {{ python.record_class|base_name }} + + components = [ + DataComponent, + MetadataComponent + ] + + @property + def links_item(self): + return { + "self": RecordLink("{{ settings.collection_url }}/{id}"), + } + + links_search = pagination_links("{{ settings.collection_url }}{?args*}") + diff --git 
try:
    import json5
except ImportError:
    # json5 is optional; plain json offers the same load/dumps calls used below
    import json as json5


def json_loader(file_path, schema):
    """Parse ``file_path`` with json5 (or with json when json5 is not installed).

    ``schema`` is accepted for the common loader signature and is not used.
    """
    with open(file_path) as f:
        return json5.load(f)


json_loader.safe = True


def yaml_loader(file_path, schema):
    """Parse ``file_path`` as YAML.

    ``schema`` is accepted for the common loader signature and is not used.

    :raises Exception: when the ``yaml`` module cannot be imported
    """
    try:
        import yaml
    except ImportError:
        raise Exception('Loader for yaml not found. Please install pyyaml library to use yaml files')

    with open(file_path) as f:
        # NOTE(review): this loader is flagged ``safe`` but uses full_load;
        # consider yaml.safe_load if no model file relies on extended tags.
        return yaml.full_load(f)


yaml_loader.safe = True


def python_loader(file_path, schema):
    """Execute a python file and return its serializable globals.

    The file is run with ``exec`` in a fresh globals dict, so it must come
    from a trusted source; this loader is deliberately not flagged ``safe``.
    ``schema`` is accepted for the common loader signature and is not used.

    :return: dict of those globals that ``json5.dumps`` can serialize
    """
    with open(file_path) as f:
        code = f.read()
    # hope that user knows what he/she is doing
    gls = {}
    exec(code, gls)
    ret = {}
    # iterate items(): iterating the dict itself yields only the key strings
    for k, v in gls.items():
        try:
            json5.dumps(v)
        except Exception:
            # not serializable (modules, functions, __builtins__, ...) => skip
            continue
        ret[k] = v
    return ret
class DefaultValuesModelPreprocessor(ModelPreprocessor):
    """Model preprocessor that fills in default values of top-level settings.

    Every default is passed to ``self.set`` together with a factory, so the
    factory is evaluated lazily. The defaults are applied in the order given
    below because later factories read settings produced by earlier ones.
    """
    TYPE = 'default'

    def transform(self, schema: ModelSchema, settings: Dict):
        def package_path():
            # "a.b.c" -> Path("a/b/c")
            head, *tail = settings.package.split('.')
            return Path(head).joinpath(*tail)

        def inside_package(*parts):
            # factory of a file path located below the package directory;
            # ``parts`` are factories so that settings are read lazily
            return lambda: os.path.join(
                settings.package_path, *(part() for part in parts)
            )

        def const(value):
            return lambda: value

        defaults = (
            ('package',
             lambda: os.path.basename(os.getcwd()).replace('-', '_')),
            ('processing-order',
             lambda: ['settings', '*', 'model']),
            ('package-base',
             lambda: settings.package.rsplit('.', maxsplit=1)[-1]),
            ('kebap-package',
             lambda: settings.package.replace('_', '-')),
            ('package-path',
             package_path),
            ('schema-version',
             lambda: '1.0.0'),
            ('schema-name',
             lambda: f"{settings.kebap_package}-{settings.schema_version}.json"),
            ('schema-file',
             inside_package(
                 const('jsonschemas'),
                 lambda: settings.schema_name,
             )),
            ('mapping-package',
             lambda: f'{settings.package}.mappings'),
            ('jsonschemas-package',
             lambda: f'{settings.package}.jsonschemas'),
            ('mapping-file',
             inside_package(
                 const('mappings'),
                 const('v7'),
                 lambda: settings.package_base,
                 lambda: settings.schema_name,
             )),
            ('schema-server',
             lambda: 'http://localhost/schemas/'),
            ('index-name',
             lambda: settings.package_base + '-'
             + os.path.basename(settings.mapping_file).replace('.json', '')),
            ('collection-url',
             lambda: f'/{settings.package_base}/'),
        )

        for option, factory in defaults:
            self.set(settings, option, factory)
def last_item(x):
    """Return the part of the dotted name *x* that follows its last dot."""
    *_, tail = x.rsplit('.', maxsplit=1)
    return tail


class InvenioModelPreprocessor(ModelPreprocessor):
    """Model preprocessor that fills in defaults of the ``python`` settings.

    The names of generated classes, tables and functions are derived from
    ``settings.package`` and from the record prefix. A value is generated
    only when ``self.set`` decides to assign it.
    """
    TYPE = 'invenio'

    def transform(self, schema, settings):
        python_section = {
            'record_prefix': camel_case(settings.package.rsplit('.', maxsplit=1)[-1]),
            # just make sure that the templates is always there
            'templates': {},
            'marshmallow': {
                'mapping': {},
            },
        }
        deepmerge(settings, {'python': python_section})

        settings.setdefault('top-level-metadata', True)

        record_prefix = settings.python.record_prefix

        def class_in(module, suffix):
            # lazily builds "<package>.<module>.<RecordPrefix><suffix>"
            return lambda: f'{settings.package}.{module}.{record_prefix}{suffix}'

        def package_base():
            return f'{settings.package_base}'

        defaults = (
            ('record-prefix-snake', lambda: snake_case(settings.python.record_prefix)),
            ('record-class', class_in('record', 'Record')),
            ('record-schema-class', class_in('schema', 'Schema')),
            ('record-schema-metadata-class', class_in('schema', 'MetadataSchema')),
            ('record-schema-metadata-alembic', package_base),
            ('record-schema-metadata-poetry', package_base),
            ('record-metadata-class', class_in('metadata', 'Metadata')),
            ('record-mapping-poetry', package_base),
            ('record-jsonschemas-poetry', package_base),
            ('record-permissions-class', class_in('permissions', 'PermissionPolicy')),
            ('record-dumper-class', class_in('dumper', 'Dumper')),
            ('record-metadata-table-name', lambda: f'{record_prefix.lower()}_metadata'),
            ('record-search-options-class', class_in('search_options', 'SearchOptions')),
            ('record-service-config-class', class_in('service_config', 'ServiceConfig')),
            ('record-service-class', class_in('service', 'Service')),
            ('record-resource-config-class', class_in('resource', 'ResourceConfig')),
            ('record-resource-class', class_in('resource', 'Resource')),
            ('record-resource-blueprint-name', lambda: record_prefix),
            ('register-blueprint-function',
             lambda: f'{settings.package}.blueprint.register_blueprint'),
        )

        for option, factory in defaults:
            self.set(settings.python, option, factory)
class OutputBase:
    """Base class of the outputs that write generated files.

    An output is bound to a builder and to the path of a single file.
    Subclasses have to implement ``begin``, ``finish`` and ``created``.
    """
    # NOTE(review): the subclasses visible in this change set define ``TYPE``,
    # not ``output_type`` - confirm which attribute the builder looks up.
    output_type = None

    def __init__(self, builder, path: Path):
        """
        :param builder: the builder that owns this output
        :param path: path of the file handled by this output
        """
        self.builder = builder
        self.path: Path = path

    def begin(self):
        """Start the output. Must be implemented by a subclass."""
        # NotImplemented is a constant for binary operators and is not
        # callable; the exception to raise here is NotImplementedError
        raise NotImplementedError()

    def finish(self):
        """Finish the output. Must be implemented by a subclass."""
        raise NotImplementedError()

    @property
    def created(self):
        """Must be implemented by a subclass."""
        raise NotImplementedError()
class JSONStack:
    """Hierarchic json builder.

    ``stack[0]`` is the root dictionary of the built document, the following
    items are the elements that are currently "open". Pushing an element
    stores a deep copy of it inside the element on the top of the stack.
    """
    # marker: the pushed element and everything pushed below it is left out
    IGNORED_SUBTREE = object()
    # marker: the pushed element is left out, elements pushed below it are
    # stored into the nearest element that is not ignored
    IGNORED_NODE = object()

    def __init__(self):
        self.stack: List = [{}]

    def should_ignore(self, element):
        """Return True if *element* is one of the two ignore markers."""
        return element is self.IGNORED_SUBTREE or element is self.IGNORED_NODE

    def push(self, key, el):
        """Store *el* under *key* in the current top element and open it.

        :param key: dictionary key or list index; ``None`` merges *el*
                    (a dictionary) into the root, which is allowed only
                    while nothing else is on the stack
        :param el: the element or one of the ignore markers
        :raises NotImplementedError: if the current top element is neither
                    a dictionary nor a list
        """
        if key is None:
            assert isinstance(el, dict)
            assert self.empty
            self.stack[0] = deepmerge(copy.deepcopy(el), self.stack[0], [])
            return
        top = self.stack[-1]
        if top is self.IGNORED_SUBTREE:
            # inside an ignored subtree everything is ignored
            self.stack.append(self.IGNORED_SUBTREE)
        elif self.should_ignore(el):
            self.stack.append(el)
        else:
            if top is self.IGNORED_NODE:
                # skip the ignored nodes and write to their nearest parent
                top = self.real_top

            el = copy.deepcopy(el)
            if isinstance(top, dict):
                if key not in top:
                    top[key] = el
                else:
                    top[key] = deepmerge(el, top[key])
            elif isinstance(top, list):
                if key < len(top):
                    top[key] = deepmerge(el, top[key])
                else:
                    # list items can only be appended one after another
                    assert key == len(top)
                    top.append(el)
            else:
                # NotImplemented is not callable, raise the proper exception
                raise NotImplementedError(f'Set for datatype {type(top)} is not implemented')
            self.stack.append(el)

    def pop(self):
        """Close the top element; the root is never removed."""
        if not self.empty:
            self.stack.pop()

    @property
    def empty(self):
        """True if only the root is on the stack."""
        return len(self.stack) == 1

    @property
    def value(self):
        """The root of the built document."""
        return self.stack[0]

    @property
    def real_top(self):
        """The topmost element of the stack that is not ``IGNORED_NODE``."""
        for t in reversed(self.stack):
            if t is not self.IGNORED_NODE:
                return t

    def merge(self, value):
        """Merge *value* with ``real_top`` and update ``real_top`` with the result."""
        real_top = self.real_top
        real_top.update(deepmerge(value, real_top, []))
+from .json import JSONOutput -"""OArepo module that generates data model files from a JSON specification file.""" -from copy import deepcopy -from oarepo_model_builder.outputs.json import JsonOutput -from oarepo_model_builder.proxies import current_model_builder - - -class JsonSchemaOutput(JsonOutput): - """Output class for jsonschema.""" - output_type = 'jsonschema' - - def __init__(self, path=None, data=None): - if data is None: - data = deepcopy(current_model_builder.model_config.get('jsonschema', {})) - - super().__init__(path, data) +class JSONSchemaOutput(JSONOutput): + TYPE = 'jsonschema' diff --git a/oarepo_model_builder/outputs/mapping.py b/oarepo_model_builder/outputs/mapping.py index 907510d3..1ba9ae9a 100644 --- a/oarepo_model_builder/outputs/mapping.py +++ b/oarepo_model_builder/outputs/mapping.py @@ -1,23 +1,9 @@ -# -*- coding: utf-8 -*- -# -# Copyright (C) 2021 CESNET. -# -# OARepo-Communities is free software; you can redistribute it and/or modify -# it under the terms of the MIT License; see LICENSE file for more details. +from .json import JSONOutput -"""OArepo module that generates data model files from a JSON specification file.""" -from copy import deepcopy -from oarepo_model_builder.outputs.json import JsonOutput -from oarepo_model_builder.proxies import current_model_builder +class MappingOutput(JSONOutput): + TYPE = 'mapping' + def merge_mapping(self, mapping): + self.stack.merge(mapping) -class MappingOutput(JsonOutput): - """ES Mapping output.""" - output_type = 'mapping' - - def __init__(self, path=None, data=None): - if data is None: - data = deepcopy(current_model_builder.model_config.get('search').get('mapping', {})) - - super().__init__(path, data) diff --git a/oarepo_model_builder/outputs/output.py b/oarepo_model_builder/outputs/output.py deleted file mode 100644 index cbf3a61d..00000000 --- a/oarepo_model_builder/outputs/output.py +++ /dev/null @@ -1,39 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright (C) 2021 CESNET. 
-# -# OARepo-Communities is free software; you can redistribute it and/or modify -# it under the terms of the MIT License; see LICENSE file for more details. - -"""OArepo module that generates data model files from a JSON specification file.""" - -from deepmerge import Merger - -_output_merger = Merger( - [ - (list, ["append"]), - (dict, ["merge"]), - (set, ["union"]) - ], - ["override"], - ["override"] -) - - -class BaseOutput: - """Base output handler interface.""" - output_type = None - - def __init__(self, path=None, data=None): - self.path = path - self._data = data - - def save(self): - raise NotImplemented - - @property - def data(self): - return self._data - - def __call__(self, *args, **kwargs): - pass diff --git a/oarepo_model_builder/outputs/python.py b/oarepo_model_builder/outputs/python.py new file mode 100644 index 00000000..9e98cc24 --- /dev/null +++ b/oarepo_model_builder/outputs/python.py @@ -0,0 +1,108 @@ +import sys +import libcst as cst +from jinja2 import Environment, FunctionLoader + +from oarepo_model_builder.templates import templates +from oarepo_model_builder.outputs import OutputBase +from oarepo_model_builder.utils.cst import MergingTransformer +from oarepo_model_builder.utils.verbose import log + + +class PythonOutput(OutputBase): + TYPE = 'python' + + def begin(self): + try: + with self.builder.open(self.path) as f: + self.original_data = f.read() + + self.cst = cst.parse_module(self.original_data) + except FileNotFoundError: + self.original_data = None + self.cst = cst.parse_module('') + + @property + def created(self): + return self.original_data is None + + def finish(self): + code = self.cst.code + if code != self.original_data: + self.path.parent.mkdir(parents=True, exist_ok=True) + log(2, 'Saving %s', self.path) + with self.builder.open(self.path, mode='w') as f: + f.write(code) + if self.builder.schema.settings.python.use_isort: + import isort + config = isort.settings.Config(verbose=False, quiet=True) + isort.file(self.path, 
config=config) + if self.builder.schema.settings.python.use_black: + import subprocess + subprocess.call([ + 'black', + '-q', + '--experimental-string-processing', + str(self.path) + ]) + + def merge(self, template_name, context, filters=None): + # template is a loadable resource + env = Environment( + loader=FunctionLoader(lambda tn: templates.get_template(tn, context['settings'])), + autoescape=False, + ) + self.register_default_filters(env) + for filter_name, filter_func in (filters or {}).items(): + env.filters[filter_name] = filter_func + + rendered = env.get_template(template_name).render(context) + try: + rendered_cst = cst.parse_module(rendered, config=self.cst.config_for_parsing) + except: + print(rendered, file=sys.stderr) + raise + self.cst = self.cst.visit(MergingTransformer(rendered_cst)) + + def register_default_filters(self, env): + env.filters['package_name'] = lambda value: (value.rsplit('.', maxsplit=1)[0]) + env.filters['base_name'] = lambda value: (value.rsplit('.', maxsplit=1)[1]) + + +class CSTPart: + def __init__(self, output, path): + self.output = output + self.path = path + + @property + def _cst(self): + current = self.output.cst + for p in self.path: + for c in current.body: + if isinstance(c, p[0]) and c.name.value == p[1]: + current = c + break + else: + return None + return current + + @property + def exists(self): + return self._cst is not None + + def _create(self, expr): + self.output.create_cst( + cst.parse_module(expr, config=self.output.cst.config_for_parsing).body, + self.path[:-1] + ) + + +class PythonClass(CSTPart): + def __init__(self, output, path): + super().__init__(output, path) + + def create(self, name=None): + expr = f""" +class {name}: + pass + """ + return self._create(expr) diff --git a/oarepo_model_builder/outputs/text.py b/oarepo_model_builder/outputs/text.py new file mode 100644 index 00000000..ae68bc51 --- /dev/null +++ b/oarepo_model_builder/outputs/text.py @@ -0,0 +1,22 @@ +from oarepo_model_builder.outputs 
class TOMLOutput(OutputBase):
    """Output that edits a TOML file through tomlkit.

    The file is parsed in ``begin`` and written back in ``finish``, but only
    if its key-sorted dump differs from the key-sorted dump taken in ``begin``.
    """
    TYPE = 'toml'

    def begin(self):
        """Parse the existing file or start an empty document."""
        try:
            with self.builder.open(self.path) as f:
                content = f.read()
                document = tomlkit.parse(content)
                self.original_data = content
                self.toml = document
                self.parsed = tomlkit.dumps(document, sort_keys=True)
        except FileNotFoundError:
            self.original_data = None
            self.toml = tomlkit.document()
            self.parsed = None

    @property
    def created(self):
        """True if the file did not exist when ``begin`` was called."""
        return self.original_data is None

    def table(self, key, key_type=tomlkit.items.KeyType.Bare):
        """Return the table stored under *key*, an empty one is added if missing.

        If the document has no item with exactly this key and the key
        contains a dot, the key is split into segments that are followed one
        by one; a quoted part of the key is kept as a single segment.
        """
        def lookup(name):
            try:
                return self.toml[tomlkit.items.Key(name, key_type)]
            except KeyError:
                pass
            if '.' not in name:
                return None

            # separate the dotted prefix from the optional quoted segment
            for quote in ('"', "'"):
                if quote in name:
                    dotted, quoted = name.split(quote)[:2]
                    break
            else:
                dotted, quoted = name, None

            segments = [segment for segment in dotted.split('.') if segment]
            if quoted:
                segments.append(quoted)

            current = self.toml
            for segment in segments:
                try:
                    current = current[tomlkit.items.Key(segment, key_type)]
                except KeyError:
                    return None
            return current

        existing = lookup(key)
        if existing is not None:
            return existing
        created = tomlkit.table()
        self.toml.append(tomlkit.items.Key(key, key_type), created)
        return created

    def get(self, table, key):
        """Return the value of *key* in *table*, None on ``NonExistentKey``."""
        try:
            return self.toml[table][key]
        except NonExistentKey:
            return None

    def set(self, table, key, value, *others_key_values, key_type=tomlkit.items.KeyType.Bare):
        """Assign *value* to *key* in *table*.

        ``others_key_values`` are further keys and values, flattened as
        ``key1, value1, key2, value2, ...``.
        """
        tbl = self.table(table)
        flat = (key, value, *others_key_values)
        for position in range(0, len(flat), 2):
            item_key = flat[position]
            item_value = flat[position + 1]
            tbl[tomlkit.items.Key(item_key, key_type)] = item_value

    def setdefault(self, table, key, value, *others_key_values, key_type=tomlkit.items.KeyType.Bare):
        """Like ``set``, but every pair is stored with the table's ``setdefault``."""
        tbl = self.table(table)
        flat = (key, value, *others_key_values)
        for position in range(0, len(flat), 2):
            item_key = flat[position]
            item_value = flat[position + 1]
            tbl.setdefault(tomlkit.items.Key(item_key, key_type), item_value)

    def finish(self):
        """Write the document if its key-sorted dump has changed."""
        if tomlkit.dumps(self.toml, sort_keys=True) == self.parsed:
            return
        with self.builder.open(self.path, 'w') as f:
            f.write(tomlkit.dumps(self.toml))
-toml_property = namedtuple('toml_property', 'section, property, value') - - -class TomlOutput(BaseOutput): - """Output that handles JSON formatted data.""" - - def __init__(self, path): - super().__init__(path) - self.props_to_add = [] - self.props_to_remove = [] - - def add(self, section, property, value): - self.props_to_add.append(toml_property(section, property, value)) - - def remove(self, section, property, value): - self.props_to_remove.append(toml_property(section, property, value)) - - def save(self): - if os.path.exists(self.path): - with open(self.path, mode='r') as fp: - toml = parse(fp.read()) - else: - toml = document() - - for prop in self.props_to_add: - if prop.section not in toml: - section = table() - toml.add(prop.section, section) - else: - section = toml[prop.section] - section[prop.property] = prop.value - - for prop in self.props_to_remove: - if prop.section not in toml: - continue - else: - section = toml[prop.section] - if prop.property in section: - del section[prop.property] - - with open(self.path, mode='w') as fp: - fp.write(dumps(toml)) diff --git a/oarepo_model_builder/outputs/ui.py b/oarepo_model_builder/outputs/ui.py deleted file mode 100644 index c446081b..00000000 --- a/oarepo_model_builder/outputs/ui.py +++ /dev/null @@ -1,17 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright (C) 2021 CESNET. -# -# OARepo-Communities is free software; you can redistribute it and/or modify -# it under the terms of the MIT License; see LICENSE file for more details. 
class PropertyPreprocessor:
    """Base class of preprocessors whose methods are selected by a json path.

    Methods decorated with ``process`` are registered in ``self.json_paths``
    when the instance is created. ``process`` (the method) then calls the
    first matching method registered for the given output builder type.
    """
    PathMethodRecord = namedtuple('PathMethodRecord', 'method, output_builder_type')

    def __init__(self, builder: ModelBuilder):
        self.builder = builder
        # TODO: move this to metaclass and initialize only once per class
        self.json_paths = JSONPaths()

        decorated = [
            member
            for _name, member in inspect.getmembers(self, inspect.ismethod)
            if hasattr(member, 'model_builder_priority')
        ]

        def registration_order(member):
            # higher priority first, then longer path first, then by path;
            # id() makes the key unique
            return (
                -member.model_builder_priority,
                -len(member.model_builder_path),
                member.model_builder_path,
                id(member),
            )

        for member in sorted(decorated, key=registration_order):
            record = PropertyPreprocessor.PathMethodRecord(
                member, member.model_builder_output_builder_type
            )
            self.json_paths.register(
                member.model_builder_path, member.model_builder_condition, record
            )

    def begin(self, schema, settings):
        """Remember the schema and the settings."""
        self.schema = schema
        self.settings = settings

    def finish(self):
        """Does nothing in this base class."""
        pass

    def _call_method(self, data, stack: ModelBuilderStack, output_builder_type):
        """Call the first matching method registered for *output_builder_type*.

        :return: the result of the method; None if no method matched
        """
        matching = self.json_paths.match(
            stack.path, stack.top.data, extra_data={'stack': stack}
        )
        for handler, handler_builder_type in matching:
            if handler_builder_type != output_builder_type:
                continue
            return handler(data, stack=stack)

    def process(self, output_builder_type: str, data, stack: ModelBuilderStack):
        """Process *data* of the element on the top of *stack*."""
        return self._call_method(data, stack, output_builder_type)


def process(model_builder, path, priority=0, condition=None):
    """Decorator marking a ``PropertyPreprocessor`` method as a path handler.

    :param model_builder: builder type as a string, or an object whose
                          ``TYPE`` attribute holds it
    :param path: json path stored as ``model_builder_path``
    :param priority: stored as ``model_builder_priority``
    :param condition: stored as ``model_builder_condition``
    """
    def wrapper(f):
        @functools.wraps(f)
        def wrapped(*args, **kwargs):
            return f(*args, **kwargs)

        if isinstance(model_builder, str):
            builder_type = model_builder
        else:
            builder_type = model_builder.TYPE

        wrapped.model_builder_priority = priority
        wrapped.model_builder_output_builder_type = builder_type
        wrapped.model_builder_path = path
        wrapped.model_builder_condition = condition
        return wrapped

    return wrapper
ModelBuilderStack, **kwargs): + data['type'] = 'text' + return data + + @process(model_builder=InvenioRecordSchemaBuilder, + path='**/properties/*', + condition=lambda current: current.type == 'fulltext') + def modify_fulltext_marshmallow(self, data, stack: ModelBuilderStack, **kwargs): + data['type'] = 'string' + return data + + # + # type='keyword' in model + # + + @process(model_builder=JSONSchemaBuilder, + path='**/properties/*', + condition=lambda current: current.type == 'keyword') + def modify_keyword_schema(self, data, stack: ModelBuilderStack, **kwargs): + data['type'] = 'string' + return data + + @process(model_builder=MappingBuilder, + path='**/properties/*', + condition=lambda current: current.type == 'keyword') + def modify_keyword_mapping(self, data, stack: ModelBuilderStack, **kwargs): + data['type'] = 'keyword' + deepmerge( + data.setdefault('oarepo:mapping', {}), + { + 'ignore_above': self.settings['elasticsearch']['keyword-ignore-above'] + }) + return data + + @process(model_builder=InvenioRecordSchemaBuilder, + path='**/properties/*', + condition=lambda current: current.type == 'keyword') + def modify_keyword_marshmallow(self, data, stack: ModelBuilderStack, **kwargs): + data['type'] = 'string' + return data + + # + # type='fulltext-keyword' in model + # + + @process(model_builder=JSONSchemaBuilder, + path='**/properties/*', + condition=lambda current: current.type == 'fulltext-keyword') + def modify_fulltext_keyword_schema(self, data, stack: ModelBuilderStack, **kwargs): + data['type'] = 'string' + return data + + @process(model_builder=MappingBuilder, + path='**/properties/*', + condition=lambda current: current.type == 'fulltext-keyword') + def modify_fulltext_keyword_mapping(self, data, stack: ModelBuilderStack, **kwargs): + data['type'] = 'text' + deepmerge( + data.setdefault('oarepo:mapping', {}), + { + 'fields': { + 'keyword': { + 'type': 'keyword', + 'ignore_above': self.settings['elasticsearch']['keyword-ignore-above'] + } + } + }, []) + 
return data + + @process(model_builder=InvenioRecordSchemaBuilder, + path='**/properties/*', + condition=lambda current: current.type == 'fulltext-keyword') + def modify_fulltext_keyword_marshmallow(self, data, stack: ModelBuilderStack, **kwargs): + data['type'] = 'string' + return data diff --git a/oarepo_model_builder/proxies.py b/oarepo_model_builder/proxies.py deleted file mode 100644 index 0f365ee0..00000000 --- a/oarepo_model_builder/proxies.py +++ /dev/null @@ -1,12 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright (C) 2021 CESNET. -# -# OARepo-Communities is free software; you can redistribute it and/or modify -# it under the terms of the MIT License; see LICENSE file for more details. - -"""OArepo module that generates data model files from a JSON specification file.""" - -from oarepo_model_builder import OARepoModelBuilder - -current_model_builder = OARepoModelBuilder() diff --git a/oarepo_model_builder/schema.py b/oarepo_model_builder/schema.py new file mode 100644 index 00000000..4a137ed6 --- /dev/null +++ b/oarepo_model_builder/schema.py @@ -0,0 +1,107 @@ +import copy +import pathlib +from typing import Dict, Callable + +import munch +from jsonpointer import resolve_pointer + +from .exceptions import IncludedFileNotFoundException +from .utils.deepmerge import deepmerge +from .utils.hyphen_munch import HyphenMunch + + +class ModelSchema: + OAREPO_USE = 'oarepo:use' + + def __init__(self, file_path, content=None, + included_models: Dict[str, Callable] = None, + loaders=None): + """ + Creates and parses model schema + + :param file_path: path on the filesystem to the model schema file + :param content: if set, use this content, otherwise load the file_path + :param included_models: a dictionary of file_id to callable that returns included json. 
+ The callable expects a single parameter, an instance of this schema + """ + + self.file_path = file_path + self.included_schemas = included_models or {} + self.loaders = loaders + + if content is not None: + self.schema = content + else: + self.schema = copy.deepcopy(self._load(file_path)) + + self._resolve_references(self.schema, []) + + self.schema.setdefault('settings', {}) + self.schema['settings'].setdefault('plugins', {}) + self.schema = munch.munchify(self.schema, factory=HyphenMunch) + + def get(self, key): + return self.schema.get(key, None) + + def set(self, key, value): + self.schema[key] = value + + @property + def settings(self): + return self.schema.settings + + def merge(self, another): + self.schema = munch.munchify(deepmerge(another, self.schema, []), factory=HyphenMunch) + + def _load(self, file_path): + """ + Loads a json/json5 file on the path + + :param file_path: file path on filesystem + :return: parsed json + """ + extension = pathlib.Path(file_path).suffix.lower()[1:] + if extension in self.loaders: + return self.loaders[extension](file_path, self) + + raise Exception(f'Can not load {file_path} - no loader has been found for extension {extension} ' + f'in entry point group oarepo_model_builder.loaders') + + def _load_included_file(self, file_id): + """ + Resolve and load an included file. Internal method called when loading schema. + If the included file contains a json pointer, + return only the part identified by the json pointer. 
+ + :param file_id: the id of the included file, might contain #xpointer + :return: loaded json + """ + if '#' in file_id: + file_id, json_pointer = file_id.rsplit('#', 1) + else: + json_pointer = None + + if file_id not in self.included_schemas: + raise IncludedFileNotFoundException(f'Included file {file_id} not found in includes') + + ret = self.included_schemas[file_id](self) + + if json_pointer: + ret = resolve_pointer(ret, json_pointer) + + return copy.deepcopy(ret) + + def _resolve_references(self, element, stack): + if isinstance(element, dict): + if self.OAREPO_USE in element: + included_name = element.pop(self.OAREPO_USE) + included_data = self._load_included_file(included_name) + deepmerge(element, included_data, []) + return self._resolve_references(element, stack) + for k, v in element.items(): + self._resolve_references(v, stack + [k]) + elif isinstance(element, list): + for v in element: + self._resolve_references(v, stack) + + diff --git a/oarepo_model_builder/stack.py b/oarepo_model_builder/stack.py new file mode 100644 index 00000000..0dff756c --- /dev/null +++ b/oarepo_model_builder/stack.py @@ -0,0 +1,133 @@ +from functools import cached_property +from typing import Generator, List + +from deepdiff import DeepDiff + + +class ReplaceElement(Exception): + def __init__(self, data): + super().__init__() + self.data = data + + +class ModelBuilderStackEntry: + def __init__(self, key=None, data=None): + self.key = key + self.data = data + + def __getitem__(self, item): + return self.data[item] + + def __eq__(self, other): + return self.key == other.key and not DeepDiff(self.key, other.key) + + def __str__(self): + return f'{self.key} - {self.data}' + + +class ModelBuilderStack: + DICT = 'dict' + LIST = 'list' + PRIMITIVE = 'primitive' + SKIP = 'skip' + + def __init__(self, schema): + self.schema = schema + self.stack = [] + + def __getitem__(self, item): + return self.stack[item] + + def push(self, key, el): + self._clear_path() + 
self.stack.append(ModelBuilderStackEntry(key, el)) + + def pop(self): + self._clear_path() + self.stack.pop() + + @property + def top(self): + return self.stack[-1] + + @property + def level(self): + return len(self.stack) + + @property + def top_type(self): + match self.top.data: + case dict(): + return self.DICT + case list(): + return self.LIST + case _: + return self.PRIMITIVE + + @cached_property + def path(self): + return '/' + '/'.join(x.key for x in self.stack if x.key) + + def _clear_path(self): + if "path" in self.__dict__: + del self.__dict__["path"] + + def process(self, on_element): + self.stack = [] + try: + processing_order = self.schema.settings.processing_order + except AttributeError: + processing_order = None + self._process_internal(None, self.schema.schema, on_element, processing_order) + + def _process_internal(self, key, element, on_element, processing_order: List[str] = None): + popped = False + + try: + # push the element to the stack + self.push(key, element) + + # call the on_element function. 
+ ret = on_element(self) + + # if the result is not a generator, + if not isinstance(ret, Generator): + ret = iter([ret]) + + res = next(ret, '') + + if res is self.SKIP: + return + + match self.top_type: + case self.LIST: + for idx, l in enumerate(self.top.data): + self._process_internal(idx, l, on_element) + case self.DICT: + items = list(self.top.data.items()) + if processing_order: + def key_function(x): + try: + return processing_order.index(x) + except ValueError: + pass + try: + return processing_order.index('*') + except ValueError: + pass + return len(processing_order) + + items.sort(key=key_function) + for k, v in items: + self._process_internal(k, v, on_element) + + next(ret, '') + + except ReplaceElement as re: + self.pop() + popped = True + for k, v in re.data.items(): + self._process_internal(k, v, on_element) + finally: + if not popped: + self.pop() diff --git a/oarepo_model_builder/templates/__init__.py b/oarepo_model_builder/templates/__init__.py new file mode 100644 index 00000000..d1149556 --- /dev/null +++ b/oarepo_model_builder/templates/__init__.py @@ -0,0 +1,30 @@ +from pathlib import Path + +import pkg_resources + + +class TemplateRegistry: + def __init__(self): + self.mapping = {} + for ep in reversed(sorted( + pkg_resources.iter_entry_points('oarepo_model_builder.templates'), + key=lambda ep: ep.name)): + loaded_package = ep.load() + base_path = Path(loaded_package.__file__).parent.absolute() + for k, v in loaded_package.TEMPLATES.items(): + self.mapping[k] = base_path.joinpath(v) + + def get_template(self, template_key, settings): + # try to get the template key from settings + path = settings.python.templates.get(template_key, self.mapping.get(template_key, None)) + if not path: + raise AttributeError(f'Template with key {template_key} has not been found') + if isinstance(path, str): + path = Path(path).absolute() + if path.exists(): + with path.open() as f: + return f.read() + raise AttributeError(f'Template with key {template_key} has 
not been found, file at path {path} does not exist') + + +templates = TemplateRegistry() diff --git a/tests/api/__init__.py b/oarepo_model_builder/utils/__init__.py similarity index 100% rename from tests/api/__init__.py rename to oarepo_model_builder/utils/__init__.py diff --git a/oarepo_model_builder/utils/camelcase.py b/oarepo_model_builder/utils/camelcase.py new file mode 100644 index 00000000..3896bf9d --- /dev/null +++ b/oarepo_model_builder/utils/camelcase.py @@ -0,0 +1,13 @@ +import re + + +def camel_case(s): + s = re.sub(r"(_|-)+", " ", s).title().replace(" ", "") + return ''.join(s) + + +def snake_case(s): + s = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', s) + s = re.sub('__([A-Z])', r'_\1', s) + s = re.sub('([a-z0-9])([A-Z])', r'\1_\2', s) + return s.lower() diff --git a/oarepo_model_builder/utils/cst.py b/oarepo_model_builder/utils/cst.py new file mode 100644 index 00000000..9b1cb9d7 --- /dev/null +++ b/oarepo_model_builder/utils/cst.py @@ -0,0 +1,101 @@ +from libcst import CSTTransformer, ClassDef, FunctionDef, Module, SimpleStatementLine, Import, ImportFrom + + +class MergingTransformer(CSTTransformer): + def __init__(self, new_cst): + super().__init__() + self.new_cst = new_cst + self.stack = [] + self._push(new_cst.body) + + def _push(self, new_members): + self.stack.append((new_members, {})) + + def _pop(self): + return self.stack.pop() + + def _set(self, key, value): + self.stack[-1][1][key] = value + + def visit_ClassDef(self, node: ClassDef): + self._set(node.name.value, node) + for part in self.stack[-1][0]: + if hasattr(part, 'name'): + if part.name.value == node.name.value: + if hasattr(part, 'body'): + self._push(part.body.body) + break + else: + self._push([]) + + def visit_FunctionDef(self, node: FunctionDef): + # do not visit the children of the function as we do not modify + # an already written function code + self._set(node.name.value, node) + return False + + def leave_ClassDef( + self, original_node: ClassDef, updated_node: ClassDef + ): + 
parts_to_add = self.leave_class_module() + self._pop() + + if parts_to_add: + # and merge them with the original content + return updated_node.with_changes( + body=updated_node.body.with_changes( + body=[ + *updated_node.body.body, + *parts_to_add + ] + ) + ) + return updated_node + + def leave_Module(self, original_node: Module, updated_node: Module): + parts_to_add = self.leave_class_module() + + new_imports = self._extract_imports(self.stack[0][0]) + existing_imports = self._extract_imports(original_node.body) + + # extract imports that are not yet present + extra_imports = [] + for ni in new_imports: + for ei in existing_imports: + if ei.deep_equals(ni): + break + else: + extra_imports.append(ni) + + self._pop() + + # and merge + return updated_node.with_changes( + body=[ + *extra_imports, + *updated_node.body, + *parts_to_add + ] + ) + + def _extract_imports(self, lines): + imports = [] + for l in lines: + if isinstance(l, SimpleStatementLine): + for ml in l.children: + if isinstance(ml, (Import, ImportFrom)): + imports.append(l) + break + return imports + + def leave_class_module(self): + new_parts = self.stack[-1][0] + existing_parts = self.stack[-1][1] + + # look for the parts in the new parts that are not in the existing parts + parts_to_add = [] + for part in new_parts: + if hasattr(part, 'name'): + if part.name.value not in existing_parts: + parts_to_add.append(part) + return parts_to_add diff --git a/oarepo_model_builder/utils/deepmerge.py b/oarepo_model_builder/utils/deepmerge.py new file mode 100644 index 00000000..37c83796 --- /dev/null +++ b/oarepo_model_builder/utils/deepmerge.py @@ -0,0 +1,24 @@ +def deepmerge(target, source, stack=None): + if stack is None: + stack = [] + + if isinstance(target, dict): + if source is not None: + if not isinstance(source, dict): + raise AttributeError( + f'Incompatible source and target on path {stack}: source {source}, target {target}') + for k, v in source.items(): + if k not in target: + target[k] = source[k] + 
else: + target[k] = deepmerge(target[k], source[k], stack + [k]) + elif isinstance(target, list): + if source is not None: + if not isinstance(source, list): + raise AttributeError( + f'Incompatible source and target on path {stack}: source {source}, target {target}') + for idx in range(min(len(source), len(target))): + target[idx] = deepmerge(target[idx], source[idx], stack + [idx]) + for idx in range(len(target), len(source)): + target.append(source[idx]) + return target \ No newline at end of file diff --git a/oarepo_model_builder/utils/hyphen_munch.py b/oarepo_model_builder/utils/hyphen_munch.py new file mode 100644 index 00000000..cdc7bcd8 --- /dev/null +++ b/oarepo_model_builder/utils/hyphen_munch.py @@ -0,0 +1,21 @@ +from typing import Mapping + +import munch + + +class HyphenMunch(munch.AutoMunch): + def __setitem__(self, key, value): + if isinstance(value, Mapping) and not isinstance(value, (munch.AutoMunch, munch.Munch)): + value = munch.munchify(value, HyphenMunch) + return super().__setitem__(key, value) + + def __getitem__(self, key): + try: + return super().__getitem__(key) + except: + try: + key = key.replace('_', '-') + return super().__getitem__(key) + except: + key = key.replace('-', '_') + return super().__getitem__(key) diff --git a/oarepo_model_builder/utils/json_pathlib.py b/oarepo_model_builder/utils/json_pathlib.py new file mode 100644 index 00000000..6911a937 --- /dev/null +++ b/oarepo_model_builder/utils/json_pathlib.py @@ -0,0 +1,141 @@ +import re +from collections import OrderedDict, namedtuple +from functools import cached_property +from typing import List, Iterable + +""" +A path is a sequence of names separated by '/' with an optional +condition applied to the subtree. + +The condition is a function taking current:PathCondition as a single parameter +which can be used for example as: +current.a.b == "data" or current["a"]["b"] == "data" + +Any expression with the current should return an iterator or list of results (matched subtrees). 
+If any is truish, the condition is interpreted as matching. +""" + +JSONPathRecord = namedtuple('JSONPathRecord', 'path, condition, value') + + +class JSONPaths: + def __init__(self): + self.path_regex_list = [] + self.path_to_index = {} + self.paths: List[List[JSONPathRecord]] = [] + + def register(self, path, condition=None, value=None): + """ + paths should be registered from longest to shortest + + :param path: a path to register + :param condition: a condition applied to the node on the path + :param value: the value to return if path & condition match + """ + + if path not in self.path_to_index: + self.path_to_index[path] = len(self.path_to_index) + self.paths.append([]) + self.path_regex_list.append(path_to_regex(path)) + path_locators = self.paths[self.path_to_index[path]] + path_locators.append(JSONPathRecord( + path=path, + condition=condition, + value=value + )) + + @cached_property + def path_regex(self): + return re.compile('|'.join(f'({x})' for x in self.path_regex_list)) + + def match(self, path=None, subtree=None, extra_data=None): + """ + Matches a path and subtree against stored paths. 
Returns iterator of matched values + + :param path: the path that should match + :param subtree: teh subtree against which to match stored conditions + :return: iterator of matched values + """ + match = self.path_regex.match(path) + if not match: + return None + for idx, grp in enumerate(match.groups()): + if grp: + matched = False + for rec in self.paths[idx]: + if rec.condition: + condition_result = rec.condition(PathCondition(subtree, extra_data=extra_data)) + if isinstance(condition_result, Iterable): + condition_result = any(condition_result) + if condition_result: + matched = True + yield rec.value + else: + matched = True + yield rec.value + + if matched: + break + + +def path_to_regex(path): + split_path = [x for x in re.split('(/)', path) if x] + + def fragment_to_regex(f): + if f == '**': + return '.+' + return f.replace('*', '[^/]+') + + return ''.join(fragment_to_regex(x) for x in split_path) + + +class PathCondition: + def __init__(self, start=None, subtree_list=(), extra_data=None): + self._subtree_list = [*subtree_list] + if start: + self._subtree_list.append(start) + self._extra_data = extra_data or {} + + def _apply(self, p, subtree_list): + if subtree_list: + if p == '*': + for subtree in subtree_list: + itr = [] + if isinstance(subtree, dict): + itr = subtree.values() + elif isinstance(subtree, list): + itr = subtree + yield from itr + elif p == '**': + first_level = list(self._apply('*', subtree_list)) + yield from first_level + yield from self._apply('**', first_level) + else: + for subtree in subtree_list: + # match the path element and descend if found + if isinstance(subtree, dict): + if p in subtree: + yield subtree[p] + elif isinstance(subtree, list): + if isinstance(p, int) and 0 <= p < len(subtree): + yield subtree[p] + + def __getattr__(self, item): + if item in self._extra_data: + return self._extra_data[item] + try: + item = int(item) + except: + pass + return PathCondition(subtree_list=list(self._apply(item, self._subtree_list))) + 
+ def __getitem__(self, item): + return PathCondition(subtree_list=list(self._apply(item, self._subtree_list))) + + def __eq__(self, other): + if isinstance(other, PathCondition): + # TODO: deep equals maybe + return [subtree for subtree in self._subtree_list for o in other._subtree_list if subtree == o] + + # TODO: deep equals maybe + return [subtree for subtree in self._subtree_list if subtree == other] diff --git a/oarepo_model_builder/utils/schema.py b/oarepo_model_builder/utils/schema.py new file mode 100644 index 00000000..b1c611f0 --- /dev/null +++ b/oarepo_model_builder/utils/schema.py @@ -0,0 +1,112 @@ +from oarepo_model_builder.stack import ModelBuilderStack + + +class SchemaPathValidator: + valid = False + + def get(self, path_el): + raise NotImplementedError() + + +class Invalid(SchemaPathValidator): + def get(self, path_el): + return self + + +invalid = Invalid() + + +class PrimitiveValidator(SchemaPathValidator): + valid = True + + def get(self, path_el): + return invalid + + +primitive = PrimitiveValidator() + + +class DictValidator(SchemaPathValidator): + valid = True + + def __init__(self, dict=None, primitives=None): + self.dict = dict or {} + if primitives: + for p in primitives.split(','): + p = p.strip() + if not p: + continue + self.dict[p] = primitive + + def get(self, path_el): + if path_el in self.dict: + return self.dict[path_el] + return invalid + + +class AnyKeyDictValidator(SchemaPathValidator): + valid = True + + def __init__(self, _next): + self._next = _next + + def get(self, path_el): + return self._next + + +class Ref(SchemaPathValidator): + refs = {} + valid = True + + def __init__(self, refstr): + self.refstr = refstr + + def get(self, path_el): + return self.refs[self.refstr].get(path_el) + + +Ref.refs['type'] = DictValidator({ + "properties": Ref('property'), + "patternProperties": Ref('property'), + "additionalProperties": DictValidator(primitives='type'), + "propertyNames": DictValidator(primitives='pattern'), + "items": 
Ref('type'), + "prefixItems": Ref('type'), + "contains": Ref('type'), +}, + primitives="type," + "enum,const," # enums and consts + "required,minProperties,maxProperties," # object + "minItems,maxItems,uniqueItems,minContains,maxContains," # array + "minLength,maxLength,pattern,format," # string + "minimum,exclusiveMinimum,maximum,exclusiveMaximum,multipleOf" # numbers +) + +Ref.refs['property'] = AnyKeyDictValidator(Ref('type')) + +schema_paths = DictValidator( + { + "$vocabulary": AnyKeyDictValidator(primitive), + "properties": Ref('property') + }, primitives='$schema,$id,type' +) + +model_paths = DictValidator( + { + 'model': schema_paths + } +) + + +def is_schema_element(stack: ModelBuilderStack): + sc = model_paths + for entry in stack.stack: + key = entry.key + if key is None: + continue + if isinstance(key, int): + continue + sc = sc.get(key) + if not sc.valid: + return False + return True diff --git a/oarepo_model_builder/utils/verbose.py b/oarepo_model_builder/utils/verbose.py new file mode 100644 index 00000000..a8a878d9 --- /dev/null +++ b/oarepo_model_builder/utils/verbose.py @@ -0,0 +1,43 @@ +import logging +from collections import namedtuple + +default_logger = logging.getLogger('oarepo_model_builder') + + +# verbosity 1 is the most serious +# verbosity 9 is the least serious + + +class Log: + INFO = 0 + ERROR = logging.INFO - logging.ERROR + + LogStackEntry = namedtuple('LogStackEntry', 'verbosity,indent,logger') + + def __init__(self): + self.stack = [Log.LogStackEntry(verbosity=0, indent=0, logger=default_logger)] + + def enter(self, verbosity, fmt, *args, logger=None, **kwargs): + if not logger: + logger = self.stack[-1].logger + level = self.stack[-1].indent + + if logger.isEnabledFor(logging.INFO - verbosity): + self(verbosity, fmt, *args, logger=logger, **kwargs) + level += 1 + + self.stack.append(Log.LogStackEntry(verbosity=verbosity, indent=level, logger=logger)) + + def leave(self, fmt=None, *args, **kwargs): + top = self.stack.pop() + if 
fmt: + self(top.verbosity, fmt, *args, logger=top.logger, **kwargs) + + def __call__(self, verbosity, fmt, *args, logger=None, **kwargs): + if not logger: + logger = self.stack[-1].logger + indent = ' ' * (self.stack[-1].indent - 1) + logger.log(logging.INFO - verbosity, indent + fmt, *args, **kwargs) + + +log = Log() diff --git a/pyproject.toml b/pyproject.toml index 06ab007a..64058087 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,8 +1,8 @@ [tool.poetry] name = "oarepo-model-builder" -version = "0.1.7" +version = "0.9.0" description = "An utility library that generates OARepo required data model files from a JSON specification file" -authors = ["Miroslav Bauer "] +authors = ["Miroslav Bauer ", "Miroslav Simek "] readme = "README.md" packages = [ @@ -10,33 +10,81 @@ packages = [ ] [tool.poetry.dependencies] -python = ">=3.8,<4.0" -json5 = "^0.9.6" -deepmerge = "^0.3.0" +python = ">=3.10,<4.0" +json5 = { version = "^0.9.6", optional = true } libcst = "^0.3.19" -munch = "^2.4.0" click = ">=7.1" tomlkit = "^0.7.2" +jsonpointer = "^2.2" +deepdiff = "^5.6.0" +PyYAML = { version = "^6.0", optional = true } +Jinja2 = "^3.0.3" +munch = "^2.5.0" +isort = "^5.10.1" +black = "^21.11b1" + +[tool.poetry.extras] +json5 = ["json5"] +pyyaml = ["pyyaml"] [tool.poetry.dev-dependencies] -pytest = "^4.6" +pytest = "^6" [build-system] requires = ["poetry-core>=1.0.0"] build-backend = "poetry.core.masonry.api" -[tool.poetry.plugins."oarepo_model_builder.source"] -datamodel = "oarepo_model_builder.builders:DataModelBuilder" +[tool.poetry.plugins."oarepo_model_builder.builders"] +020-jsonschema = "oarepo_model_builder.builders.jsonschema:JSONSchemaBuilder" +030-mapping = "oarepo_model_builder.builders.mapping:MappingBuilder" + +100-python_structure = "oarepo_model_builder.builders.python_structure:PythonStructureBuilder" +110-invenio_record = "oarepo_model_builder.invenio.invenio_record:InvenioRecordBuilder" +120-invenio_record_metadata = 
"oarepo_model_builder.invenio.invenio_record_metadata:InvenioRecordMetadataBuilder" +130-invenio_record_schema = "oarepo_model_builder.invenio.invenio_record_schema:InvenioRecordSchemaBuilder" + +200-invenio_record_permissions = "oarepo_model_builder.invenio.invenio_record_permissions:InvenioRecordPermissionsBuilder" + +300-invenio_record_search_options = "oarepo_model_builder.invenio.invenio_record_search:InvenioRecordSearchOptionsBuilder" +310-invenio_record_service_config = "oarepo_model_builder.invenio.invenio_record_service_config:InvenioRecordServiceConfigBuilder" +320-invenio_record_service = "oarepo_model_builder.invenio.invenio_record_service:InvenioRecordServiceBuilder" +340-invenio_record_dumper = "oarepo_model_builder.invenio.invenio_record_dumper:InvenioRecordDumperBuilder" + +400-invenio_record_resource_config = "oarepo_model_builder.invenio.invenio_record_resource_config:InvenioRecordResourceConfigBuilder" +410-invenio_record_resource = "oarepo_model_builder.invenio.invenio_record_resource:InvenioRecordResourceBuilder" +420-invenio_blueprint = "oarepo_model_builder.invenio.invenio_blueprint:InvenioBlueprintBuilder" + +900-invenio_sample_app_poetry = "oarepo_model_builder.invenio.invenio_sample_app_poetry:InvenioSampleAppPoetryBuilder" +910-invenio_record_metadata_alembic_poetry = "oarepo_model_builder.invenio.invenio_record_metadata_alembic_poetry:InvenioRecordMetadataAlembicPoetryBuilder" +920-invenio_record_metadata_models_poetry = "oarepo_model_builder.invenio.invenio_record_metadata_models_poetry:InvenioRecordMetadataModelsPoetryBuilder" +930-invenio_resource_poetry = "oarepo_model_builder.invenio.invenio_record_resource_poetry:InvenioRecordResourcePoetryBuilder" +940-invenio_record_search_poetry = "oarepo_model_builder.invenio.invenio_record_search_poetry:InvenioRecordSearchPoetryBuilder" +950-invenio_record_jsonschemas_poetry = "oarepo_model_builder.invenio.invenio_record_jsonschemas_poetry:InvenioRecordJSONSchemasPoetryBuilder" + + 
+[tool.poetry.plugins."oarepo_model_builder.ouptuts"] +jsonschema = "oarepo_model_builder.outputs.jsonschema:JSONSchemaOutput" +mapping = "oarepo_model_builder.outputs.mapping:MappingOutput" +python = "oarepo_model_builder.outputs.python:PythonOutput" +toml = "oarepo_model_builder.outputs.toml:TOMLOutput" + +[tool.poetry.plugins."oarepo_model_builder.property_preprocessors"] +10-text_keyword = "oarepo_model_builder.property_preprocessors.text_keyword:TextKeywordPreprocessor" + +[tool.poetry.plugins."oarepo_model_builder.model_preprocessors"] +01-default = "oarepo_model_builder.model_preprocessors.default_values:DefaultValuesModelPreprocessor" +10-invenio = "oarepo_model_builder.model_preprocessors.invenio:InvenioModelPreprocessor" +20-elasticsearch = "oarepo_model_builder.model_preprocessors.elasticsearch:ElasticsearchModelPreprocessor" -[tool.poetry.plugins."oarepo_model_builder.elements"] -jsonschema = "oarepo_model_builder.builders:JSONSchemaBuilder" -mapping = "oarepo_model_builder.builders:MappingBuilder" +[tool.poetry.plugins."oarepo_model_builder.loaders"] +json = "oarepo_model_builder.loaders:json_loader" +json5 = "oarepo_model_builder.loaders:json_loader" +yaml = "oarepo_model_builder.loaders:yaml_loader" +yml = "oarepo_model_builder.loaders:yaml_loader" -[tool.poetry.plugins."oarepo_model_builder.jsonschema"] -jsonschema = "oarepo_model_builder.outputs:JsonSchemaOutput" +[tool.poetry.plugins."oarepo_model_builder.templates"] +99-base_templates = "oarepo_model_builder.invenio" -[tool.poetry.plugins."oarepo_model_builder.mapping"] -mapping = "oarepo_model_builder.outputs:MappingOutput" [tool.poetry.scripts] -models = "oarepo_model_builder.cli:model" +oarepo-compile-model = "oarepo_model_builder.cli:run" diff --git a/tests/api/conftest.py b/tests/api/conftest.py deleted file mode 100644 index 7aa9fc7e..00000000 --- a/tests/api/conftest.py +++ /dev/null @@ -1,91 +0,0 @@ -import os - -import pytest - -from oarepo_model_builder.proxies import 
current_model_builder - - -class LiteEntryPoint: - def __init__(self, name, val): - self.name = name - self.val = val - - def load(self): - return self.val - - -def extra_entrypoints(app, group=None, name=None): - from . import datamodels - - data = { - 'oarepo_model_builder.datamodels': [ - LiteEntryPoint('test', datamodels), - ], - } - - names = data.keys() if name is None else [name] - for key in names: - for entry_point in data[key]: - yield entry_point - - -@pytest.fixture() -def datamodel_json(): - return { - "title": "Test record v1.0.0", - "type": "object", - "additionalProperties": False, - # TODO: implement oarepo:include - # "oarepo:include": ["invenio-record-v1.0.0"], - "oarepo:ui": { - "title": { - "cs": "Datamodel title CS", - "en": "Datamodel title EN" - } - }, - "properties": { - "field1": { - "type": "string", - "oarepo:ui": { - "hint": { - "cs": "testovaci field", - "en": "test field" - }, - }, - "oarepo:search": { - "mapping": "keyword" - }, - }, - "field2": { - "type": "object", - "description": "Record access control and ownership.", - "additionalProperties": False, - "properties": { - "subfield1": { - "description": "Sub field 1.", - "type": "array", - # TODO: implement items auto import - # "items": "rdm-definitions-v1.0.0#agent", - "oarepo:ui": { - "label": {"cs": "vloz subfield1 hodnotu", - "en": "enter subfield1 value"} - } - # TODO: implement default mappings for field without `search` spec - } - } - } - } - } - - -@pytest.fixture() -def model_config(): - config = current_model_builder.model_config - config.base_dir = os.getcwd() - - config.source = None - config.package = (os.path.basename(os.getcwd())).replace('-', '_') - config.kebab_package = config.package.replace('_', '-') - config.datamodel = config.kebab_package - config.datamodel_version = '1.0.0' - return config diff --git a/tests/api/datamodels/__init__.py b/tests/api/datamodels/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git 
a/tests/api/datamodels/test.json5 b/tests/api/datamodels/test.json5 deleted file mode 100644 index b60721dc..00000000 --- a/tests/api/datamodels/test.json5 +++ /dev/null @@ -1,49 +0,0 @@ -{ - "title": "Test record v1.0.0", - "type": "object", - "additionalProperties": false, - "oarepo:ui": { - "title": { - "cs": "Title cs", - "en": "Title en" - }, - }, - "oarepo:use": [ - "include1" - ], - "properties": { - "field0": { - "oarepo:use": "include2" - }, - "field1": { - "type": "string", - "oarepo:ui": { - "hint": { - "cs": "testovaci field", - "en": "test field" - }, - }, - "oarepo:search": { - "mapping": "keyword" - }, - }, - "field2": { - "type": "object", - "description": "Record access control and ownership.", - "additionalProperties": false, - "properties": { - "subfield1": { - "description": "Sub field 1.", - "type": "array", - "items": "include2", - "oarepo:ui": { - "label": { - "cs": "vloz subfield1 hodnotu", - "en": "enter subfield1 value" - } - } - } - } - } - } -} \ No newline at end of file diff --git a/tests/api/datamodels/type1.json5 b/tests/api/datamodels/type1.json5 deleted file mode 100644 index 853d3332..00000000 --- a/tests/api/datamodels/type1.json5 +++ /dev/null @@ -1,10 +0,0 @@ -{ - "title": "Included properties v1.0.0", - "type": "object", - "additionalProperties": true, - "properties": { - "includedField1": { - "type": "string", - } - } -} \ No newline at end of file diff --git a/tests/api/datamodels/type2.json5 b/tests/api/datamodels/type2.json5 deleted file mode 100644 index 3ab970ec..00000000 --- a/tests/api/datamodels/type2.json5 +++ /dev/null @@ -1,3 +0,0 @@ -{ - "type": "number" -} diff --git a/tests/api/datamodels/type3.json5 b/tests/api/datamodels/type3.json5 deleted file mode 100644 index b8f3e867..00000000 --- a/tests/api/datamodels/type3.json5 +++ /dev/null @@ -1,14 +0,0 @@ -{ - "type": "string", - "oarepo:ui": { - "hint": { - "cs": "includnuty field 3", - "en": "included field 3" - }, - }, - "search": { - "mapping": { - "type": 
"text" - } - } -} diff --git a/tests/api/datamodels/type4.json5 b/tests/api/datamodels/type4.json5 deleted file mode 100644 index 51cd37d5..00000000 --- a/tests/api/datamodels/type4.json5 +++ /dev/null @@ -1,9 +0,0 @@ -{ - "type": "object", - "additionalProperties": false, - "properties": { - "includedField2": { - "type": "number", - } - } -} diff --git a/tests/api/helpers.py b/tests/api/helpers.py deleted file mode 100644 index 57973588..00000000 --- a/tests/api/helpers.py +++ /dev/null @@ -1,13 +0,0 @@ - -def navigate_json(src, *path): - for p in path: - src = src[p] - - return src - - -def _process_field(builder, src, path_list, config, outputs): - for paths in path_list: - builder.pre(navigate_json(src, *paths), config, ['properties'] + paths, outputs) - for paths in reversed(path_list): - builder.post(navigate_json(src, *paths), config, ['properties'] + paths, outputs) diff --git a/tests/api/test_api.py b/tests/api/test_api.py deleted file mode 100644 index befcfe4f..00000000 --- a/tests/api/test_api.py +++ /dev/null @@ -1,182 +0,0 @@ -from unittest.mock import patch - -from oarepo_model_builder.api import resolve_includes -from oarepo_model_builder.proxies import current_model_builder -from tests.api.conftest import extra_entrypoints - - -@patch('pkg_resources.iter_entry_points', extra_entrypoints) -def test_resolve_includes(): - assert {'test', 'type1', 'type2', 'type3', 'type4'} == set(current_model_builder.datamodels) - - test_cases = [ - # 1) Check if list type references resolves correctly - # 1.1) For explicit oarepo:use keyword definition - ({ - "title": "Test record for 1.1", - "type": "object", - "additionalProperties": False, - "oarepo:use": ["type1"], - "properties": { - "field1": { - "oarepo:use": [ - "type1", - "type4" - ] - } - } - }, { - "title": "Test record for 1.1", - "type": "object", - "additionalProperties": False, - "properties": { - "includedField1": {"type": "string"}, - "field1": { - "type": "object", - 'title': 'Included properties 
v1.0.0', - "additionalProperties": False, - "properties": { - "includedField1": {"type": "string"}, - "includedField2": {"type": "number"} - } - } - } - }), - # 1.2) For implicit list definition - ({ - "title": "Test record for 1.2", - "type": "object", - "additionalProperties": False, - "properties": { - "field1": [ - 'type1', - 'type4' - ] - } - }, { - "title": "Test record for 1.2", - "type": "object", - "additionalProperties": False, - "properties": { - "field1": { - "type": "object", - "title": "Included properties v1.0.0", - "additionalProperties": False, - "properties": { - "includedField1": {"type": "string"}, - "includedField2": {"type": "number"} - } - } - } - }), - # 2) Check if string type reference is resolved correctly - # 2.1) For explicit oarepo:use keyword definition - ({ - "title": "Test record for 2.1", - "type": "object", - "additionalProperties": False, - "properties": { - "field1": { - "oarepo:use": "type2" - } - } - }, { - "title": "Test record for 2.1", - "type": "object", - "additionalProperties": False, - "properties": { - "field1": { - "type": "number" - } - } - }), - # 2.2) For implicit property type definition - ({ - "title": "Test record for 2.2", - "type": "object", - "additionalProperties": False, - "properties": { - "field1": "type2" - } - }, { - "title": "Test record for 2.2", - "type": "object", - "additionalProperties": False, - "properties": { - "field1": { - "type": "number" - } - } - }), - # 3) Check array items type reference resolves correctly - # 3.1) For string reference - ({ - "title": "Test record for 3.1", - "type": "object", - "additionalProperties": False, - "properties": { - "arrayField": { - "type": "array", - "items": "type2" - } - } - }, { - "title": "Test record for 3.1", - "type": "object", - "additionalProperties": False, - "properties": { - "arrayField": { - "type": "array", - "items": { - "type": "number" - } - } - } - }), - # 3.2) For list reference - ({ - "title": "Test record for 3.2", - "type": "object", 
- "additionalProperties": False, - "properties": { - "arrayField": { - "type": "array", - "items": [ - "type1", - "type4", - { - "type": "object", - "properties": { - "field1": "type2" - } - } - ] - } - } - }, { - "title": "Test record for 3.2", - "type": "object", - "additionalProperties": False, - "properties": { - "arrayField": { - "type": "array", - "items": { - "type": "object", - 'title': 'Included properties v1.0.0', - "additionalProperties": False, - "properties": { - "includedField1": {"type": "string"}, - "includedField2": {"type": "number"}, - "field1": {"type": "number"} - } - } - } - } - }) - ] - - for tc in test_cases: - src, result = tc - - resolve_includes(src, None) - assert src == result diff --git a/tests/api/test_builder.py b/tests/api/test_builder.py deleted file mode 100644 index bd62de18..00000000 --- a/tests/api/test_builder.py +++ /dev/null @@ -1,16 +0,0 @@ -from oarepo_model_builder.builders import JSONSchemaBuilder -from oarepo_model_builder.builders import MappingBuilder -from oarepo_model_builder.builders import DataModelBuilder -from oarepo_model_builder.proxies import current_model_builder - - -def test_datamodel_builder(datamodel_json, model_config): - build = DataModelBuilder() - el_handlers = [MappingBuilder(), JSONSchemaBuilder()] - - outputs = {} - build(el=datamodel_json, config=model_config, path=[], outputs=outputs, handlers=el_handlers) - - assert len(outputs) == 2 - assert 'mapping' in outputs - assert 'jsonschema' in outputs diff --git a/tests/api/test_cli.py b/tests/api/test_cli.py deleted file mode 100644 index e69de29b..00000000 diff --git a/tests/api/test_jsonschema.py b/tests/api/test_jsonschema.py deleted file mode 100644 index fa17155d..00000000 --- a/tests/api/test_jsonschema.py +++ /dev/null @@ -1,108 +0,0 @@ -import json -import os - -from oarepo_model_builder.builders import JSONSchemaBuilder -from oarepo_model_builder.outputs import JsonSchemaOutput -from oarepo_model_builder.proxies import current_model_builder 
-from tests.api.helpers import _process_field - - -def test_jsonschema_builder(model_config): - builder = JSONSchemaBuilder() - outputs = {} - config = current_model_builder.model_config - - # 1) Test that begin properly initializes outputs - builder.begin(model_config, outputs, {}) - assert len(outputs) == 1 - assert outputs['jsonschema'].path.split('/')[-3:] == [ - 'jsonschemas', - 'oarepo_model_builder', - 'oarepo-model-builder-v1.0.0.json' - ] - assert outputs['jsonschema'].data == model_config.jsonschema - assert len(builder.stack) == 1 - assert builder.stack == [model_config.jsonschema] - - # 2) Test `pre` implementation - - # 2.1) `oarepo:` elements and subtrees are ignored by jsonschema - src = { - 'field1': { - 'type': 'object', - 'properties': { - 'a': { - 'type': 'string', - 'oarepo:search': 'text' - } - }, - 'required': ['a'], - 'oarepo:ui': { - 'label': {} - }, - 'oarepo:search': - {'mapping': 'keyword'} - } - } - res = { - **config.jsonschema, - 'properties': { - 'field1': { - 'type': 'object', - 'properties': { - 'a': { - 'type': 'string' - } - }, - 'required': ['a'], - } - } - } - builder.pre(src, config, ['properties'], outputs) - builder.pre(src, config, ['field1'], outputs) - - path_list = [['type']] - _process_field(builder, src['field1'], path_list, config, outputs) - - path_list = [['properties'], - ['properties', 'a'], - ['properties', 'a', 'type'], - ['properties', 'a', 'oarepo:search']] - _process_field(builder, src['field1'], path_list, config, outputs) - - path_list = [['required']] - _process_field(builder, src['field1'], path_list, config, outputs) - - path_list = [['oarepo:ui']] - _process_field(builder, src['field1'], path_list, config, outputs) - - path_list = [['oarepo:search'], - ['oarepo:search', 'mapping']] - _process_field(builder, src['field1'], path_list, config, outputs) - - assert outputs['jsonschema'].data == res - - -def test_jsonschema_output(model_config): - test_path = '/tmp/test.json' - test_data = { - 'properties': 
{'field1': {'type': 'number'}}, - **model_config.jsonschema - } - - jo = JsonSchemaOutput(path=test_path, data=test_data) - - # 1) Test output initialize - assert jo.output_type == 'jsonschema' - assert jo.path == test_path - assert jo.data == test_data - - # 2) Test output `save` - assert not os.path.exists(test_path) - jo.save() - assert os.path.exists(test_path) - - with open(test_path, mode='r') as fp: - saved = json.load(fp) - os.remove(test_path) - assert saved == test_data diff --git a/tests/api/test_mapping.py b/tests/api/test_mapping.py deleted file mode 100644 index 275a4258..00000000 --- a/tests/api/test_mapping.py +++ /dev/null @@ -1,276 +0,0 @@ -import copy -import json -import os - -from oarepo_model_builder.builders import MappingBuilder -from oarepo_model_builder.outputs import MappingOutput -from oarepo_model_builder.proxies import current_model_builder -from tests.api.helpers import _process_field - - -def test_mapping_builder(): - mb = MappingBuilder() - - config = current_model_builder.model_config - - outputs = {} - el = {} - mb.begin(config, outputs, el) - - assert len(outputs) == 1 - assert isinstance(outputs['mapping'], MappingOutput) - - # 0) Test False mapping elements are ignored - src = { - 'test0': {'type': 'string'}, - 'test1': {'oarepo:search': False} - } - - res = { - **config.search['mapping'], - **{ - 'mappings': { - 'properties': {} - } - } - } - no_mapping_config = copy.deepcopy(config) - no_mapping_config.search['default_mapping_type'] = False - - mb.pre(src, config, ['properties'], outputs) - # -- process test0 field - path_list = [['test0'], ['test0', 'type']] - _process_field(mb, src, path_list, no_mapping_config, outputs) - # -- process test1 field - path_list = [['test1'], ['test1', 'oarepo:search']] - _process_field(mb, src, path_list, config, outputs) - - assert outputs['mapping'].data == res - - mb = MappingBuilder() - mb.begin(config, outputs, el) - # 1) Test default mapping type without explicit mapping spec - src = { 
- 'test0': {'type': 'string'}, - 'testObject': { - 'type': 'object', - 'properties': { - 'field1': {'type': 'string'} - } - } - } - res = {**config.search['mapping'], - **{ - 'mappings': { - 'properties': { - 'test0': {'type': 'keyword'}, - 'testObject': { - 'type': 'object', - 'properties': { - 'field1': { - 'type': 'keyword' - } - } - } - } - } - }} - - mb.pre(src, config, ['properties'], outputs) - # -- process test0 field - path_list = [['test0'], ['test0', 'type']] - _process_field(mb, src, path_list, config, outputs) - - # -- process testObject field - path_list = [['testObject'], - ['testObject', 'properties'], - ['testObject', 'properties', 'field1'], - ['testObject', 'properties', 'field1', 'type']] - _process_field(mb, src, path_list, config, outputs) - - assert outputs['mapping'].data == res - - mb = MappingBuilder() - mb.begin(config, outputs, el) - - # 2) Test explicit field mapping specification - src = { - 'test1': { - 'type': 'string', - 'oarepo:search': {'mapping': 'keyword'} - }, - 'testShorthand': { - 'type': 'string', - 'oarepo:search': 'date' - }, - 'testObject': { - 'oarepo:search': { - 'mapping': { - 'type': 'text', - 'index': False - } - } - } - } - res = {**config.search['mapping'], - **{ - 'mappings': { - 'properties': { - 'test1': {'type': 'keyword'}, - 'testShorthand': {'type': 'date'}, - 'testObject': { - 'type': 'text', - 'index': False - } - } - } - }} - - mb.pre(src, config, ['properties'], outputs) - # -- process test1 - path_list = [['test1'], ['test1', 'oarepo:search']] - _process_field(mb, src, path_list, config, outputs) - - # -- process testShorthand - path_list = [['testShorthand'], ['testShorthand', 'oarepo:search']] - _process_field(mb, src, path_list, config, outputs) - - # -- process testObject - path_list = [['testObject'], ['testObject', 'oarepo:search']] - _process_field(mb, src, path_list, config, outputs) - - assert outputs['mapping'].data == res - - # 3) Test items mapping - mb = MappingBuilder() - mb.begin(config, 
outputs, el) - src = { - 'testObjItems': { - 'type': 'array', - 'items': { - 'type': 'object', - 'properties': { - 'sub1': {'type': 'string'} - } - } - }, - 'testSimpleItems': { - 'type': 'array', - 'items': { - 'type': 'string' - } - }, - 'testExplicitItems': { - 'type': 'array', - 'items': { - 'type': 'string', - 'oarepo:search': 'date' - } - }, - 'testComplexItems': { - 'type': 'array', - 'uniqueItems': True, - 'items': { - "type": "object", - "properties": { - "identifiers": { - "type": "array", - "items": { - "type": "string", - "oarepo:search": "text" - }, - "uniqueItems": True - } - } - } - } - } - res = {**config.search['mapping'], - **{ - 'mappings': { - 'properties': { - 'testObjItems': { - 'type': 'object', - 'properties': { - 'sub1': { - 'type': 'keyword' - } - } - }, - 'testSimpleItems': { - 'type': 'keyword' - }, - 'testExplicitItems': { - 'type': 'date' - }, - 'testComplexItems': { - 'type': 'object', - 'properties': { - 'identifiers': { - 'type': 'text' - } - } - } - } - } - }} - mb.pre(src, config, ['properties'], outputs) - - path_list = [['testObjItems'], - ['testObjItems', 'items'], - ['testObjItems', 'items', 'properties'], - ['testObjItems', 'items', 'properties', 'sub1'], - ['testObjItems', 'items', 'properties', 'sub1', 'type']] - _process_field(mb, src, path_list, config, outputs) - - path_list = [['testSimpleItems'], - ['testSimpleItems', 'items'], - ['testSimpleItems', 'items', 'type']] - _process_field(mb, src, path_list, config, outputs) - - path_list = [['testExplicitItems'], - ['testExplicitItems', 'items'], - ['testExplicitItems', 'items', 'oarepo:search']] - _process_field(mb, src, path_list, config, outputs) - - path_list = [['testComplexItems'], - ['testComplexItems', 'type'], - ['testComplexItems', 'uniqueItems'], - ['testComplexItems', 'items'], - ['testComplexItems', 'items', 'type'], - ['testComplexItems', 'items', 'properties'], - ['testComplexItems', 'items', 'properties', 'identifiers'], - ['testComplexItems', 'items', 
'properties', 'identifiers', 'type'], - ['testComplexItems', 'items', 'properties', 'identifiers', 'items'], - ['testComplexItems', 'items', 'properties', 'identifiers', 'items', 'type'], - ['testComplexItems', 'items', 'properties', 'identifiers', 'items', 'oarepo:search'], - ['testComplexItems', 'items', 'properties', 'identifiers', 'uniqueItems']] - _process_field(mb, src, path_list, config, outputs) - - assert outputs['mapping'].data == res - - -def test_mapping_output(): - config = current_model_builder.model_config - - test_path = '/tmp/test.json' - test_data = { - 'properties': {'field1': {'type': 'keyword'}}, - **config.search['mapping'] - } - mo = MappingOutput(path='/tmp/test.json', data=test_data) - - # 1) Test output initialize - assert mo.output_type == 'mapping' - assert mo.path == test_path - assert mo.data == test_data - - # 2) Test output `save` - assert not os.path.exists(test_path) - mo.save() - assert os.path.exists(test_path) - - with open(test_path, mode='r') as fp: - saved = json.load(fp) - os.remove(test_path) - assert saved == test_data diff --git a/tests/api/test_output.py b/tests/api/test_output.py deleted file mode 100644 index df8cc20d..00000000 --- a/tests/api/test_output.py +++ /dev/null @@ -1,13 +0,0 @@ -from oarepo_model_builder.outputs import BaseOutput - - -def test_base_output(): - bo = BaseOutput('', {'test': 'data'}) - - assert bo.path == '' - assert bo.output_type is None - assert bo.data == {'test': 'data'} - - assert bo.data == { - 'test': 'data', - } diff --git a/tests/api/test_ui.py b/tests/api/test_ui.py deleted file mode 100644 index 3293fc6a..00000000 --- a/tests/api/test_ui.py +++ /dev/null @@ -1,86 +0,0 @@ -import json -import os - -from oarepo_model_builder import Config -from oarepo_model_builder.builders import DataModelBuilder -from oarepo_model_builder.builders import UIBuilder -from oarepo_model_builder.outputs.toml_output import toml_property -from oarepo_model_builder.proxies import current_model_builder - - 
-def test_ui_builder(model_config): - builder = UIBuilder() - test_path = '/tmp/test.json' - test_pyproject_path = '/tmp/test.toml' - outputs = {} - - # 1) Test that begin properly initializes outputs - builder.begin(model_config, outputs, {}) - assert len(outputs) == 2 - assert outputs['ui'].path.split('/')[-3:] == [ - 'oarepo_model_builder', - 'oarepo_ui', - 'oarepo-model-builder-v1.0.0.json' - ] - assert outputs['ui'].data == {**model_config.ui, 'fields': {}} - assert len(builder.stack) == 2 - assert outputs['pyproject'].props_to_add == [ - toml_property( - section='tool.poetry.plugins.oarepo_ui', - property='oarepo-model-builder', - value='oarepo_model_builder.oarepo_ui:oarepo-model-builder-v1.0.0.json' - )] - - # 2) Test `pre` implementation - - # 2.1) `oarepo:` elements and subtrees are ignored by jsonschema - src = { - 'oarepo:ui': { - 'title': {'cs': 'blah'} - }, - 'properties': { - 'test': { - 'oarepo:ui': { - 'label': { - 'cs': 'pole', 'en': 'field' - }}}}} - b = DataModelBuilder() - outputs = {} - b( - src, - Config({ - **current_model_builder.model_config, - 'ui_path': test_path, - 'pyproject_path': test_pyproject_path, - 'base_dir': '/tmp' - }), - [], outputs, [ - UIBuilder() - ]) - assert outputs['ui'].data == { - 'fields': { - 'test': { - 'label': {'cs': 'pole', 'en': 'field'} - }} - , - 'title': {'cs': 'blah'} - } - assert outputs['ui'].output_type == 'ui' - assert outputs['ui'].path == test_path - outputs['ui'].save() - - with open(test_path, mode='r') as fp: - saved = json.load(fp) - os.remove(test_path) - assert saved == outputs['ui'].data - - outputs['pyproject'].save() - - with open(test_pyproject_path, mode='r') as fp: - saved = fp.read() - # os.remove(test_pyproject_path) - assert saved.strip() == """ -["tool.poetry.plugins.oarepo_ui"] -oarepo-model-builder = "test.json" -""".strip() - diff --git a/tests/data/empty.json b/tests/data/empty.json new file mode 100644 index 00000000..9e26dfee --- /dev/null +++ b/tests/data/empty.json @@ -0,0 +1 
@@ +{} \ No newline at end of file diff --git a/tests/mock_open.py b/tests/mock_open.py new file mode 100644 index 00000000..100af16d --- /dev/null +++ b/tests/mock_open.py @@ -0,0 +1,19 @@ +from io import StringIO +from pathlib import Path +from typing import Dict + + +class MockOpen: + + def __init__(self): + self.files: Dict[str, StringIO] = {} + + def __call__(self, fname, mode='r'): + fname = Path(fname).absolute() + if mode == 'r': + if not fname in self.files: + raise FileNotFoundError(f'File {fname} not found. Known files {[f for f in self.files]}') + return StringIO(self.files[fname].getvalue()) + self.files[fname] = StringIO() + self.files[fname].close = lambda: None + return self.files[fname] diff --git a/tests/multilang.py b/tests/multilang.py new file mode 100644 index 00000000..924b9e00 --- /dev/null +++ b/tests/multilang.py @@ -0,0 +1,59 @@ +from oarepo_model_builder.invenio.invenio_record_schema import InvenioRecordSchemaBuilder +from oarepo_model_builder.property_preprocessors import PropertyPreprocessor, process +from oarepo_model_builder.builders.jsonschema import JSONSchemaBuilder +from oarepo_model_builder.builders.mapping import MappingBuilder +from oarepo_model_builder.builders import ReplaceElement +from oarepo_model_builder.utils.deepmerge import deepmerge + + +class MultilangPreprocessor(PropertyPreprocessor): + @process(model_builder=JSONSchemaBuilder, + path='**/properties/*', + condition=lambda current: current.type == 'multilingual') + def modify_multilang_schema(self, data, stack, **kwargs): + data['type'] = 'object' + data['properties'] = { + 'lang': { + 'type': 'string' + }, + 'value': { + 'type': 'string' + } + } + return data + + @process(model_builder=MappingBuilder, + path='**/properties/*', + condition=lambda current: current.type == 'multilingual') + def modify_multilang_mapping(self, data, stack, **kwargs): + raise ReplaceElement({ + stack.top.key: { + 'type': 'object', + 'properties': { + 'lang': { + 'type': 'keyword' + }, + 
'value': { + 'type': 'text' + } + } + }, + stack.top.key + '_cs': { + 'type': 'text' + } + }) + + @process(model_builder=InvenioRecordSchemaBuilder, + path='**/properties/*', + condition=lambda current: current.type == 'multilingual') + def modify_multilang_marshmallow(self, data, stack, **kwargs): + data['type'] = 'object' + deepmerge(data.setdefault('oarepo:marshmallow', {}), { + 'imports': [{ + 'import': 'tests.multilang', + 'alias': 'multilingual' + }], + 'class': 'multilingual.MultilingualSchema', + 'nested': True + }) + return data diff --git a/tests/test_builder.py b/tests/test_builder.py new file mode 100644 index 00000000..019c9993 --- /dev/null +++ b/tests/test_builder.py @@ -0,0 +1,32 @@ +from oarepo_model_builder.builder import ModelBuilder +from oarepo_model_builder.schema import ModelSchema +from oarepo_model_builder.model_preprocessors import ModelPreprocessor + + +def test_empty_builder(): + builder = ModelBuilder( + output_builders=[], + outputs=[], + property_preprocessors=[] + ) + outputs = builder.build(ModelSchema('', {'a': 1}), '/tmp/test') + assert outputs == {} + + +def test_transformer(): + class SampleModelPreprocessor(ModelPreprocessor): + def transform(self, schema, settings): + schema.set('test', 1) + + builder = ModelBuilder( + output_builders=[], + outputs=[], + property_preprocessors=[], + model_preprocessors=[SampleModelPreprocessor] + ) + schema = ModelSchema('', {'a': 2}) + builder.build(schema, '/tmp/test') + + assert schema.get('test') == 1 + assert schema.get('a') == 2 + diff --git a/tests/test_fulltext_keyword.py b/tests/test_fulltext_keyword.py new file mode 100644 index 00000000..519dadf5 --- /dev/null +++ b/tests/test_fulltext_keyword.py @@ -0,0 +1,119 @@ +import os + +import json5 +import pytest + +from oarepo_model_builder.builder import ModelBuilder +from oarepo_model_builder.builders.jsonschema import JSONSchemaBuilder +from oarepo_model_builder.builders.mapping import MappingBuilder +from 
oarepo_model_builder.outputs.jsonschema import JSONSchemaOutput +from oarepo_model_builder.outputs.mapping import MappingOutput +from oarepo_model_builder.outputs.python import PythonOutput +from oarepo_model_builder.property_preprocessors.text_keyword import TextKeywordPreprocessor +from oarepo_model_builder.schema import ModelSchema +from oarepo_model_builder.model_preprocessors.default_values import DefaultValuesModelPreprocessor +from oarepo_model_builder.model_preprocessors.elasticsearch import ElasticsearchModelPreprocessor +from tests.mock_open import MockOpen + + +def get_model_schema(field_type): + return ModelSchema( + '', + { + 'settings': { + 'package': 'test', + 'python': { + 'use_isort': False, + 'use_black': False + } + }, + 'model': { + 'properties': { + 'a': { + 'type': field_type + } + } + } + } + ) + + +@pytest.fixture +def fulltext_builder(): + return ModelBuilder( + output_builders=[JSONSchemaBuilder, MappingBuilder], + outputs=[JSONSchemaOutput, MappingOutput, PythonOutput], + model_preprocessors=[DefaultValuesModelPreprocessor, ElasticsearchModelPreprocessor], + property_preprocessors=[TextKeywordPreprocessor] + ) + + +def test_fulltext(fulltext_builder): + schema = get_model_schema('fulltext') + fulltext_builder.open = MockOpen() + fulltext_builder.build(schema, output_dir='') + + data = json5.load(fulltext_builder.open(os.path.join('test', 'jsonschemas', 'test-1.0.0.json'))) + + assert data == { + 'properties': { + 'a': { + 'type': 'string' + } + } + } + + data = json5.load(fulltext_builder.open(os.path.join('test', 'mappings', 'v7', 'test', 'test-1.0.0.json'))) + + assert data == {'mappings': {'properties': {'a': {'type': 'text'}, + 'created': {'type': 'date'}, + 'id': {'type': 'keyword'}, + 'updated': {'type': 'date'}}}} + + +def test_keyword(fulltext_builder): + schema = get_model_schema('keyword') + fulltext_builder.open = MockOpen() + fulltext_builder.build(schema, output_dir='') + + data = 
json5.load(fulltext_builder.open(os.path.join('test', 'jsonschemas', 'test-1.0.0.json'))) + + assert data == { + 'properties': { + 'a': { + 'type': 'string' + } + } + } + + data = json5.load(fulltext_builder.open(os.path.join('test', 'mappings', 'v7', 'test', 'test-1.0.0.json'))) + + assert data == {'mappings': {'properties': {'a': {'ignore_above': 50, 'type': 'keyword'}, + 'created': {'type': 'date'}, + 'id': {'type': 'keyword'}, + 'updated': {'type': 'date'}}}} + + +def test_fulltext_keyword(fulltext_builder): + schema = get_model_schema('fulltext-keyword') + fulltext_builder.open = MockOpen() + fulltext_builder.build(schema, output_dir='') + + data = json5.load(fulltext_builder.open(os.path.join('test', 'jsonschemas', 'test-1.0.0.json'))) + + assert data == { + 'properties': { + 'a': { + 'type': 'string' + } + } + } + + data = json5.load(fulltext_builder.open(os.path.join('test', 'mappings', 'v7', 'test', 'test-1.0.0.json'))) + + assert data == {'mappings': {'properties': {'a': {'fields': {'keyword': {'ignore_above': 50, + 'type': 'keyword'}}, + 'type': 'text'}, + 'created': {'type': 'date'}, + 'id': {'type': 'keyword'}, + 'updated': {'type': 'date'}}}} diff --git a/tests/test_is_schema_element.py b/tests/test_is_schema_element.py new file mode 100644 index 00000000..12a03b59 --- /dev/null +++ b/tests/test_is_schema_element.py @@ -0,0 +1,25 @@ +from oarepo_model_builder.utils.schema import model_paths + + +def path_valid(*path): + ms = model_paths + for p in path: + ms = ms.get(p) + return ms.valid + + +def test_is_schema_element(): + assert path_valid('model') + assert path_valid('model', 'type') + assert path_valid('model', '$id') + assert path_valid('model', '$schema') + assert path_valid('model', 'properties') + assert path_valid('model', 'properties', 'a') + assert path_valid('model', 'properties', 'a', 'type') + assert path_valid('model', 'properties', 'a', 'items') + assert path_valid('model', 'properties', 'a', 'items', 'type') + assert 
path_valid('model', 'properties', 'a', 'properties') + + assert not path_valid('test') + assert not path_valid('model', 'test') + assert not path_valid('model', 'properties', 'a', 'test') diff --git a/tests/test_json_pathlib.py b/tests/test_json_pathlib.py new file mode 100644 index 00000000..c09f788c --- /dev/null +++ b/tests/test_json_pathlib.py @@ -0,0 +1,72 @@ +from oarepo_model_builder.utils.json_pathlib import PathCondition, JSONPaths + + +def test_condition(): + current = PathCondition( + start={ + 'a': { + 'b': 1 + } + } + ) + assert current.a.b._subtree_list == [1] + + assert current.a.b == 1 + assert not (current.a.b == 2) + assert current.a.b != 2 + + +def test_condition_star(): + current = PathCondition( + start={ + 'a': { + 'b': 1, + 'c': 2 + } + } + ) + assert current.a['*']._subtree_list == [1, 2] + + assert current.a['*'] == 1 + assert current.a['*'] == 2 + assert current.a['*'] != 3 + + +def test_condition_double_star(): + current = PathCondition(start={ + 'a': { + 'b': 1, + 'c': { + 'd': 2 + } + } + }) + assert current.a['**']._subtree_list == [ + 1, + {'d': 2}, + 2 + ] + + +def test_path_simple(): + p = JSONPaths() + p.register(path='/a/b', value=1) + assert list(p.match('/a/b')) == [1] + assert list(p.match('/a')) == [] + + +def test_path_locator(): + p = JSONPaths() + p.register(path='/a/b', condition=lambda current: current.a == 1, value=1) + assert list(p.match('/a/b', {'a': 1})) == [1] + assert list(p.match('/a/b', {'a': 2})) == [] + + +def test_path_multiple_locators(): + p = JSONPaths() + p.register(path='/a/b', condition=lambda current: current.a == 1, value=1) + p.register(path='/a/b', condition=lambda current: current.b == 1, value=2) + assert list(p.match('/a/b', {'a': 1})) == [1] + assert list(p.match('/a/b', {'b': 1})) == [2] + assert list(p.match('/a/b', {'a': 1, 'b': 1})) == [1, 2] + assert list(p.match('/a/b', {'a': 2})) == [] diff --git a/tests/test_json_stack.py b/tests/test_json_stack.py new file mode 100644 index 
00000000..9fcb80b1 --- /dev/null +++ b/tests/test_json_stack.py @@ -0,0 +1,21 @@ +from oarepo_model_builder.outputs.json_stack import JSONStack + + +def test_json_stack(): + st = JSONStack() + st.push(None, {}) + st.push('a', 1) + st.pop() + st.push('b', {}) + st.push('c', 2) + st.pop() + st.pop() + st.push('d', []) + st.push(0, 3) + st.pop() + st.push(1, 4) + st.pop() + st.pop() + st.pop() + st.pop() + assert st.value == {'a': 1, 'b': {'c': 2}, 'd': [3, 4]} diff --git a/tests/test_jsonchema_builder.py b/tests/test_jsonchema_builder.py new file mode 100644 index 00000000..64ad8290 --- /dev/null +++ b/tests/test_jsonchema_builder.py @@ -0,0 +1,113 @@ +import os + +from oarepo_model_builder.builder import ModelBuilder +from oarepo_model_builder.outputs.jsonschema import JSONSchemaOutput +from oarepo_model_builder.outputs.python import PythonOutput +from oarepo_model_builder.schema import ModelSchema +from oarepo_model_builder.model_preprocessors.default_values import DefaultValuesModelPreprocessor +from oarepo_model_builder.builders.jsonschema import JSONSchemaBuilder +from tests.mock_open import MockOpen +from tests.multilang import MultilangPreprocessor + +try: + import json5 +except ImportError: + import json as json5 + + +def test_simple_jsonschema_builder(): + builder = ModelBuilder( + output_builders=[JSONSchemaBuilder], + outputs=[JSONSchemaOutput, PythonOutput], + model_preprocessors=[DefaultValuesModelPreprocessor], + open=MockOpen() + ) + builder.build( + schema=ModelSchema( + '', + { + 'settings': { + 'package': 'test', + 'python': { + 'use_isort': False, + 'use_black': False + } + }, + 'model': { + 'properties': { + 'a': { + 'type': 'string', + 'oarepo:ui': { + 'class': 'bolder' + } + } + } + } + } + ), + output_dir='' + ) + + data = json5.load(builder.open(os.path.join('test', 'jsonschemas', 'test-1.0.0.json'))) + + assert data == { + 'properties': { + 'a': { + 'type': 'string' + } + } + } + + +def test_jsonschema_preprocessor(): + builder = 
ModelBuilder( + output_builders=[JSONSchemaBuilder], + outputs=[JSONSchemaOutput, PythonOutput], + model_preprocessors=[DefaultValuesModelPreprocessor], + property_preprocessors=[MultilangPreprocessor], + open=MockOpen() + ) + + builder.build( + schema=ModelSchema( + '', + { + 'settings': { + 'package': 'test', + 'python': { + 'use_isort': False, + 'use_black': False + } + }, + 'model': { + 'properties': { + 'a': { + 'type': 'multilingual', + 'oarepo:ui': { + 'class': 'bolder' + } + } + } + } + } + ), + output_dir='' + ) + + data = json5.load(builder.open(os.path.join('test', 'jsonschemas', 'test-1.0.0.json'))) + + assert data == { + 'properties': { + 'a': { + 'type': 'object', + 'properties': { + 'lang': { + 'type': 'string' + }, + 'value': { + 'type': 'string' + } + } + } + } + } diff --git a/tests/test_jsonschema_output.py b/tests/test_jsonschema_output.py new file mode 100644 index 00000000..2ee024ef --- /dev/null +++ b/tests/test_jsonschema_output.py @@ -0,0 +1,34 @@ +from pathlib import Path + +from oarepo_model_builder.outputs.jsonschema import JSONSchemaOutput +from tests.mock_open import MockOpen + +try: + import json5 +except ImportError: + import json as json5 + + +def test_create_simple_schema(): + + class FakeBuilder: + open = MockOpen() + + output = JSONSchemaOutput(FakeBuilder(), Path('blah.json')) + output.begin() + output.enter('properties', {}) + output.enter('a', {}) + output.primitive('type', 'string') + output.leave() + output.leave() + output.finish() + + data = json5.load(FakeBuilder.open('blah.json')) + + assert data == { + 'properties': { + 'a': { + 'type': 'string' + } + } + } diff --git a/tests/test_loading.py b/tests/test_loading.py new file mode 100644 index 00000000..f2c9075d --- /dev/null +++ b/tests/test_loading.py @@ -0,0 +1,49 @@ +from pathlib import Path + +from oarepo_model_builder.loaders import json_loader +from oarepo_model_builder.schema import ModelSchema + + +def test_loading_from_string(): + schema = 
ModelSchema('/tmp/path.json', {}) + assert schema.schema == {'settings': {'plugins':{}}} + + +def test_loading_from_empty_file(): + schema = ModelSchema(Path(__file__).parent.joinpath('data/empty.json'), loaders={ + 'json': json_loader + }) + assert schema.schema == {'settings': {'plugins':{}}} + + +def test_loading_included_resource(): + schema = ModelSchema('/tmp/path.json', { + 'a': { + 'oarepo:use': 'test1' + } + }, { + 'test1': lambda schema: { + 'included': 'test1' + } + }) + assert schema.schema == { + 'settings': {'plugins':{}}, + 'a': { + 'included': 'test1' + } + } + + +def test_loading_included_resource_root(): + schema = ModelSchema( + '/tmp/path.json', { + 'oarepo:use': 'test1' + }, { + 'test1': lambda schema: { + 'included': 'test1' + } + }) + assert schema.schema == { + 'settings': {'plugins':{}}, + 'included': 'test1' + } diff --git a/tests/test_mapping_builder.py b/tests/test_mapping_builder.py new file mode 100644 index 00000000..d5eb6edd --- /dev/null +++ b/tests/test_mapping_builder.py @@ -0,0 +1,121 @@ +import os + +from oarepo_model_builder.builder import ModelBuilder +from oarepo_model_builder.outputs.mapping import MappingOutput +from oarepo_model_builder.outputs.python import PythonOutput +from oarepo_model_builder.schema import ModelSchema +from oarepo_model_builder.model_preprocessors.default_values import DefaultValuesModelPreprocessor +from oarepo_model_builder.builders.mapping import MappingBuilder +from tests.mock_open import MockOpen +from tests.multilang import MultilangPreprocessor + +try: + import json5 +except ImportError: + import json as json5 + + +def test_simple_mapping_builder(): + builder = ModelBuilder( + output_builders=[MappingBuilder], + outputs=[MappingOutput, PythonOutput], + model_preprocessors=[DefaultValuesModelPreprocessor], + open=MockOpen() + ) + builder.build( + schema=ModelSchema( + '', + { + 'settings': { + 'package': 'test', + 'python': { + 'use_isort': False, + 'use_black': False + }, + 'elasticsearch': { + 
'version': 'v7', + 'templates': { + 'v7': {} + } + } + }, + 'model': { + 'properties': { + 'a': { + 'type': 'string', + 'oarepo:mapping': { + 'type': 'text' + } + } + } + } + } + ), + output_dir='' + ) + + data = json5.load(builder.open(os.path.join('test', 'mappings', 'v7', 'test', 'test-1.0.0.json'))) + + assert data == {'mappings': {'properties': {'a': {'type': 'text'}}}} + + +def test_mapping_preprocessor(): + builder = ModelBuilder( + output_builders=[MappingBuilder], + outputs=[MappingOutput, PythonOutput], + model_preprocessors=[DefaultValuesModelPreprocessor], + property_preprocessors=[MultilangPreprocessor], + open=MockOpen() + ) + + builder.build( + schema=ModelSchema( + '', + { + 'settings': { + 'package': 'test', + 'python': { + 'use_isort': False, + 'use_black': False + }, + 'elasticsearch': { + 'version': 'v7', + 'templates': { + 'v7': {} + } + } + }, + 'model': { + 'properties': { + 'a': { + 'type': 'multilingual' + } + } + } + } + ), + output_dir='' + ) + + data = json5.load(builder.open(os.path.join('test', 'mappings', 'v7', 'test', 'test-1.0.0.json'))) + + assert data == { + "mappings": { + 'properties': { + 'a': { + 'type': 'object', + 'properties': { + 'lang': { + 'type': 'keyword' + }, + 'value': { + 'type': 'text' + } + } + }, + 'a_cs': { + 'type': 'text' + } + } + } + } diff --git a/tests/test_marshmallow_builder.py b/tests/test_marshmallow_builder.py new file mode 100644 index 00000000..7b078ea1 --- /dev/null +++ b/tests/test_marshmallow_builder.py @@ -0,0 +1,73 @@ +import os + +import json5 +import pytest + +from oarepo_model_builder.builder import ModelBuilder +from oarepo_model_builder.builders.jsonschema import JSONSchemaBuilder +from oarepo_model_builder.builders.mapping import MappingBuilder +from oarepo_model_builder.invenio.invenio_record_schema import InvenioRecordSchemaBuilder +from oarepo_model_builder.model_preprocessors.invenio import InvenioModelPreprocessor +from oarepo_model_builder.outputs.jsonschema import JSONSchemaOutput 
+from oarepo_model_builder.outputs.mapping import MappingOutput +from oarepo_model_builder.outputs.python import PythonOutput +from oarepo_model_builder.property_preprocessors.text_keyword import TextKeywordPreprocessor +from oarepo_model_builder.schema import ModelSchema +from oarepo_model_builder.model_preprocessors.default_values import DefaultValuesModelPreprocessor +from oarepo_model_builder.model_preprocessors.elasticsearch import ElasticsearchModelPreprocessor +from tests.mock_open import MockOpen + + +def get_model_schema(field_type): + return ModelSchema( + '', + { + 'settings': { + 'package': 'test', + 'python': { + 'use_isort': False, + 'use_black': False + } + }, + 'model': { + 'properties': { + 'a': { + 'type': field_type + } + } + } + } + ) + + +@pytest.fixture +def fulltext_builder(): + return ModelBuilder( + output_builders=[InvenioRecordSchemaBuilder], + outputs=[PythonOutput], + model_preprocessors=[DefaultValuesModelPreprocessor, ElasticsearchModelPreprocessor, InvenioModelPreprocessor], + property_preprocessors=[TextKeywordPreprocessor] + ) + + +def _test(fulltext_builder, string_type): + schema = get_model_schema(string_type) + fulltext_builder.open = MockOpen() + fulltext_builder.build(schema, output_dir='') + + with fulltext_builder.open(os.path.join('test', 'schema.py')) as f: + data = f.read() + + assert 'a = ma_fields.String()' in data + + +def test_fulltext(fulltext_builder): + _test(fulltext_builder, 'fulltext') + + +def test_keyword(fulltext_builder): + _test(fulltext_builder, 'keyword') + + +def test_fulltext_keyword(fulltext_builder): + _test(fulltext_builder, 'fulltext-keyword') diff --git a/tests/test_merge.py b/tests/test_merge.py new file mode 100644 index 00000000..c3496e7d --- /dev/null +++ b/tests/test_merge.py @@ -0,0 +1,22 @@ +from oarepo_model_builder.utils.deepmerge import deepmerge + + +def test_merge_simple(): + assert deepmerge(1, 2, []) == 1 + + +def test_merge_dict(): + assert deepmerge(dict(a=1), dict(b=2), []) == 
dict(a=1, b=2) + assert deepmerge(dict(a=1), dict(a=2), []) == dict(a=1) + + assert deepmerge(dict(a=dict(a=1)), dict(a=dict(a=2)), []) == dict(a=dict(a=1)) + assert deepmerge(dict(a=dict(a=1)), dict(a=dict(b=2), c=3), []) == dict(a=dict(a=1, b=2), c=3) + + +def test_merge_list(): + assert deepmerge([1, 2], [3, 4], []) == [1, 2] + assert deepmerge([1, 2], [3, 4, 5], []) == [1, 2, 5] + + +def test_merge_list_dict(): + assert deepmerge([dict(a=1)], [dict(b=1)], []) == [dict(a=1, b=1)] diff --git a/tests/test_merging_transformer.py b/tests/test_merging_transformer.py new file mode 100644 index 00000000..5c1d8e28 --- /dev/null +++ b/tests/test_merging_transformer.py @@ -0,0 +1,204 @@ +import libcst as cst + +from oarepo_model_builder.utils.cst import MergingTransformer + + +def test_new_class(): + existing_module = "# comment start" + included_module = """ +# comment before +class Blah: + # comment + pass + """.strip() + original_cst = cst.parse_module(existing_module) + included_cst = cst.parse_module(included_module, + config=original_cst.config_for_parsing) + transformed_cst = original_cst.visit(MergingTransformer(included_cst)) + + assert transformed_cst.code.strip() == """ +# comment start +class Blah: + # comment + pass + """.strip() + + +def test_existing_class(): + existing_module = """ +# comment start +class Blah: + # comment + pass + """.strip() + included_module = existing_module + + original_cst = cst.parse_module(existing_module) + included_cst = cst.parse_module(included_module, + config=original_cst.config_for_parsing) + transformed_cst = original_cst.visit(MergingTransformer(included_cst)) + + assert transformed_cst.code.strip() == existing_module.strip() + + +def test_new_function(): + existing_module = "# comment start" + included_module = """ +# comment before +def a(): + # comment + return 1 + """.strip() + original_cst = cst.parse_module(existing_module) + included_cst = cst.parse_module(included_module, + config=original_cst.config_for_parsing) + 
transformed_cst = original_cst.visit(MergingTransformer(included_cst)) + + assert transformed_cst.code.strip() == """ +# comment start +def a(): + # comment + return 1 + """.strip() + + +def test_existing_function(): + existing_module = """ +# comment start +def a(): + # comment + return 1 + """.strip() + included_module = """ +def a(): + return 2 + """.strip() + + original_cst = cst.parse_module(existing_module) + included_cst = cst.parse_module(included_module, + config=original_cst.config_for_parsing) + transformed_cst = original_cst.visit(MergingTransformer(included_cst)) + + assert transformed_cst.code.strip() == existing_module.strip() + + +def test_new_method(): + existing_module = """ +# comment start +class Blah: + # comment before + def b(self): + return 2 + """.strip() + included_module = """ +class Blah: + # comment 2 before + def a(self): + return 1 + """.strip() + + original_cst = cst.parse_module(existing_module) + included_cst = cst.parse_module(included_module, + config=original_cst.config_for_parsing) + transformed_cst = original_cst.visit(MergingTransformer(included_cst)) + + assert transformed_cst.code.strip() == """ +# comment start +class Blah: + # comment before + def b(self): + return 2 + # comment 2 before + def a(self): + return 1 + """.strip() + +def test_existing_method(): + existing_module = """ +# comment start +class Blah: + # comment before + def b(self): + return 2 + """.strip() + included_module = """ +class Blah: + # comment 2 before + def b(self): + return 1 + """.strip() + + original_cst = cst.parse_module(existing_module) + included_cst = cst.parse_module(included_module, + config=original_cst.config_for_parsing) + transformed_cst = original_cst.visit(MergingTransformer(included_cst)) + + assert transformed_cst.code.strip() == """ +# comment start +class Blah: + # comment before + def b(self): + return 2 + """.strip() + + +def test_new_include(): + existing_module = "# comment start" + included_module = """ +# comment before 
+import pathlib +from pathlib import ( + Blah +) +# comment after +class Blah: + pass + """.strip() + original_cst = cst.parse_module(existing_module) + included_cst = cst.parse_module(included_module, + config=original_cst.config_for_parsing) + transformed_cst = original_cst.visit(MergingTransformer(included_cst)) + + assert transformed_cst.code.strip() == """ +# comment start +import pathlib +from pathlib import ( + Blah +) +# comment after +class Blah: + pass + """.strip() + + +def test_existing_include(): + existing_module = """ +# comment start +from c import d +import b +""".strip() + included_module = """ +# comment before +import b +import pathlib +from pathlib import ( + Blah +) +from c import d +from c import d, e + """.strip() + original_cst = cst.parse_module(existing_module) + included_cst = cst.parse_module(included_module, + config=original_cst.config_for_parsing) + transformed_cst = original_cst.visit(MergingTransformer(included_cst)) + + assert transformed_cst.code.strip() == """ +# comment start +import pathlib +from pathlib import ( + Blah +) +from c import d, e +from c import d +import b + """.strip() diff --git a/tests/test_oarepo_model_builder.py b/tests/test_oarepo_model_builder.py deleted file mode 100644 index e69de29b..00000000 diff --git a/tests/test_plugin_configuration.py b/tests/test_plugin_configuration.py new file mode 100644 index 00000000..fc480877 --- /dev/null +++ b/tests/test_plugin_configuration.py @@ -0,0 +1,69 @@ +from oarepo_model_builder.builder import ModelBuilder +from oarepo_model_builder.outputs.jsonschema import JSONSchemaOutput +from oarepo_model_builder.outputs.mapping import MappingOutput +from oarepo_model_builder.schema import ModelSchema + + +def test_output_disabled(): + builder = ModelBuilder() + schema = ModelSchema('', { + 'settings': { + 'plugins': { + 'output': { + 'disabled': '__all__' + } + } + } + }) + builder.set_schema(schema) + assert builder._filter_classes([JSONSchemaOutput, MappingOutput], 
'output') == [] + + +def test_output_disabled_single(): + builder = ModelBuilder() + schema = ModelSchema('', { + 'settings': { + 'plugins': { + 'output': { + 'disabled': ['jsonschema'] + } + } + } + }) + builder.set_schema(schema) + assert set(x.TYPE for x in builder._filter_classes([JSONSchemaOutput, MappingOutput], 'output')) == {'mapping'} + + +def test_output_enabled(): + builder = ModelBuilder() + schema = ModelSchema('', { + 'settings': { + 'plugins': { + 'output': { + 'disabled': '__all__', + 'enabled': [ + 'mapping' + ] + } + } + } + }) + builder.set_schema(schema) + assert set(x.TYPE for x in builder._filter_classes([JSONSchemaOutput, MappingOutput], 'output')) == {'mapping'} + + +def test_output_enabled_import(): + builder = ModelBuilder() + schema = ModelSchema('', { + 'settings': { + 'plugins': { + 'output': { + 'enabled': [ + 'oarepo_model_builder.outputs.mapping:MappingOutput' + ] + } + } + } + }) + builder.set_schema(schema) + assert set(x.TYPE for x in builder._filter_classes([], 'output')) == {'mapping'} \ No newline at end of file diff --git a/tests/test_stack.py b/tests/test_stack.py new file mode 100644 index 00000000..e6aad441 --- /dev/null +++ b/tests/test_stack.py @@ -0,0 +1,45 @@ +from oarepo_model_builder.schema import ModelSchema +from oarepo_model_builder.stack import ModelBuilderStack, ModelBuilderStackEntry + + +def test_stack(): + schema = ModelSchema('', { + 'a': { + 'b': [1, 2], + 'c': 3 + }, + 'd': 4, + 'e': { + 'f': 5 + } + }) + stack = ModelBuilderStack(schema) + # pop the empty settings added by the schema + schema.schema.pop('settings') + out = [] + + def on_data(stack): + if stack.top_type != stack.PRIMITIVE: + out.append(('enter', stack.top, stack.level)) + yield + out.append(('leave', stack.top, stack.level)) + else: + out.append(('primitive', stack.top, stack.level)) + + stack.process(on_data) + + assert out == [ + ('enter', ModelBuilderStackEntry(key=None, data={'a': {'b': [1, 2], 'c': 3}, 'd': 4, 'e': {'f': 5}}), 1), + 
('enter', ModelBuilderStackEntry(key='a', data={'b': [1, 2], 'c': 3}), 2), + ('enter', ModelBuilderStackEntry(key='b', data=[1, 2]), 3), + ('primitive', ModelBuilderStackEntry(key=0, data=1), 4), + ('primitive', ModelBuilderStackEntry(key=1, data=2), 4), + ('leave', ModelBuilderStackEntry(key='b', data=[1, 2]), 3), + ('primitive', ModelBuilderStackEntry(key='c', data=3), 3), + ('leave', ModelBuilderStackEntry(key='a', data={'b': [1, 2], 'c': 3}), 2), + ('primitive', ModelBuilderStackEntry(key='d', data=4), 2), + ('enter', ModelBuilderStackEntry(key='e', data={'f': 5}), 2), + ('primitive', ModelBuilderStackEntry(key='f', data=5), 3), + ('leave', ModelBuilderStackEntry(key='e', data={'f': 5}), 2), + ('leave', ModelBuilderStackEntry(key=None, data={'a': {'b': [1, 2], 'c': 3}, 'd': 4, 'e': {'f': 5}}), 1) + ] diff --git a/tests/test_template_registry.py b/tests/test_template_registry.py new file mode 100644 index 00000000..7757182b --- /dev/null +++ b/tests/test_template_registry.py @@ -0,0 +1,22 @@ +from oarepo_model_builder.templates import templates +from oarepo_model_builder.utils.hyphen_munch import HyphenMunch + + +def test_load_default_template(): + settings = { + 'python': { + 'templates': {}, + } + } + assert templates.get_template('record', HyphenMunch(settings)) + + +def test_load_template_in_settings(): + settings = { + 'python': { + 'templates': { + 'blah': __file__ + } + } + } + assert templates.get_template('blah', HyphenMunch(settings))