diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..75ccb7c --- /dev/null +++ b/.gitignore @@ -0,0 +1,73 @@ + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +env/ +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +*.egg-info/ +.installed.cfg +*.egg + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*,cover +.hypothesis/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py + +# Flask instance folder +instance/ + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# IPython Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# dotenv +.env diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..a74f044 --- /dev/null +++ b/LICENSE @@ -0,0 +1,274 @@ +European Union Public Licence +V. 1.2 + +EUPL © the European Union 2007, 2016 + +This European Union Public Licence (the ‘EUPL’) applies to the Work (as +defined below) which is provided under the terms of this Licence. Any use of +the Work, other than as authorised under this Licence is prohibited (to the +extent such use is covered by a right of the copyright holder of the Work). + +The Work is provided under the terms of this Licence when the Licensor (as +defined below) has placed the following notice immediately following the +copyright notice for the Work: “Licensed under the EUPL”, or has expressed by +any other means his willingness to license under the EUPL. + +1. Definitions + +In this Licence, the following terms have the following meaning: +— ‘The Licence’: this Licence. +— ‘The Original Work’: the work or software distributed or communicated by the + ‘Licensor under this Licence, available as Source Code and also as + ‘Executable Code as the case may be. +— ‘Derivative Works’: the works or software that could be created by the + ‘Licensee, based upon the Original Work or modifications thereof. This + ‘Licence does not define the extent of modification or dependence on the + ‘Original Work required in order to classify a work as a Derivative Work; + ‘this extent is determined by copyright law applicable in the country + ‘mentioned in Article 15. +— ‘The Work’: the Original Work or its Derivative Works. +— ‘The Source Code’: the human-readable form of the Work which is the most + convenient for people to study and modify. + +— ‘The Executable Code’: any code which has generally been compiled and which + is meant to be interpreted by a computer as a program. +— ‘The Licensor’: the natural or legal person that distributes or communicates + the Work under the Licence. +— ‘Contributor(s)’: any natural or legal person who modifies the Work under + the Licence, or otherwise contributes to the creation of a Derivative Work. +— ‘The Licensee’ or ‘You’: any natural or legal person who makes any usage of + the Work under the terms of the Licence. 
+— ‘Distribution’ or ‘Communication’: any act of selling, giving, lending, + renting, distributing, communicating, transmitting, or otherwise making + available, online or offline, copies of the Work or providing access to its + essential functionalities at the disposal of any other natural or legal + person. + +2. Scope of the rights granted by the Licence + +The Licensor hereby grants You a worldwide, royalty-free, non-exclusive, +sublicensable licence to do the following, for the duration of copyright +vested in the Original Work: + +— use the Work in any circumstance and for all usage, +— reproduce the Work, +— modify the Work, and make Derivative Works based upon the Work, +— communicate to the public, including the right to make available or display + the Work or copies thereof to the public and perform publicly, as the case + may be, the Work, +— distribute the Work or copies thereof, +— lend and rent the Work or copies thereof, +— sublicense rights in the Work or copies thereof. + +Those rights can be exercised on any media, supports and formats, whether now +known or later invented, as far as the applicable law permits so. + +In the countries where moral rights apply, the Licensor waives his right to +exercise his moral right to the extent allowed by law in order to make +effective the licence of the economic rights here above listed. + +The Licensor grants to the Licensee royalty-free, non-exclusive usage rights +to any patents held by the Licensor, to the extent necessary to make use of +the rights granted on the Work under this Licence. + +3. Communication of the Source Code + +The Licensor may provide the Work either in its Source Code form, or as +Executable Code. If the Work is provided as Executable Code, the Licensor +provides in addition a machine-readable copy of the Source Code of the Work +along with each copy of the Work that the Licensor distributes or indicates, +in a notice following the copyright notice attached to the Work, a repository +where the Source Code is easily and freely accessible for as long as the +Licensor continues to distribute or communicate the Work. + +4. Limitations on copyright + +Nothing in this Licence is intended to deprive the Licensee of the benefits +from any exception or limitation to the exclusive rights of the rights owners +in the Work, of the exhaustion of those rights or of other applicable +limitations thereto. + +5. Obligations of the Licensee + +The grant of the rights mentioned above is subject to some restrictions and +obligations imposed on the Licensee. Those obligations are the following: + +Attribution right: The Licensee shall keep intact all copyright, patent or +trademarks notices and all notices that refer to the Licence and to the +disclaimer of warranties. The Licensee must include a copy of such notices and +a copy of the Licence with every copy of the Work he/she distributes or +communicates. The Licensee must cause any Derivative Work to carry prominent +notices stating that the Work has been modified and the date of modification. + +Copyleft clause: If the Licensee distributes or communicates copies of the +Original Works or Derivative Works, this Distribution or Communication will be +done under the terms of this Licence or of a later version of this Licence +unless the Original Work is expressly distributed only under this version of +the Licence — for example by communicating ‘EUPL v. 1.2 only’. 
The Licensee +(becoming Licensor) cannot offer or impose any additional terms or conditions +on the Work or Derivative Work that alter or restrict the terms of the +Licence. + +Compatibility clause: If the Licensee Distributes or Communicates Derivative +Works or copies thereof based upon both the Work and another work licensed +under a Compatible Licence, this Distribution or Communication can be done +under the terms of this Compatible Licence. For the sake of this clause, +‘Compatible Licence’ refers to the licences listed in the appendix attached to +this Licence. Should the Licensee's obligations under the Compatible Licence +conflict with his/her obligations under this Licence, the obligations of the +Compatible Licence shall prevail. + +Provision of Source Code: When distributing or communicating copies of the +Work, the Licensee will provide a machine-readable copy of the Source Code or +indicate a repository where this Source will be easily and freely available +for as long as the Licensee continues to distribute or communicate the Work. + +Legal Protection: This Licence does not grant permission to use the trade +names, trademarks, service marks, or names of the Licensor, except as required +for reasonable and customary use in describing the origin of the Work and +reproducing the content of the copyright notice. + +6. Chain of Authorship + +The original Licensor warrants that the copyright in the Original Work granted +hereunder is owned by him/her or licensed to him/her and that he/she has the +power and authority to grant the Licence. + +Each Contributor warrants that the copyright in the modifications he/she +brings to the Work are owned by him/her or licensed to him/her and that he/she +has the power and authority to grant the Licence. + +Each time You accept the Licence, the original Licensor and subsequent +Contributors grant You a licence to their contributions to the Work, under the +terms of this Licence. + +7. Disclaimer of Warranty + +The Work is a work in progress, which is continuously improved by numerous +Contributors. It is not a finished work and may therefore contain defects or +‘bugs’ inherent to this type of development. + +For the above reason, the Work is provided under the Licence on an ‘as is’ +basis and without warranties of any kind concerning the Work, including +without limitation merchantability, fitness for a particular purpose, absence +of defects or errors, accuracy, non-infringement of intellectual property +rights other than copyright as stated in Article 6 of this Licence. + +This disclaimer of warranty is an essential part of the Licence and a +condition for the grant of any rights to the Work. + +8. Disclaimer of Liability + +Except in the cases of wilful misconduct or damages directly caused to natural +persons, the Licensor will in no event be liable for any direct or indirect, +material or moral, damages of any kind, arising out of the Licence or of the +use of the Work, including without limitation, damages for loss of goodwill, +work stoppage, computer failure or malfunction, loss of data or any commercial +damage, even if the Licensor has been advised of the possibility of such +damage. However, the Licensor will be liable under statutory product liability +laws as far such laws apply to the Work. + +9. Additional agreements + +While distributing the Work, You may choose to conclude an additional +agreement, defining obligations or services consistent with this Licence. 
+However, if accepting obligations, You may act only on your own behalf and on +your sole responsibility, not on behalf of the original Licensor or any other +Contributor, and only if You agree to indemnify, defend, and hold each +Contributor harmless for any liability incurred by, or claims asserted against +such Contributor by the fact You have accepted any warranty or additional +liability. + +10. Acceptance of the Licence + +The provisions of this Licence can be accepted by clicking on an icon ‘I +agree’ placed under the bottom of a window displaying the text of this Licence +or by affirming consent in any other similar way, in accordance with the rules +of applicable law. Clicking on that icon indicates your clear and irrevocable +acceptance of this Licence and all of its terms and conditions. + +Similarly, you irrevocably accept this Licence and all of its terms and +conditions by exercising any rights granted to You by Article 2 of this +Licence, such as the use of the Work, the creation by You of a Derivative Work +or the Distribution or Communication by You of the Work or copies thereof. + +11. Information to the public + +In case of any Distribution or Communication of the Work by means of +electronic communication by You (for example, by offering to download the Work +from a remote location) the distribution channel or media (for example, a +website) must at least provide to the public the information requested by the +applicable law regarding the Licensor, the Licence and the way it may be +accessible, concluded, stored and reproduced by the Licensee. + +12. Termination of the Licence + +The Licence and the rights granted hereunder will terminate automatically upon +any breach by the Licensee of the terms of the Licence. Such a termination +will not terminate the licences of any person who has received the Work from +the Licensee under the Licence, provided such persons remain in full +compliance with the Licence. + +13. Miscellaneous + +Without prejudice of Article 9 above, the Licence represents the complete +agreement between the Parties as to the Work. + +If any provision of the Licence is invalid or unenforceable under applicable +law, this will not affect the validity or enforceability of the Licence as a +whole. Such provision will be construed or reformed so as necessary to make it +valid and enforceable. + +The European Commission may publish other linguistic versions or new versions +of this Licence or updated versions of the Appendix, so far this is required +and reasonable, without reducing the scope of the rights granted by the +Licence. New versions of the Licence will be published with a unique version +number. + +All linguistic versions of this Licence, approved by the European Commission, +have identical value. Parties can take advantage of the linguistic version of +their choice. + +14. Jurisdiction + +Without prejudice to specific agreement between parties, +— any litigation resulting from the interpretation of this License, arising + between the European Union institutions, bodies, offices or agencies, as a + Licensor, and any Licensee, will be subject to the jurisdiction of the Court + of Justice of the European Union, as laid down in article 272 of the Treaty + on the Functioning of the European Union, +— any litigation arising between other parties and resulting from the + interpretation of this License, will be subject to the exclusive + jurisdiction of the competent court where the Licensor resides or conducts + its primary business. + +15. 
Applicable Law + +Without prejudice to specific agreement between parties, +— this Licence shall be governed by the law of the European Union Member State + where the Licensor has his seat, resides or has his registered office, +— this licence shall be governed by Belgian law if the Licensor has no seat, + residence or registered office inside a European Union Member State. + +Appendix + +‘Compatible Licences’ according to Article 5 EUPL are: +— GNU General Public License (GPL) v. 2, v. 3 +— GNU Affero General Public License (AGPL) v. 3 +— Open Software License (OSL) v. 2.1, v. 3.0 +— Eclipse Public License (EPL) v. 1.0 +— CeCILL v. 2.0, v. 2.1 +— Mozilla Public Licence (MPL) v. 2 +— GNU Lesser General Public Licence (LGPL) v. 2.1, v. 3 +— Creative Commons Attribution-ShareAlike v. 3.0 Unported (CC BY-SA 3.0) for + works other than software +— European Union Public Licence (EUPL) v. 1.1, v. 1.2 +— Québec Free and Open-Source Licence — Reciprocity (LiLiQ-R) or + Strong Reciprocity (LiLiQ-R+) + +— The European Commission may update this Appendix to later versions of the + above licences without producing a new version of the EUPL, as long as they + provide the rights granted in Article 2 of this Licence and protect the + covered Source Code from exclusive appropriation. +— All other changes or additions to this Appendix require the production of a + new EUPL version. diff --git a/README.md b/README.md new file mode 100644 index 0000000..afd0424 --- /dev/null +++ b/README.md @@ -0,0 +1,254 @@ +# CB-EDP +CEF Context Broker integration with the European Data Portal. + +This Integration Solution generates from the parameters established in a +configuration file an RDF/XML file containing the datasets representing +Context Broker Data Models chosen to integrate. The output is available +at the location where the solution is deployed. + +The Python module to install contains the following main components: + +- Integration Solution core component [`cb_edp/`](cb_edp/): Command + line interface (CLI) application that offers the options needed to + work with the integration. +- Integration Solution API [`cb_edp/api`](cb_edp/api/): The scope of + this Flask developed API is to allow the accessing to the data from a + dataset in the RDF file using a custom call. It also provides + generated RDF through a static URL in order to have it always + available on the Internet. + +## Getting Started +The following instructions will allow you to get a completely functional +CB-EDP environment. This is just a guideline about how to install and +deploy the solution. Adapt it to your needs. + +### Prerequisites +CB-EDP has some requirements that should be accomplished before starting +deploying it. + +- Ubuntu 18.04.1 LTS 64-bit or later +- Python 3.6 or later +- pip3 9.0.1 or later + +Update packages list in case you didn't do before (recommended): +``` +sudo apt update +``` + +#### Python 3.7 installation +Python 3 is already installed in Ubuntu 18 distributions. However, in +case you want to use Python 3.7, follow the next steps to install it. 
+ +First update packages list and install the prerequisites: +``` +sudo apt install software-properties-common +``` + +Then add the deadsnakes PPA to your sources list: +``` +sudo add-apt-repository ppa:deadsnakes/ppa +``` + +Last, install Python 3.7 with: +``` +sudo apt install python3.7 +``` + +You can verify if everything is alright just typing (it should print +Python version number): +``` +$ python3.7 --version +Python 3.7.3 +``` + +#### pip3 installation +pip3 will be used as the package manager for Python. It will be used for +CB-EDP installation, so must be installed before starting the +deployment. + +After packages list update, install pip for Python 3: +``` +sudo apt install python3-pip +``` + +You can verify the installation typing: +``` +$ pip3 --version +pip 9.0.1 from /usr/lib/python3/dist-packages (python 3.7) +``` + +### Installing and deploying +#### CB-EDP core component +To install the Integration Solution, download this repository as a ZIP +file and move it to the machine where you want to deploy it. Once you +got it, install it using pip: +``` +sudo pip3 install /path/to/cb_edp.zip +``` + +It should have installed too every dependency (Click, configobj, Flask, +Gunicorn, requests and time-uuid) of the CB-EDP. In case it didn't or +you aren't sure of it, install them directly using +[`requirements.txt`](requirements.txt) file. First unzip it cause it's +on downloaded ZIP file: +``` +unzip /path/to/cb_edp.zip +pip3 install -r /path/to/requirements.txt +``` + +You can check it's installed launching `show` pip3 command: +``` +$ pip3 show cb-edp +--- +Metadata-Version: 1.0 +Name: cb-edp +Version: 1.0 +Summary: FIWARE Context Broker instance integration with the EDP +Location: /usr/local/lib/python3.7/dist-packages +Requires: Click, configobj, Flask, gunicorn, requests, time-uuid +Classifiers: +Entry-points: + [console_scripts] + cb-edp=cb_edp.commands:cli +``` + +#### CB-EDP API +The Integration Solution includes an API for: +1. Doing requests to the CB configured in order to get responses from + browsable HTTP requests +2. Publishing the RDF/XML file generated for harvesting by the EDP + +The deployment of this API will need something serving the solution and +other thing that grants access to this server from the Internet. The +technologies suggested to do so are +[Gunicorn](https://github.com/benoitc/gunicorn) and +[Nginx](http://nginx.org/). + +###### Gunicorn +Gunicorn should be installed by pip when installing CB-EDP. If not, +please launch: +``` +sudo pip3 install gunicorn==19.9.0 +``` +Install required components: +```commandline +sudo apt-get install python3-pip python3-dev build-essential libssl-dev libffi-dev python3-setuptools +``` +Create a service to assign Gunicorn to it with `nano` (or any other +editor you like): +```commandline +sudo nano /etc/systemd/system/cb-edp.service +``` +And paste the following contents replacing the values: +- `{sudoer-user}` for machine’s sudoer user. It is root by default. +- `{user-group}` for a group from the user specified before. You can + check it using ll command with “Location” value copied before to see + which groups have the other files and directories. It is staff by + default. +- `{solution-location}` for "Location" value copied before. 
+``` +[Unit] +Description=Gunicorn instance to serve CB-EDP +After=network.target + +[Service] +User={sudoer_user} +Group={user_group} +WorkingDirectory={solution-location}/cb_edp/api +ExecStart=/usr/local/bin/gunicorn --worker-class gthread --workers 3 --threads 1 --bind unix:cb-edp.sock -m 704 wsgi:app + +[Install] +WantedBy=multi-user.target +``` +Gunicorn accepts custom configuration for number of workers and threads +that these workers will use. The values set in the text above are the +recommended ones, but can be modified following these equations: +```math +N.workers=2*CPU cores+1 +``` +```math +N.threads=2*CPU cores +``` + +Now Gunicorn service can be started: +``` +sudo systemctl start cb-edp +``` + +To enable Gunicorn launching on boot, launch this command: +``` +sudo systemctl enable cb-edp +``` + +You can check application service status executing: +``` +sudo systemctl status cb-edp +``` + +###### Nginx +Install Nginx for Ubuntu: +``` +sudo apt-get install nginx +``` + +You can check Nginx service status running: +``` +sudo systemctl status nginx +``` + +Now it's turn to configure Nginx to proxy requests to the API. To do so +create new server block configuration for the already created Gunicorn +service with `nano` (or any other editor you like): +``` +sudo nano /etc/nginx/sites-available/cb-edp +``` +And paste the following contents replacing the values: +- `{your-public-ip-or-dns}` for the public IP of the server or the DNS. +- `{your-custom-route}` for the relative path where the API service will + be available. It accepts any value (context-data, open-data, cb, etc.) + and many levels (as many as the Integration Admin wants) but always + has to include the /api text at the end. Some valid examples: + - /context-data/api + - /open-data/cb/catalogue/api + - /api +- `{solution-location}` for "Location" value copied before. +``` +server { + listen 80; + server_name {your-public-ip-or-dns}; + + location /{your-custom-route}/api { + include proxy_params; + proxy_pass http://unix:{solution-location}/cb_edp/api/cb-edp.sock; + } +} +``` + +Link the file to the sites-enabled directory to enable Nginx server +block configuration: +``` +sudo ln -s /etc/nginx/sites-available/cb-edp /etc/nginx/sites-enabled +``` + +Check that there are no errors: +``` +sudo nginx -t +``` + +If it returns no issues, then restart Nginx to load the new +configuration: +``` +sudo systemctl restart nginx +``` + +Now the application should be available on the Internet. Try browsing to +http://`{your-public-ip-or-dns}`/`{your-custom-route}`/api/status + +## Built With +- [Python 3.7](https://www.python.org/) +- [Flask](http://flask.pocoo.org/) + + +## License +This project is licensed under the European Union Public License 1.2 -see the +[LICENSE](LICENSE) file for details. diff --git a/cb_edp/__init__.py b/cb_edp/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/cb_edp/api/__init__.py b/cb_edp/api/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/cb_edp/api/builder.py b/cb_edp/api/builder.py new file mode 100644 index 0000000..e95c448 --- /dev/null +++ b/cb_edp/api/builder.py @@ -0,0 +1,95 @@ +from urllib.parse import urlparse +from urllib.parse import quote + +import cb_edp.conf.constants as const +from cb_edp.conf.manager import ConfigManager +from cb_edp.utils.helpers import Helpers +from cb_edp.utils.validators import Validators + + +class APIBuilder: + """ + Utils class that includes different methods to build well-formatted solution's API URLs. 
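+
+    A minimal illustrative example (the service, service path and Data Model
+    names are placeholders; the exact output depends on the hosts set in the
+    configuration file):
+
+        APIBuilder.build_resource_url('weather', '/madrid', entity='WeatherObserved')
+        # -> '<integration.api host>/<base64(orion host)>/entity/WeatherObserved?fs=...&fp=...'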
+ """ + + @staticmethod + def build_resource_url(fiware_service, fiware_service_path, **kwargs): + """ + Builds a REST API call based on the parameters passed and the specification of solution's API. + :param fiware_service: FIWARE Service which the Data Model belongs to + :param fiware_service_path: FIWARE Service Path where Data Model is located in its service + :param kwargs: Filters to apply to the query done to Orion Context Broker (order matters) + :return: Integration API URL for querying context data + :rtype: str + """ + api_host = APIBuilder.get_host(const.INTEGRATION_API) + + orion_host = APIBuilder.get_host(const.INTEGRATION_ORION) + orion_host = APIBuilder.encode_orion(orion_host) + + url = '{api_host}/{orion_host}'.format(api_host=api_host, orion_host=orion_host) + + for name, value in kwargs.items(): + url += '/{param}/{value}'.format(param=name, value=quote(value)) + + url += APIBuilder.build_parameters(fiware_service, fiware_service_path) + + return url + + @staticmethod + def get_host(key): + """ + Obtains the host value specified in the config file. + :param str key: Key name of wanted host + :return: Well-formatted host wanted. + :rtype: str + """ + host = ConfigManager.get_value(const.MAIN_SECTION, key) + Validators.is_valid_url(key, host) + return APIBuilder.clean_host(host) + + @staticmethod + def encode_orion(orion_host): + """ + Encodes Orion's host in HTTP request compatible format. + :param str orion_host: Orion's reachable address + :return: Encoded Orion host + :rtype: str + """ + orion_host_parsed = urlparse(orion_host) + if orion_host_parsed.path: + orion_host = '{scheme}://{netloc}{path}'.format(scheme=orion_host_parsed.scheme, + netloc=orion_host_parsed.netloc, + path=quote(orion_host_parsed.path)) + return Helpers.encode_base64_url(orion_host) + + @staticmethod + def clean_host(host): + """ + Remove from a host string those characters not desired. + :param str host: Host value to treat + :return: Cleaned host string + :rtype: str + """ + if host[-1] is '/': + host = host[:-1] + return host + + @staticmethod + def build_parameters(fiware_service, fiware_service_path): + """ + Builds the parameters to add to the API call. 
+ :param str fiware_service: FIWARE service + :param fiware_service_path: FIWARE service path + :return: Portion with HTTP request parameters + :rtype: str + """ + params = '' + if fiware_service: + params += const.API_URL_STRUCTURE_FIWARE_SERVICE.format(value=Helpers.encode_base64_url(fiware_service)) + if fiware_service_path: + if fiware_service_path[0] is '/': + fiware_service_path = fiware_service_path[1:] + params += const.API_URL_STRUCTURE_FIWARE_SERVICEPATH.format( + value=Helpers.encode_base64_url(fiware_service_path)) + return params diff --git a/cb_edp/api/main.py b/cb_edp/api/main.py new file mode 100644 index 0000000..370b411 --- /dev/null +++ b/cb_edp/api/main.py @@ -0,0 +1,198 @@ +import requests +from flask import Flask +from flask import render_template +from flask import request +from flask import Response +from werkzeug.routing import BaseConverter + +import cb_edp.conf.constants as const +from cb_edp.errors.api import APIProcessError +from cb_edp.errors.api import CouldNotReadRDFError +from cb_edp.utils.helpers import Helpers + + +class RegexConverter(BaseConverter): + def __init__(self, url_map, *items): + super(RegexConverter, self).__init__(url_map) + self.regex = items[0] + + +app = Flask(__name__) +app.url_map.converters['regex'] = RegexConverter +default_offset = 0 +default_limit = 1000 + + +@app.route(const.API_URL_STRUCTURE.format(route='/entity/')) +def by_entity(rel_path, orion, datamodel): + """ + Makes a query to Orion API filtering by entity type. + :param str rel_path: Relative path from a regex where the API is located (its value is never used) + :param str orion: Base64 encoded Orion host + :param str datamodel: Data Model (entity) by which the filter will be done + :return: Query response to Orion API call + :rtype: (str, int, ItemsView) + """ + headers = build_headers(request) + + orion_host = Helpers.decode_base64_url(orion) + url = build_url(orion_host, datamodel, request) + + return make_request(url, headers, complete=check_if_complete_request(request)) + + +@app.route(const.API_URL_STRUCTURE.format(route='/entity//location/')) +def by_location(rel_path, orion, datamodel, location): + """ + Makes a query to Orion API filtering by an entity type and a geographical area. + :param str rel_path: Relative path from a regex where the API is located (its value is never used) + :param str orion: Base64 encoded Orion host + :param str datamodel: Data Model (entity) by which the filter will be done + :param str location: Name of a geographical area (political location) to filter the query + :return: Query response to Orion API call + :rtype: (str, int, ItemsView) + """ + headers = build_headers(request) + + orion_host = Helpers.decode_base64_url(orion) + url = build_url(orion_host, datamodel, request) + url += const.API_FIWARE_URL_STRUCTURE_LOCATION.format(location=location) + + return make_request(url, headers, complete=check_if_complete_request(request)) + + +@app.route(const.API_URL_STRUCTURE.format(route=const.RDF_FILE_NAME)) +def rdf(rel_path): + """ + Reads the RDF file generated by the integration and returns it in request's response. 
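+    The file is read from the location resolved by Helpers.get_rdf_path() and
+    returned with the 'application/rdf+xml' MIME type.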
+ :param str rel_path: Relative path from a regex where the API is located (its value is never used) + :return: Generated RDF/XML file + :rtype: Response + :raises CouldNotReadRDFError APIProcessError: + """ + try: + with open(Helpers.get_rdf_path(), 'r') as file: + rdf_xml = file.read() + response = Response() + response.mimetype = 'application/rdf+xml' + response.data = rdf_xml + return response + except FileNotFoundError: + raise CouldNotReadRDFError + except: + raise APIProcessError + + +@app.route(const.API_URL_STRUCTURE.format(route=const.API_URL_STATUS)) +def status(rel_path): + """ + Dummy method that returns a plain response just to check that the API works fine. + :param str rel_path: Relative path from a regex where the API is located (its value is never used) + :return: Plain empty response + :rtype: Response + """ + import cb_edp.utils.messages as msg + return Response(msg.API_STATUS_OK) + + +@app.errorhandler(CouldNotReadRDFError) +@app.errorhandler(APIProcessError) +def handle_custom_api_errors(exception): + """ + Exception handler for those custom errors produced by the Integration Solution API.te + :param CouldNotReadRDFError or APIProcessError exception: Custom error raised by APIs methods + :return: Error page template with a brief error description + """ + return render_template('error.html', error_code=exception.status_code, title=exception.short_message, + message=exception.message) + + +def build_url(host, entity, request): + """ + Generates the URL to make the call to Orion API filtering by an entity type. + :param str host: Host address where Orion is reachable + :param str entity: Entity name by which the filter will be done + :param Request request: Request object representing the one made by the user + :return: Well-formed URL to Orion API + :rtype: str + """ + if host[-1] is '/': + host = host[:-1] + + offset = request.args.get('offset') + limit = request.args.get('limit') + if not offset: + offset = default_offset + if not limit: + limit = default_limit + + return const.API_FIWARE_URL_STRUCTURE.format(host=host, entity=entity, offset=offset, limit=limit) + + +def build_headers(request): + """ + Builds the headers to include in the API call to Orion based in received request. + :param Request request: Instance of the petition used to query Orion API + :return: Headers to include to the request + :rtype: dict + :raises APIProcessError: + """ + headers = {'Accept': 'application/json'} + fiware_service = request.args.get('fs') + fiware_service_path = request.args.get('fp') + if fiware_service: + headers[const.API_FIWARE_SERVICE] = Helpers.decode_base64_url(fiware_service) + if fiware_service_path: + headers[const.API_FIWARE_SERVICEPATH] = '/{service_path}'.format( + service_path=Helpers.decode_base64_url(fiware_service_path)) + else: + if fiware_service_path: + raise APIProcessError + return headers + + +def make_request(url, headers, method='get', complete=True): + """ + Makes a query and returns its response. 
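+    When `complete` is True and Orion's Fiware-Total-Count header reports more
+    entities than the default limit, the call is repeated with an increasing
+    offset and the partial JSON responses are merged into a single result.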
+ :param str url: URL where the call is made + :param dict headers: Orion's required headers to make a proper API call + :param str method: HTTP method used in the request (default 'get') + :param bool complete: Flag that indicates if the request should return every entity by the filter (default 'True') + :return: Query response to Orion API call + :rtype: (str, int, collections.abc.ItemsView) + """ + response = requests.request(method, url, headers=headers) + content = response.content + + if complete: + limit = default_limit + offset = default_offset + default_limit + + count = int(response.headers['Fiware-Total-Count']) + if count > limit: + import json, re + + content = json.loads(content) + url = re.sub(r'(limit=)\d+', '\g<1>{number}'.format(number=limit), url) + while offset < count: + url = re.sub(r'(offset=)\d+', '\g<1>{number}'.format(number=offset), url) + response = requests.request(method, url, headers=headers) + content += json.loads(response.content) + offset += limit + content = json.dumps(content) + + return content, response.status_code, response.headers.items() + + +def check_if_complete_request(request): + """ + Verifies if the request done by the user specifies any of the pagination parameters. + :param Request request: Request object representing the one made by the user + :return: If the user specifies one of the pagination URL parameters + :rtype: bool + """ + return not request.args.get('offset') and not request.args.get('limit') + + +if __name__ == '__main__': + app.run() diff --git a/cb_edp/api/templates/error.html b/cb_edp/api/templates/error.html new file mode 100644 index 0000000..83e00fb --- /dev/null +++ b/cb_edp/api/templates/error.html @@ -0,0 +1,14 @@ + + + + + + API call error: {{ error_code }} + + + +

+        <h1>{{ title }} [{{ error_code }}]</h1>
+
+        <p>{{ message }}</p>
+ + + \ No newline at end of file diff --git a/cb_edp/api/wsgi.py b/cb_edp/api/wsgi.py new file mode 100644 index 0000000..2a844c9 --- /dev/null +++ b/cb_edp/api/wsgi.py @@ -0,0 +1,4 @@ +from cb_edp.api.main import app + +if __name__ == '__main__': + app.run() \ No newline at end of file diff --git a/cb_edp/commands.py b/cb_edp/commands.py new file mode 100644 index 0000000..27e9305 --- /dev/null +++ b/cb_edp/commands.py @@ -0,0 +1,204 @@ +import os + +import click + +import cb_edp.conf.constants as const +import cb_edp.utils.messages as msg +from cb_edp.edp import EDP +from cb_edp.utils.helpers import Helpers + + +class CommandsHelpSorter(click.Group): + def __init__(self, *args, **kwargs): + self.help_priorities = {} + super(CommandsHelpSorter, self).__init__(*args, **kwargs) + + def get_help(self, ctx): + self.list_commands = self.list_commands_for_help + return super(CommandsHelpSorter, self).get_help(ctx) + + def list_commands_for_help(self, ctx): + commands = super(CommandsHelpSorter, self).list_commands(ctx) + return (c[1] for c in sorted( + (self.help_priorities.get(command, 1), command) + for command in commands)) + + def command(self, *args, **kwargs): + help_priority = kwargs.pop('help_priority', 1) + help_priorities = self.help_priorities + + def decorator(f): + cmd = super(CommandsHelpSorter, self).command(*args, **kwargs)(f) + help_priorities[cmd.name] = help_priority + return cmd + + return decorator + + +class MultiValueCommandOption(click.Option): + def __init__(self, *args, **kwargs): + self.save_other_options = kwargs.pop('save_other_options', True) + nargs = kwargs.pop('nargs', -1) + assert nargs == -1, 'nargs, if set, must be -1 not {}'.format(nargs) + super(MultiValueCommandOption, self).__init__(*args, **kwargs) + self._previous_parser_process = None + self._eat_all_parser = None + + def add_to_parser(self, parser, ctx): + + def parser_process(value, state): + done = False + value = [value] + if self.save_other_options: + while state.rargs and not done: + for prefix in self._eat_all_parser.prefixes: + if state.rargs[0].startswith(prefix): + done = True + if not done: + value.append(state.rargs.pop(0)) + else: + value += state.rargs + state.rargs[:] = [] + value = tuple(value) + + self._previous_parser_process(value, state) + + retval = super(MultiValueCommandOption, self).add_to_parser(parser, ctx) + for name in self.opts: + our_parser = parser._long_opt.get(name) or parser._short_opt.get(name) + if our_parser: + self._eat_all_parser = our_parser + self._previous_parser_process = our_parser.process + our_parser.process = parser_process + break + return retval + + +@click.group(cls=CommandsHelpSorter) +@click.option('--config', '-c', type=click.Path(), default=const.CONFIG_FILE_DEFAULT_PATH, + help=msg.COMMANDS_HELP_CONFIG_FILE) +@click.pass_context +def cli(ctx, config): + """ + This application integrates the context data of the CEF Context Broker (CB) with the European Data Portal (EDP). + It makes an RDF graph formatted as an XML document from the Data Models specified by the user. For these Data Models + the integration will create a catalogue, as many datasets as Data Models provided by the command and the + distributions (resources) based on the configuration file too for each of the previous datasets. + + CB-EDP application works with a server that manages the proxy API used for distributions' URLs. This API will + provide the user with an URL showing the RDF file once it is created. 
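+
+    A typical session (illustrative only; Data Model names and options depend on
+    your configuration file) could look like:
+
+        cb-edp new_config
+        cb-edp integrate --datamodels Weather Transportation
+        cb-edp show_integrated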
+ + The available commands allow the user to create a new RDF, update and/or remove existing datasets or add new ones. + The user can also create the configuration file from scratch. + + Use cb-edp COMMAND --help for more details about each command. + """ + ctx.obj = {'config': config} + + +@cli.command(name='integrate', help_priority=1) +@click.option('--datamodels', '-d', default=const.DEFAULT_DATAMODEL_OPTION_COMMAND, show_default=True, required=True, + help=msg.COMMANDS_HELP_DATAMODELS.format(command=const.DEFAULT_DATAMODEL_OPTION_COMMAND), + cls=MultiValueCommandOption) +@click.option('--overwrite', '-o', is_flag=True, help=msg.COMMANDS_HELP_OVERWRITE) +@click.pass_context +def integrate(ctx, datamodels, overwrite): + """ + Integrates new RDF. + + Integrates the Data Models given as parameters and generates a new RDF/XML file with the resultant datasets and + distributions. + + If the RDF file already exists, a confirmation will be prompted (ignored in case of adding the --overwrite flag). + """ + edp = EDP(ctx.obj['config']) + + if type(datamodels) is str: + datamodels = (datamodels,) + + if overwrite: + edp.integrate(datamodels) + elif os.path.exists(Helpers.get_rdf_path()): + if click.confirm(msg.COMMANDS_INTEGRATE_PROMPT): + edp.integrate(datamodels) + else: + edp.integrate(datamodels) + + +@cli.command(name='modify', help_priority=2) +@click.option('--datamodels', '-d', required=True, + help=msg.COMMANDS_HELP_DATAMODELS.format(command=const.DEFAULT_DATAMODEL_OPTION_COMMAND), + cls=MultiValueCommandOption) +@click.pass_context +def modify(ctx, datamodels): + """ + Modifies integrated RDF. + + Modifies the previously integrated RDF/XML file adding or updating the Data Models given as parameters. + + The RDF file will be replaced by the new one after the execution. + """ + edp = EDP(ctx.obj['config']) + edp.modify(datamodels) + + +@cli.command(name="delete", help_priority=3) +@click.option('--datamodels', '-d', + help=msg.COMMANDS_HELP_DATAMODELS.format(command=const.DEFAULT_DATAMODEL_OPTION_COMMAND), + cls=MultiValueCommandOption) +@click.pass_context +def delete(ctx, datamodels): + """ + Deletes Data Models from RDF. + + Removes Data Models given as parameters from the already generated RDF/XML file. The Data Model to delete has to be + integrated before trying to remove it. + + The RDF file will be replaced by the new one after the execution. + """ + edp = EDP(ctx.obj['config']) + edp.delete(datamodels) + + +@cli.command(name='new_config', help_priority=4) +@click.option('--overwrite', '-o', is_flag=True, help=msg.COMMANDS_HELP_OVERWRITE) +@click.pass_context +def new_config(ctx, overwrite): + """ + Creates a configuration file from template. + + It generates an empty configuration file from a template into given path. In case the user does not specify the + path using --config or -c option, the file will be written in the default location: /etc/cb_edp.ini + + If the configuration file already exists, a confirmation will be prompted (ignored in case of adding --overwrite flag). + """ + path = ctx.obj['config'] + if overwrite: + EDP.generate_config_file(path) + elif os.path.exists(path): + if click.confirm(msg.COMMANDS_NEW_CONFIG_PROMPT): + EDP.generate_config_file(path) + else: + EDP.generate_config_file(path) + + +@cli.command(name='show_integrated', help_priority=5) +def show_integrated_datamodels(): + """ + Shows already integrated Data Models. + + Prints which are the Data Models present in the RDF/XML. 
It is necessary to launch the integration at least once to + get some output here. + """ + datamodels = EDP.get_integrated_datamodels() + if not len(datamodels): + click.echo(msg.COMMANDS_SHOW_INTEGRATED_DATAMODELS_EMPTY) + else: + click.echo(msg.COMMANDS_SHOW_INTEGRATED_DATAMODELS) + for datamodel in datamodels: + click.echo('\t' + datamodel) + click.echo() + + +if __name__ == '__main__': + cli(obj={}) diff --git a/cb_edp/conf/__init__.py b/cb_edp/conf/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/cb_edp/conf/constants.py b/cb_edp/conf/constants.py new file mode 100644 index 0000000..d3e3e3a --- /dev/null +++ b/cb_edp/conf/constants.py @@ -0,0 +1,223 @@ +MAIN_SECTION = 'main' + +URI_STRUCTURE = 'uri.structure' +URI_HOST = 'uri.host' +INTEGRATION_API = 'integration.api' +INTEGRATION_ORION = 'integration.orion' + +CATALOGUE_SECTION = 'catalogue' +CATALOGUE_TITLE = 'title' +CATALOGUE_DESCRIPTION = 'description' +CATALOGUE_PUBLISHER_NAME = 'publisher-name' +CATALOGUE_PUBLISHER_URI = 'publisher-uri' +CATALOGUE_PUBLISHER_HOMEPAGE = 'publisher-homepage' +CATALOGUE_PUBLISHER_TYPE = 'publisher-type' +CATALOGUE_HOMEPAGE = 'homepage' +CATALOGUE_ID = 'id' + +DATAMODEL_SECTION = '[datamodel section]' +DATAMODEL_TYPE = 'datamodel.type' +DATAMODEL_FIWARE_SERVICE = 'datamodel.service' +DATAMODEL_FIWARE_SERVICE_PATH = 'datamodel.service-path' +DATASET_TITLE = 'dataset.title' +DATASET_DESCRIPTION = 'dataset.description' +DATASET_CONTACT_POINT = 'dataset.contact-point' +DATASET_KEYWORDS = 'dataset.keywords' +DATASET_PUBLISHER_NAME = 'dataset.publisher-name' +DATASET_PUBLISHER_URI = 'dataset.publisher-uri' +DATASET_PUBLISHER_HOMEPAGE = 'dataset.publisher-homepage' +DATASET_PUBLISHER_TYPE = 'dataset.publisher-type' +DATASET_THEMES = 'dataset.themes' +DATASET_ACCESS_RIGHTS = 'dataset.access-rights' +DATASET_PERIODICITY = 'dataset.periodicity' +DATASET_SPATIAL = 'dataset.spatial' +DATASET_LANDING_PAGE = 'dataset.landing-page' +DATASET_ALLOCATION = 'dataset.allocation' +DATASET_ID = 'dataset.id' +RESOURCE_LICENSE = 'distribution.license' +RESOURCE_LOCATIONS = 'distribution.locations' + +DATAMODELS = { + 'Alerts': { + 'models': ['Alert'], + 'allocation': ['location', 'category'] + }, + 'Parks & Gardens': { + 'models': ['Garden', 'GreenspaceRecord', 'FlowerBed'], + 'allocation': ['location', 'category'] + }, + 'Environment': { + 'models': ['AeroAllergenObserved', 'AirQualityObserved', 'WaterQualityObserved', 'NoiseLevelObserved'], + 'allocation': ['location', 'category'] + }, + 'Point of Interest': { + 'models': ['PointOfInterest', 'Beach', 'Museum'], + 'allocation': ['location', 'category'] + }, + 'Civic Issue Tracking': { + 'models': ['Open311:ServiceType', 'Open311:ServiceRequest'], + 'allocation': ['category'] + }, + 'Street Lightning': { + 'models': ['Streetlight', 'StreetlightModel', 'StreetlightGroup', 'StreetlightControlCabinet'], + 'allocation': ['location', 'category'] + }, + 'Device': { + 'models': ['Device', 'DeviceModel'], + 'allocation': ['category'] + }, + 'Transportation': { + 'models': ['BikeHireDockingStation', 'Road', 'RoadSegment', 'TrafficFlowObserved', 'Vehicle', 'VehicleModel', + 'EVChargingStation'], + 'allocation': ['location', 'category'] + }, + 'Indicators': { + 'models': ['KeyPerformanceIndicator'], + 'allocation': ['location', 'category'] + }, + 'Waste Management': { + 'models': ['WasteContainerIsle', 'WasteContainerModel', 'WasteContainer'], + 'allocation': ['location', 'category'] + }, + 'Parking': { + 'models': ['OffStreetParking', 'OnStreetParking', 
'ParkingGroup', 'ParkingAccess', 'ParkingSpot'], + 'allocation': ['location', 'category'] + }, + 'Weather': { + 'models': ['WeatherObserved', 'WeatherForecast'], + 'allocation': ['location', 'category'] + }, +} +DATAMODELS_DEFAULT = { + 'models': '', + 'allocation': ['category', 'location'] +} + +DATASET_THEMES_RELATION = { + 'agriculture': 'http://publications.europa.eu/resource/authority/data-theme/AGRI', + 'education': 'http://publications.europa.eu/resource/authority/data-theme/EDUC', + 'environment': 'http://publications.europa.eu/resource/authority/data-theme/ENVI', + 'energy': 'http://publications.europa.eu/resource/authority/data-theme/ENER', + 'transport': 'http://publications.europa.eu/resource/authority/data-theme/TRAN', + 'technology': 'http://publications.europa.eu/resource/authority/data-theme/TECH', + 'economy': 'http://publications.europa.eu/resource/authority/data-theme/ECON', + 'social': 'http://publications.europa.eu/resource/authority/data-theme/SOCI', + 'health': 'http://publications.europa.eu/resource/authority/data-theme/HEAL', + 'government': 'http://publications.europa.eu/resource/authority/data-theme/GOVE', + 'regions': 'http://publications.europa.eu/resource/authority/data-theme/REGI', + 'justice': 'http://publications.europa.eu/resource/authority/data-theme/JUST', + 'international': 'http://publications.europa.eu/resource/authority/data-theme/INTR', + 'provisional': 'http://publications.europa.eu/resource/authority/data-theme/OP_DATPRO' +} +DATASET_FREQUENCY_RELATION = { + 'triennial': 'http://publications.europa.eu/resource/authority/frequency/TRIENNIAL', + 'biennial': 'http://publications.europa.eu/resource/authority/frequency/BIENNIAL', + 'annual': 'http://publications.europa.eu/resource/authority/frequency/ANNUAL', + 'semiannual': 'http://publications.europa.eu/resource/authority/frequency/ANNUAL_2', + 'three_times_year': 'http://publications.europa.eu/resource/authority/frequency/ANNUAL_3', + 'quarterly': 'http://publications.europa.eu/resource/authority/frequency/QUARTERLY', + 'bimonthly': 'http://publications.europa.eu/resource/authority/frequency/BIMONTHLY', + 'monthly': 'http://publications.europa.eu/resource/authority/frequency/MONTHLY', + 'semimonthly': 'http://publications.europa.eu/resource/authority/frequency/MONTHLY_2', + 'biweekly': 'http://publications.europa.eu/resource/authority/frequency/BIWEEKLY', + 'three_times_month': 'http://publications.europa.eu/resource/authority/frequency/MONTHLY_3', + 'weekly': 'http://publications.europa.eu/resource/authority/frequency/WEEKLY', + 'semiweekly': 'http://publications.europa.eu/resource/authority/frequency/WEEKLY_2', + 'three_times_week': 'http://publications.europa.eu/resource/authority/frequency/WEEKLY_3', + 'daily': 'http://publications.europa.eu/resource/authority/frequency/DAILY', + 'continuously': 'http://publications.europa.eu/resource/authority/frequency/UPDATE_CONT', + 'irregular': 'http://publications.europa.eu/resource/authority/frequency/IRREG', + 'unknown': 'http://publications.europa.eu/resource/authority/frequency/UNKNOWN', + 'other': 'http://publications.europa.eu/resource/authority/frequency/OTHER', + 'twice_day': 'http://publications.europa.eu/resource/authority/frequency/DAILY_2', + 'continuous': 'http://publications.europa.eu/resource/authority/frequency/CONT', + 'never': 'http://publications.europa.eu/resource/authority/frequency/NEVER', + 'quadrennial': 'http://publications.europa.eu/resource/authority/frequency/QUADRENNIAL', + 'quinquennial': 
'http://publications.europa.eu/resource/authority/frequency/QUINQUENNIAL', + 'hourly': 'http://publications.europa.eu/resource/authority/frequency/HOURLY', + 'decennial': 'http://publications.europa.eu/resource/authority/frequency/DECENNIAL', + 'provisional': 'http://publications.europa.eu/resource/authority/frequency/OP_DATPRO' +} +DATASET_ACCESS_RIGHTS_RELATION = { + 'public': 'http://publications.europa.eu/resource/authority/access-right/PUBLIC', + 'restricted': 'http://publications.europa.eu/resource/authority/access-right/RESTRICTED', + 'non_public': 'http://publications.europa.eu/resource/authority/access-right/NON_PUBLIC', + 'provisional': 'http://publications.europa.eu/resource/authority/access-right/OP_DATPRO' +} +PUBLISHER_TYPE_RELATION = { + 'academia_scientific_org': 'http://purl.org/adms/publishertype/Academia-ScientificOrganisation', + 'company': 'http://purl.org/adms/publishertype/Company', + 'industry_consortium': 'http://purl.org/adms/publishertype/IndustryConsortium', + 'local_authority': 'http://purl.org/adms/publishertype/LocalAuthority', + 'national_authority': 'http://purl.org/adms/publishertype/NationalAuthority', + 'nongovernmental_org': 'http://purl.org/adms/publishertype/NonGovernmentalOrganisation', + 'nonprofit_org': 'http://purl.org/adms/publishertype/NonProfitOrganisation', + 'private_individual': 'http://purl.org/adms/publishertype/PrivateIndividual(s)', + 'regional_authority': 'http://purl.org/adms/publishertype/RegionalAuthority', + 'standardisation_body': 'http://purl.org/adms/publishertype/StandardisationBody', + 'supranational_authority': 'http://purl.org/adms/publishertype/SupraNationalAuthority' +} + +RDF_CATALOGUE = 'dcat:Catalog' +RDF_CATALOGUE_DATASET = 'dcat:dataset' +RDF_DATASET = 'dcat:Dataset' +RDF_DATASET_RESOURCE = 'dcat:distribution' +RDF_RESOURCE = 'dcat:Distribution' +RDF_ORGANIZATION = 'foaf:Organization' +RDF_ATTRIBUTE_ABOUT = 'rdf:about' +RDF_ATTRIBUTE_RESOURCE = 'rdf:resource' +RDF_IDENTIFIER = 'dct:identifier' +RDF_TITLE = 'dct:title' +RDF_DESCRIPTION = 'dct:description' +RDF_PUBLISHER = 'dct:publisher' +RDF_HOMEPAGE = 'foaf:homepage' +RDF_TYPE = 'rdf:type' +RDF_ISSUED = 'dct:issued' +RDF_MODIFIED = 'dct:modified' +RDF_THEME = 'dcat:theme' +RDF_KEYWORD = 'dcat:keyword' +RDF_CONTACT_POINT = 'dcat:contactPoint' +RDF_CONTACT_POINT_NAME = 'vcard:fn' +RDF_CONTACT_POINT_EMAIL = 'vcard:hasEmail' +RDF_PERIODICITY = 'dct:accrualPeriodicity' +RDF_RIGHTS = 'dct:accessRights' +RDF_LANDING_PAGE = 'dcat:landingPage' +RDF_SPATIAL = 'dct:spatial' +RDF_SPATIAL_GEOMETRY = 'locn:geometry' +RDF_ACCESS_URL = 'dcat:accessURL' +RDF_DOWNLOAD_URL = 'dcat:downloadURL' +RDF_LICENSE = 'dct:license' +RDF_ORGANIZATION_NAME = 'foaf:name' +RDF_ELEMENT_XPATH = './/{element}' +RDF_ATTRIBUTE_XPATH = '{element}[@{attribute}="{value}"]' + +from enum import Enum + + +class Model(Enum): + CATALOGUE = 'catalogue' + DATASET = 'dataset' + RESOURCE = 'distribution' + +class Allocation(Enum): + CATEGORY = 'category' + LOCATION = 'location' + +API_FIWARE_SERVICE = 'fiware-service' +API_FIWARE_SERVICEPATH = 'fiware-servicepath' +API_FIWARE_URL_STRUCTURE = '{host}/v2/entities?type={entity}&options=keyValues&options=count&offset={offset}&limit={limit}' +API_FIWARE_URL_STRUCTURE_LOCATION = '&q=address.addressRegion=={location}&q=address.addressLocality=={location}' +API_URL_STRUCTURE_FIWARE_SERVICE = '?fs={value}' +API_URL_STRUCTURE_FIWARE_SERVICEPATH = '&fp={value}' +API_URL_STRUCTURE = '/api/{route}' +API_URL_STATUS = 'status' +CONFIG_FILE_DEFAULT_PATH = '/etc/cb_edp.ini' 
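+# Illustrative expansion of the Orion query template defined above (host and
+# entity are placeholders; offset/limit defaults are set in cb_edp.api.main):
+# http://orion.example.org:1026/v2/entities?type=WeatherObserved&options=keyValues&options=count&offset=0&limit=1000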
+CONFIG_FILE_TEMPLATE_PATH = '/conf/template.ini' +CONFIG_FILE_DATASETS_IDS_PATH = '/conf/integrated.ini' +RDF_FILE_NAME = 'catalogue.rdf' +RDF_FILE_PATH = '/api/' + RDF_FILE_NAME +RDF_FILE_TEMPLATE_PATH = '/rdf/template.xml' +URI_STRUCTURE_DEFAULT = 'http://{host}/cb/' + +DEFAULT_DATAMODEL_OPTION_COMMAND = 'all' +SIMPLE_DATE_FORMAT = '%H:%M:%S' diff --git a/cb_edp/conf/integrated.ini b/cb_edp/conf/integrated.ini new file mode 100644 index 0000000..e69de29 diff --git a/cb_edp/conf/manager.py b/cb_edp/conf/manager.py new file mode 100644 index 0000000..9b9abbe --- /dev/null +++ b/cb_edp/conf/manager.py @@ -0,0 +1,190 @@ +import os + +from configobj import ConfigObj +from configobj import ConfigObjError + +import cb_edp.conf.constants as const +from cb_edp.errors.config import ConfigFilePathError +from cb_edp.errors.config import NoIDForDataModelError +from cb_edp.errors.config import SectionKeyError +from cb_edp.utils.helpers import Helpers + + +class ConfigManager: + """ + Configuration files manager class. Implements the methods needed to work with the main solution's configuration + file and the file used to store datasets' IDs. It works as a singleton. + """ + __instance = None + __datasets_ids = None + __config_file_path = None + + def __init__(self, config_file_path): + """ + Instantiate the ConfigManager class. + :param str config_file_path: Path where the config file is located. + :raises: ConfigObjError ConfigFilePathError + """ + try: + + self.__config = ConfigObj(config_file_path, write_empty_values=True, list_values=False, encoding='utf8', + raise_errors=True) + except ConfigObjError as error: + raise ConfigObjError(error.msg.strip('.')) + except Exception: + raise ConfigFilePathError(config_file_path) + + @classmethod + def get_instance(cls): + """ + Singleton method that retrieves the main ConfigManager instance. + If it is not instantiated yet, it does it with the __config_file_path that must be previously specified. + :return: The ConfigManager class singleton. + :rtype: ConfigManager + """ + if cls.__instance is None: + cls.__instance = ConfigManager(cls.__config_file_path) + return cls.__instance + + @classmethod + def get_datasets_ids_instance(cls): + """ + Singleton method that retrieves the ConfigManager instance for the datasets IDs storing file. + If it is not instantiated yet, it does it with the the static defined path. + :return: The ConfigManager class singleton. + :rtype: ConfigManager + """ + if cls.__datasets_ids is None: + cls.__datasets_ids = ConfigManager(Helpers.get_datasets_ids_file_path()) + return cls.__datasets_ids + + @classmethod + def set_config_path(cls, config_file_path): + """ + Sets the value for the configuration file location. + :param str config_file_path: Path where the config file is. + :return: None + """ + if os.path.isdir('/'.join(config_file_path.split('/')[:-1])): + if os.path.exists(config_file_path): + cls.__config_file_path = config_file_path + return + raise ConfigFilePathError(config_file_path) + + @classmethod + def _get_configobj(cls, config_manager): + """ + Returns the previously set property __config parser for a config manager. + :param ConfigManager config_manager: Instance of the config manager + :return: The instance of the ConfigObj class + :rtype: ConfigObj + """ + return config_manager.__config + + @classmethod + def set_value(cls, section, key, value): + """ + Given a section, a key and a value, writes the value to the corresponding section-key in the file. 
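+        The change is only applied to the in-memory ConfigObj; call update_file()
+        afterwards to persist it to disk.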
+ :param str section: Section where the key is located in config file. + :param str key: Name of the key corresponding to the value to be added. + :param str value: value to be added in the corresponding section-key. + :return: None + """ + cls._get_configobj(cls.get_instance())[section][key] = value + + @classmethod + def get_value(cls, section, key, default=''): + """ + Reads a value from the config file. + Raises a KeyError exception if either section or key are not preset. + :param section: Section where the key is located in config file. + :param key: Name of the key whose value has to be returned. + :param default: If the field is empty the method will return this value. + :return: The value of the corresponding section-key. + :rtype: str + :raises SectionKeyError: + """ + try: + value = cls._get_configobj(cls.get_instance())[section][key] + return value if value else default + except KeyError: + raise SectionKeyError(section, key) + + @classmethod + def get_keys(cls, section): + """ + Returns the keys present in the configuration file for a specific section. + :param str section: Section of the config file whose keys have to be returned + :return: Collection of keys for a given section + :rtype: list[str] + """ + return [key for key in cls._get_configobj(cls.get_instance())[section]] + + @classmethod + def get_datamodels(cls): + """ + Returns the entire list of Data Models specified in the configuration file. + :return: Data Models written in config file + :rtype: list[str] + """ + sections = cls._get_configobj(cls.get_instance()).keys() + for section in [const.MAIN_SECTION, const.CATALOGUE_SECTION]: + sections.remove(section) + return sections + + @classmethod + def update_file(cls): + """ + Writes all the changes made using the set_ methods in the config file. + :return: None + """ + cls._get_configobj(cls.get_instance()).write() + + @classmethod + def get_dataset_id(cls, datamodel): + """ + Reads from the datasets IDs file the ID for a Data Model. + :param str datamodel: Data Model to look for + :return: ID of the given Data Model + :rtype: str + """ + ids = cls._get_configobj(cls.get_datasets_ids_instance()) + if datamodel in ids: + return ids[datamodel] + else: + return '' + + @classmethod + def save_dataset_id(cls, datamodel, id): + """ + Saves the ID of a dataset/Data Model in the datasets IDs file writing it on disk. + :param str datamodel: Data Model whose ID will be stored + :param str id: Dataset ID to store + :return: None + """ + ids = cls._get_configobj(cls.get_datasets_ids_instance()) + ids[datamodel] = id + ids.write() + + @classmethod + def remove_dataset_id(cls, datamodel): + """ + Removes from the datasets IDs file the entry of a given Data Model. + :param str datamodel: Data Model whose ID will be removed + :return: None + """ + ids = cls._get_configobj(cls.get_datasets_ids_instance()) + if datamodel not in ids: + raise NoIDForDataModelError(datamodel) + ids.pop(datamodel) + ids.write() + + @classmethod + def get_integrated_datasets(cls): + """ + Returns a collection with the Data Models already integrated. 
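+        The list mirrors the keys stored in the datasets IDs file
+        (conf/integrated.ini), i.e. the Data Models previously saved with
+        save_dataset_id().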
+ :return: List with the Data Models integrated + :rtype: list[str] + """ + ids = cls._get_configobj(cls.get_datasets_ids_instance()) + return list(ids.keys()) diff --git a/cb_edp/conf/template.ini b/cb_edp/conf/template.ini new file mode 100644 index 0000000..9a9b117 --- /dev/null +++ b/cb_edp/conf/template.ini @@ -0,0 +1,104 @@ +[main] +# How the URI for the metadata will be formed +# {host} field indicates where the uri.host var of this config file will be set and must be present +# For example, for uri.host value 'europeandataportal.eu', the URI will be like: +# http://europeandataportal.eu/cb/dataset/ +uri.structure = http://{host}/cb/ +# {host} value for uri.structure field (can include a port) +uri.host = +# URL where solution's API is deployed (without final slash) +# It must contain the host (IP or DNS) and the relative path where the API was deployed in NGINX +# e.g. http://www.europeandataportal.eu/context-data/api or http://217.172.12.169/api +integration.api = +# URL where Orion is deployed (without final slash) +integration.orion = + +[catalogue] +# Datasets catalogue title (mandatory) +title = +# Datasets catalogue description (mandatory) +description = +# URI of catalogue's publisher (mandatory) +# It must be the same used in other applications or previous harvestings in the EDP +# It is important to keep same URIs to link context data with other information already published in the EDP +publisher-uri = +# Name of catalogue's publisher (mandatory) +publisher-name = +# URL to catalogue's publisher homepage +publisher-homepage = +# Publisher clasification (unique value) +# These types are the ones defined by DCAT-AP +# Possible values: +# academia_scientific_org company industry_consortium local_authority +# national_authority nongovernmental_org nonprofit_org private_individual +# regional_authority standardisation_body supranational_authority +publisher-type = +# URL where the catalogue is located +homepage = + +[datamodel.template] +# Type name of the Data Model/entity (mandatory) +datamodel.type = +# The service where these entities are grouped +datamodel.service = +# The path where the entities are located inside the service specified +datamodel.service-path = +# The dataset title must be in English (mandatory) +dataset.title = +# The dataset description must be in English (mandatory) +# If the description is multi-lined you must write it between ''' at phrase start and end +dataset.description = +# Publisher contact email +dataset.contact-point = +# Keywords (tags) of related subjects lower case recommended (percent sign separated) +dataset.keywords = +# URI of dataset's publisher +# It must be the same used in other applications or previous harvestings in the EDP +# It is important to keep same URIs to link context data with other information already published in the EDP +dataset.publisher-uri = +# Name of dataset's publisher +dataset.publisher-name = +# Publisher classification (unique value) +# These types are the ones defined by DCAT-AP +# Possible values: +# academia_scientific_org company industry_consortium local_authority +# national_authority nongovernmental_org nonprofit_org private_individual +# regional_authority standardisation_body supranational_authority +dataset.publisher-type = +# URL to the catalogue publisher's homepage +dataset.publisher-homepage = +# Themes that apply to this dataset's Data Model (separated by blank) +# These themes are the ones defined by DCAT-AP +# Possible values: +# agriculture education environment energy transport technology 
economy +# social health government regions justice international provisional +dataset.themes = +# Rights applicable for dataset accessing (unique value) +# These types are the ones defined by DCAT-AP +# Possible values: +# public restricted non_public provisional +dataset.access-rights = +# Periodicity within the data from the CB is updated that apply to this dataset's Data Model (unique value) +# These frequencies are the ones defined by DCAT-AP +# Possible values: +# triennial biennial annual semiannual three_times_year quarterly bimonthly +# monthly semimonthly biweekly three_times_month weekly semiweekly three_times_week +# daily continuously irregular unknown other twice_day continuous never quadrennial +# quinquennial hourly decennial provisional +dataset.periodicity = +# A JSON file containing the polygon info of the area covered by the dataset +# It must be an absolute path to the file +# The JSON file can be obtained from: +# http://geojson.io +dataset.spatial = +# A webpage that provides access to the dataset, its distributions and/or additional information +dataset.landing-page = +# Possible distribution values: +# location and category (only if you do not specify a global Data Model -e.g. WeatherForecast instead of Weather) +dataset.allocation = +# Values for the location distribution (percent sign separated) +# The values set here should appear as address metadata in CBs data +distribution.locations = +# URL to distributions license information +# e.g. http://creativecommons.org/licenses/by/4.0/ +distribution.license = diff --git a/cb_edp/edp.py b/cb_edp/edp.py new file mode 100644 index 0000000..f3b56fd --- /dev/null +++ b/cb_edp/edp.py @@ -0,0 +1,177 @@ +import logging +import sys +from datetime import datetime +from shutil import copyfile + +import requests + +import cb_edp.conf.constants as const +import cb_edp.utils.messages as msg +from cb_edp.conf.manager import ConfigManager +from cb_edp.errors.rdf import LastDatasetError +from cb_edp.model.catalogue import Catalogue +from cb_edp.model.dataset import Dataset +from cb_edp.rdf.serializer import Serializer +from cb_edp.utils.helpers import Helpers +from cb_edp.utils.helpers import Validators + + +class EDP(object): + """ + Solution's core class. It provides de necessary methods to perform the features provided to the user. + """ + + def __init__(self, file_path): + """ + Instantiate the EDP core class. 
+ :param str file_path: Path to the configuration file + """ + try: + Helpers.instantiate_logger() + + logging.debug(msg.EDP_INITIALIZING) + logging.debug(msg.EDP_READING_CONFIG.format(path=file_path)) + ConfigManager.set_config_path(file_path) + + Validators.is_informed(const.URI_STRUCTURE, ConfigManager.get_value(const.MAIN_SECTION, const.URI_STRUCTURE)) + Validators.is_informed(const.URI_HOST, ConfigManager.get_value(const.MAIN_SECTION, const.URI_HOST)) + integration_api = ConfigManager.get_value(const.MAIN_SECTION, const.INTEGRATION_API) + Validators.is_informed(const.INTEGRATION_API, integration_api) + Validators.is_valid_url(const.INTEGRATION_API, integration_api) + Validators.is_informed(const.MAIN_SECTION, ConfigManager.get_value(const.MAIN_SECTION, const.INTEGRATION_ORION)) + + integration_api = integration_api.strip('/') + logging.debug(msg.EDP_CHECK_API_STATUS.format(host=integration_api)) + response = requests.get('{host}/{route}'.format(host=integration_api, route=const.API_URL_STATUS)) + if response.status_code != 200: + logging.warning(msg.EDP_API_STATUS_DOWN.format(host=integration_api)) + except ValueError: + import click + click.echo(msg.EDP_ERROR_INSTANTIATING_LOGGER.format( + date=datetime.strftime(datetime.now(), const.SIMPLE_DATE_FORMAT), script=__name__)) + sys.exit() + except Exception as error: + logging.error(error) + sys.exit() + + def integrate(self, datamodels): + """ + Core function that integrates a new RDF file with a collection of Data Models. + It removes every previously stored dataset ID. Then, it checks if the param passed is the Data Models' + collection or 'all' value (to integrate every Data Model in config file). At last, it serializes the Data Models + passed by and writes the entire new RDF into the filesystem. + :param tuple datamodels: Data Models that will be added to the RDF file + :return: None + """ + logging.info(msg.EDP_INTEGRATION_START.format(datamodels=', '.join(datamodels))) + + try: + already_integrated = ConfigManager.get_integrated_datasets() + for dataset in already_integrated: + ConfigManager.remove_dataset_id(dataset) + + datamodels = EDP.check_datamodels_parameter(datamodels, False) + catalogue = Catalogue(datamodels) + rdf = Serializer.serialize_rdf_create(catalogue) + Serializer.write_rdf(rdf) + logging.info(msg.EDP_INTEGRATION_FINISHED_OK) + except Exception as error: + logging.error(error) + logging.info(msg.EDP_INTEGRATION_FINISHED_KO) + + def modify(self, datamodels): + """ + Core function that modifies an existing RDF file with new Data Models or upgrades of already existing ones. + It checks if the param passed is the Data Models' collection or 'all' value (to work with every Data Model in + config file). Then it modifies the current RDF file with the new datasets and writes the new version of the RDF + into the filesystem. 
+ :param tuple datamodels: Data Models that will be added to or modified in the RDF file + :return: None + """ + logging.info(msg.EDP_MODIFICATION_START.format(datamodels=', '.join(datamodels))) + + try: + rdf = None + datamodels = EDP.check_datamodels_parameter(datamodels, False) + for datamodel in datamodels: + dataset = Dataset(datamodel) + rdf = Serializer.serialize_rdf_update(dataset, rdf) + Serializer.write_rdf(rdf) + logging.info(msg.EDP_MODIFICATION_FINISHED_OK) + except Exception as error: + logging.error(error) + logging.info(msg.EDP_MODIFICATION_FINISHED_KO) + + def delete(self, datamodels): + """ + Core function that removes from an existing RDF file datasets from the specified Data Models. + It checks if the param passed is the Data Models' collection or 'all' value (to work with every Data Model in + config file). Then it removes the datasets from current RDF file and writes it into the filesystem. In case that + the Data Model to remove is the last one in the RDF, it deletes the entire file. + :param tuple datamodels: Data Models that will be removed from the RDF file + :return: None + """ + logging.info(msg.EDP_DELETE_START.format(datamodels=', '.join(datamodels))) + + try: + rdf = None + datamodels = EDP.check_datamodels_parameter(datamodels, True) + for dataset in datamodels: + rdf = Serializer.serialize_rdf_remove(dataset, ConfigManager.get_dataset_id(dataset), rdf) + ConfigManager.remove_dataset_id(dataset) + Serializer.write_rdf(rdf) + logging.info(msg.EDP_DELETE_FINISHED_OK) + except LastDatasetError as error: + logging.warning(error) + for dataset in ConfigManager.get_integrated_datasets(): + ConfigManager.remove_dataset_id(dataset) + import os + os.remove(Helpers.get_rdf_path()) + logging.info(msg.EDP_DELETE_FINISHED_OK) + except Exception as error: + logging.error(error) + logging.info(msg.EDP_DELETE_FINISHED_KO) + + @staticmethod + def generate_config_file(path): + """ + Core function that creates a new configuration as a copy of the template. + :param str path: Path where the config file will be written + :return: None + """ + try: + Helpers.instantiate_logger() + + copyfile(Helpers.get_config_file_template_path(), path) + logging.info(msg.EDP_CONFIG_FILE_GENERATION.format(path=path)) + except ValueError: + import click + click.echo(msg.EDP_ERROR_INSTANTIATING_LOGGER.format( + date=datetime.strftime(datetime.now(), const.SIMPLE_DATE_FORMAT), script=__name__)) + except Exception: + logging.error(msg.EDP_CONFIG_FILE_GENERATION_FAILED) + + @staticmethod + def check_datamodels_parameter(parameter, integrated): + """ + Core function that checks datamodels parameter value. If the first value of the tuple is equal to 'all' (default + value), it searches in config file for every Data Model section to return them. If not, it returns the parameter + as the user set it. + :param tuple parameter: Tuple of strings containing the Data Model informed by the user + :param bool integrated: If the Data Models searched are those already integrated or not + :return: Collection of Data Models to work with + :rtype: list[str] + """ + if parameter[0] == const.DEFAULT_DATAMODEL_OPTION_COMMAND: + return ConfigManager.get_integrated_datasets() if integrated else ConfigManager.get_datamodels() + return list(parameter) + + @staticmethod + def get_integrated_datamodels(): + """ + Core function that returns the collection of Data Models (sections from config file) that are currently included + in the RDF/XML file. 
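A hedged sketch of the full EDP lifecycle built only from the methods above; the paths and the Data Model section name are placeholders.

from cb_edp.edp import EDP

EDP.generate_config_file('/tmp/edp-config.ini')    # copies conf/template.ini to the given path
edp = EDP('/tmp/edp-config.ini')                   # validates the [main] keys and pings integration.api
edp.integrate(('all',))                            # 'all' expands to every Data Model section in the config
edp.modify(('datamodel.parking',))                 # hypothetical section name
print(EDP.get_integrated_datamodels())
edp.delete(('all',))                               # removes every integrated dataset (and the RDF file itself)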
+ :return: Collection of Data Models integrated + :rtype: list[str] + """ + return ConfigManager.get_integrated_datasets() diff --git a/cb_edp/errors/__init__.py b/cb_edp/errors/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/cb_edp/errors/api.py b/cb_edp/errors/api.py new file mode 100644 index 0000000..4abcd5a --- /dev/null +++ b/cb_edp/errors/api.py @@ -0,0 +1,37 @@ +import cb_edp.utils.messages as msg + + +class CouldNotReadRDFError(Exception): + def __init__(self, payload=None, message=None, short_message=None): + """ + This exception is raised when there is a problem accessing RDF file. + :param int or None status_code: Response's status code + :param str or None payload: Additional information for the response + :param str or None message: Custom exception message + :param str or None short_message: Custom exception short message + """ + Exception.__init__(self) + default_message = msg.API_COULD_NOT_READ_RDF_ERROR + default_short_message = msg.API_COULD_NOT_READ_RDF_SHORT_ERROR + self.message = message if message else default_message + self.status_code = 404 + self.payload = payload + self.short_message = short_message if short_message else default_short_message + + +class APIProcessError(Exception): + def __init__(self, payload=None, message=None, short_message=None): + """ + This exception is raised when there is a problem processing the query submitted by the user. + :param int or None status_code: Response's status code + :param str or None payload: Additional information for the response + :param str or None message: Custom exception message + :param str or None short_message: Custom exception short message + """ + Exception.__init__(self) + default_message = msg.API_PROCESS_FAILED_ERROR + default_short_message = msg.API_PROCESS_FAILED_SHORT_ERROR + self.message = message if message else default_message + self.status_code = 500 + self.payload = payload + self.short_message = short_message if short_message else default_short_message diff --git a/cb_edp/errors/config.py b/cb_edp/errors/config.py new file mode 100644 index 0000000..7b51d99 --- /dev/null +++ b/cb_edp/errors/config.py @@ -0,0 +1,73 @@ +import cb_edp.utils.messages as msg + + +class ConfigFilePathError(Exception): + def __init__(self, path, message=None): + """ + This exception is raised if the path informed for the configuration file is not correct. + :param str or None path: Provided path to config file + :param str or None message: Custom exception message + """ + default_message = msg.CONFIG_FILE_PATH_ERROR.format(path=path) + super(ConfigFilePathError, self).__init__(message if message else default_message) + + +class NotInformedFieldError(Exception): + def __init__(self, field, message=None): + """ + This exception is raised if a field is not informed in the configuration file. + :param str or None field: Name of the field not informed + :param str or None message: Custom exception message + """ + default_message = msg.NOT_INFORMED_FIELD_ERROR.format(field=field) + super(NotInformedFieldError, self).__init__(message if message else default_message) + + +class WrongFormatError(Exception): + def __init__(self, field, value, message=None): + """ + This exception is raised when a value has wrong format. 
+ :param str or None field: Name of the field + :param str or None value: Value of the field + :param str or None message: Custom exception message + """ + default_message = msg.WRONG_FORMAT_ERROR.format(field=field, value=value) + super(WrongFormatError, self).__init__(message if message else default_message) + + +class SectionKeyError(Exception): + def __init__(self, section, key, message=None): + """ + This exception is raised if a key is not present in the given section. + :param str or None section: Section from the configuration file + :param str or None key: Key of a section for a value from the configuration file + :param str or None message: Custom exception message + """ + default_message = msg.SECTION_KEY_ERROR.format(key=key, section=section) + super(SectionKeyError, self).__init__(message if message else default_message) + + +class NotExpectedValueError(Exception): + def __init__(self, field, value, choices, message=None): + """ + This exception is raised if a field is informed with a value not expected. + :param str or None field: Name of the field wrong informed + :param str or None value: Value of the field + :param str or list[str] or None choices: Collection of possible values for the field + :param str or None message: Custom exception message + """ + if type(choices) is list: + choices = ', '.join(choices) + default_message = msg.NOT_EXPECTED_VALUE_ERROR.format(field=field, value=value, choices=choices) + super(NotExpectedValueError, self).__init__(message if message else default_message) + + +class NoIDForDataModelError(Exception): + def __init__(self, datamodel, message=None): + """ + This exception is raised if a Data Model's ID should exist but it does not. + :param str datamodel: Name of the Data Model without ID + :param str or None message: Custom exception message + """ + default_message = msg.NOT_ID_FOR_DATAMODEL_ERROR.format(datamodel=datamodel) + super(NoIDForDataModelError, self).__init__(message if message else default_message) diff --git a/cb_edp/errors/rdf.py b/cb_edp/errors/rdf.py new file mode 100644 index 0000000..7ecefda --- /dev/null +++ b/cb_edp/errors/rdf.py @@ -0,0 +1,55 @@ +import cb_edp.utils.messages as msg + + +class WritingRDFError(Exception): + def __init__(self, path, message=None): + """ + This exception is raised when an error writing the RDF file occurs. + :param str or None path: Path where the RDF file is located + :param str or None message: Custom exception message + """ + default_message = msg.WRITING_RDF_ERROR.format(path=path) + super(WritingRDFError, self).__init__(message if message else default_message) + + +class RDFFileNotFoundError(Exception): + def __init__(self, path, message=None): + """ + This exception is raised when the solution is trying to load an RDF but it cannot find it. + :param str or None path: Path where the RDF file is located + :param str or None message: Custom exception message + """ + default_message = msg.RDF_FILE_NOT_FOUND_ERROR.format(path=path) + super(RDFFileNotFoundError, self).__init__(message if message else default_message) + + +class RDFParserError(Exception): + def __init__(self, path, message=None): + """ + This exception is raised when there is a problem parsing an RDF file to a XML tree. 
+ :param str or None path: Path where the RDF file is located + :param str or None message: Custom exception message + """ + default_message = msg.RDF_PARSING_ERROR.format(path=path) + super(RDFParserError, self).__init__(message if message else default_message) + + +class DatasetNotFoundError(Exception): + def __init__(self, datamodel, message=None): + """ + This exception is raised when trying to remove a dataset from an RDF and it does not exists. + :param str or None datamodel: Name of the Data Model trying to remoce + :param str or None message: Custom exception message + """ + default_message = msg.DATASET_NOT_FOUND_ERROR.format(datamodel=datamodel) + super(DatasetNotFoundError, self).__init__(message if message else default_message) + + +class LastDatasetError(Exception): + def __init__(self, message=None): + """ + This exception is raised when trying to remove a dataset from an RDF when this is the last one remaining. + :param str or None message: Custom exception message + """ + default_message = msg.LAST_DATASET_ERROR + super(LastDatasetError, self).__init__(message if message else default_message) diff --git a/cb_edp/model/__init__.py b/cb_edp/model/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/cb_edp/model/catalogue.py b/cb_edp/model/catalogue.py new file mode 100644 index 0000000..4d99c07 --- /dev/null +++ b/cb_edp/model/catalogue.py @@ -0,0 +1,76 @@ +import logging + +import cb_edp.conf.constants as const +import cb_edp.utils.messages as msg +from cb_edp.conf.constants import Model +from cb_edp.conf.manager import ConfigManager +from cb_edp.model.dataset import Dataset +from cb_edp.utils.helpers import Helpers +from cb_edp.utils.validators import Validators + + +class Catalogue: + """ + A class that represents the metadata catalogue that will be integrated. + + :param list[str] sections: Config file sections to integrate. + :param str title: Datasets catalogue title. + :param str description: Datasets catalogue description. + :param str publisher_name: Name of catalogue's publisher. + :param str homepage: URL where the catalogue is located. + :param str issued: Date when the catalogue was created. + :param str id: Catalogue's unique identifier. + :param str uri: URI built by a URL and catalogue's ID. + :param list[Dataset] datasets: Collection containing the datasets that will be integrated. + """ + + def __init__(self, sections): + """ + Initializes Catalogue. + :param list[str] sections: Config file sections to integrate. 
+ :param bool new: Indicates if the Catalogue to create is a new instance or one instantiated in a previous run + """ + logging.debug(msg.CATALOGUE_INSTANTIATING_MODEL_START.format(datamodels=', '.join(sections))) + + self.sections = sections + self.title = ConfigManager.get_value(const.CATALOGUE_SECTION, const.CATALOGUE_TITLE) + Validators.is_informed(const.CATALOGUE_TITLE, self.title) + self.description = ConfigManager.get_value(const.CATALOGUE_SECTION, const.CATALOGUE_DESCRIPTION) + Validators.is_informed(const.CATALOGUE_DESCRIPTION, self.description) + self.publisher_name = ConfigManager.get_value(const.CATALOGUE_SECTION, const.CATALOGUE_PUBLISHER_NAME) + Validators.is_informed(const.CATALOGUE_PUBLISHER_NAME, self.publisher_name) + self.publisher_uri = ConfigManager.get_value(const.CATALOGUE_SECTION, const.CATALOGUE_PUBLISHER_URI) + Validators.is_informed(const.CATALOGUE_PUBLISHER_URI, self.publisher_uri) + Validators.is_valid_url(const.CATALOGUE_PUBLISHER_URI, self.publisher_uri) + self.publisher_type = Helpers.transform_vocabulary(const.CATALOGUE_PUBLISHER_TYPE, + ConfigManager.get_value(const.CATALOGUE_SECTION, + const.CATALOGUE_PUBLISHER_TYPE), + const.PUBLISHER_TYPE_RELATION) + self.publisher_homepage = ConfigManager.get_value(const.CATALOGUE_SECTION, const.CATALOGUE_PUBLISHER_HOMEPAGE) + Validators.is_valid_url(const.CATALOGUE_PUBLISHER_HOMEPAGE, self.publisher_homepage) + self.homepage = ConfigManager.get_value(const.CATALOGUE_SECTION, const.CATALOGUE_HOMEPAGE) + Validators.is_valid_url(const.CATALOGUE_SECTION, self.homepage) + uri_host = ConfigManager.get_value(const.MAIN_SECTION, const.URI_HOST) + uri_structure = ConfigManager.get_value(const.MAIN_SECTION, const.URI_STRUCTURE, const.URI_STRUCTURE_DEFAULT) + self.uri, self.id = Helpers.generate_uri(uri_host, uri_structure, Model.CATALOGUE) + self.issued = Helpers.get_issued_date(self.id) + + logging.debug(msg.CATALOGUE_INSTANTIATING_MODEL_FINISHED) + + self.datasets = self.create_datasets(sections) + + @staticmethod + def create_datasets(sections): + """ + Instantiate datasets set by config file. Then it adds them to Catalogue instance. + :param list[str] sections: Sections' name of the dataset to add to the catalogue. + :return: Collection of datasets belonging to the catalogue + :rtype: list[Dataset] + """ + datasets = [] + for section in sections: + dataset = Dataset(section) + datasets.append(dataset) + + logging.debug(msg.CATALOGUE_DATASETS_CREATED.format(datasets=len(datasets))) + return datasets diff --git a/cb_edp/model/dataset.py b/cb_edp/model/dataset.py new file mode 100644 index 0000000..d4c15e9 --- /dev/null +++ b/cb_edp/model/dataset.py @@ -0,0 +1,110 @@ +import logging + +import cb_edp.conf.constants as const +import cb_edp.utils.messages as msg +from cb_edp.conf.constants import Model +from cb_edp.conf.manager import ConfigManager +from cb_edp.model.resource import Resource +from cb_edp.utils.helpers import Helpers +from cb_edp.utils.validators import Validators + + +class Dataset: + """ + A class that represents a single instance of a dataset of those that will be integrated. 
+ + :param str section: Section in the config file where it belongs + :param str service: FIWARE Service which the Data Models for this dataset belong to + :param str service_path: FIWARE Service Path where the Data Models of this dataset are located in their service + :param str type: Type name of the Data Model/entity + :param str title: The dataset title + :param str description: Dataset description + :param str contact_point: Email address of dataset's publisher + :param list[str] keywords: Collection of keywords + :param str publisher_name: Name of the organization publishing the dataset + :param list[str] themes: Categories that apply to this dataset's Data Model + :param str access_rights: Literal that indicates dataset's openness + :param str periodicity: Literal that indicates dataset's update frequency + :param str spatial: Path to a GeoJSON file containing the polygon area where this datasets applies + :param str landing_page: Web-page URL with information about the dataset + :param list[str] allocations: Collection of different ways to generate the resources + :param str issued: Date when the dataset was created + :param str id: Dataset's unique identifier + :param str uri: URI built by a URL and dataset's ID + :param list[Resource] resources: Collection containing the resources that belong to the dataset + """ + + def __init__(self, section): + """ + Initializes Dataset. + :param str section: Config file section the dataset belongs + """ + logging.debug(msg.DATASET_INSTANTIATING_MODEL_START.format(datamodel=section)) + + self.section = section + Validators.is_expected_value(const.DATAMODEL_SECTION, section, ConfigManager.get_datamodels()) + self.service = ConfigManager.get_value(section, const.DATAMODEL_FIWARE_SERVICE) + self.service_path = ConfigManager.get_value(section, const.DATAMODEL_FIWARE_SERVICE_PATH) + self.type = ConfigManager.get_value(section, const.DATAMODEL_TYPE) + Validators.is_informed(const.DATAMODEL_TYPE, self.type) + self.title = ConfigManager.get_value(section, const.DATASET_TITLE) + Validators.is_informed(const.DATASET_TITLE, self.title) + self.description = ConfigManager.get_value(section, const.DATASET_DESCRIPTION) + Validators.is_informed(const.DATASET_DESCRIPTION, self.description) + self.contact_point = ConfigManager.get_value(section, const.DATASET_CONTACT_POINT) + self.keywords = ConfigManager.get_value(section, const.DATASET_KEYWORDS).split('%') + if len(self.keywords) > 1: + Validators.is_informed(const.DATASET_KEYWORDS, self.keywords) + self.publisher_name = ConfigManager.get_value(section, const.DATASET_PUBLISHER_NAME) + self.publisher_uri = ConfigManager.get_value(section, const.DATASET_PUBLISHER_URI) + Validators.is_valid_url(const.DATASET_PUBLISHER_URI, self.publisher_uri) + self.publisher_type = Helpers.transform_vocabulary(const.DATASET_PUBLISHER_TYPE, + ConfigManager.get_value(section, + const.DATASET_PUBLISHER_TYPE), + const.PUBLISHER_TYPE_RELATION) + self.publisher_homepage = ConfigManager.get_value(section, const.DATASET_PUBLISHER_HOMEPAGE) + Validators.is_valid_url(const.DATASET_PUBLISHER_HOMEPAGE, self.publisher_homepage) + self.themes = Helpers.transform_themes(ConfigManager.get_value(section, const.DATASET_THEMES).split()) + self.access_rights = Helpers.transform_vocabulary(const.DATASET_ACCESS_RIGHTS, + ConfigManager.get_value(section, const.DATASET_ACCESS_RIGHTS), + const.DATASET_ACCESS_RIGHTS_RELATION) + self.periodicity = Helpers.transform_vocabulary(const.DATASET_PERIODICITY, + ConfigManager.get_value(section, 
const.DATASET_PERIODICITY), + const.DATASET_FREQUENCY_RELATION) + self.spatial = ConfigManager.get_value(section, const.DATASET_SPATIAL) + if self.spatial: + Validators.is_valid_path(const.DATASET_SPATIAL, self.spatial) + self.spatial = Helpers.get_spatial_polygon(self.spatial) + self.landing_page = ConfigManager.get_value(section, const.DATASET_LANDING_PAGE) + self.allocations = ConfigManager.get_value(section, const.DATASET_ALLOCATION).split() + self.id = ConfigManager.get_dataset_id(section) + uri_host = ConfigManager.get_value(const.MAIN_SECTION, const.URI_HOST) + uri_structure = ConfigManager.get_value(const.MAIN_SECTION, const.URI_STRUCTURE, const.URI_STRUCTURE_DEFAULT) + self.uri, self.id = Helpers.generate_uri(uri_host, uri_structure, Model.DATASET, self.id if self.id else None) + logging.debug(msg.DATASET_SAVING_ID.format(datamodel=section, id=self.id)) + ConfigManager.save_dataset_id(section, self.id) + self.issued = Helpers.get_issued_date(self.id) if self.id else '' + + self.resources = self.create_resources() + + logging.debug(msg.DATASET_INSTANTIATING_MODEL_FINISHED) + + def create_resources(self): + """ + Instantiate resources set by config file. Then it adds them to Dataset instance. + :return: Collection of resources belonging to a dataset + :rtype: list[Resources] + """ + if self.type in const.DATAMODELS.keys(): + datamodels = const.DATAMODELS[self.type] + else: + datamodels = const.DATAMODELS_DEFAULT + datamodels['models'] = [self.type] + + resources = [] + for allocation in self.allocations: + Validators.is_expected_value(const.DATASET_ALLOCATION, allocation, datamodels['allocation']) + resources += Resource.create_resources(self, datamodels['models'], allocation) + + logging.debug(msg.DATASET_RESOURCES_CREATED.format(resources=len(resources), datamodel=self.section)) + return resources diff --git a/cb_edp/model/resource.py b/cb_edp/model/resource.py new file mode 100644 index 0000000..2e65682 --- /dev/null +++ b/cb_edp/model/resource.py @@ -0,0 +1,99 @@ +import logging + +import cb_edp.conf.constants as const +import cb_edp.utils.messages as msg +from cb_edp.api.builder import APIBuilder +from cb_edp.conf.constants import Model, Allocation +from cb_edp.conf.manager import ConfigManager +from cb_edp.utils.helpers import Helpers +from cb_edp.utils.validators import Validators + + +class Resource: + """ + A class that represents a single instance of a resources of those that will be integrated. + + :param str section: Section in the config file where it belongs + :param str license: URL to license information + :param str rights: Simple text rights information + :param str title: The resource title + :param str url: URL for accessing to context data + :param str uri: URI built by a URL and resource's ID + """ + + def __init__(self, section): + """ + Initializes Resource. 
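Illustrative use of the Dataset model on its own, assuming ConfigManager.set_config_path() has already been called and that a 'datamodel.parking' section exists in that file (the name is made up).

from cb_edp.model.dataset import Dataset

dataset = Dataset('datamodel.parking')
print(dataset.uri, dataset.id)            # URI built from uri.structure/uri.host plus a time-based UUID
print(dataset.keywords, dataset.themes)   # parsed from the '%'- and blank-separated config fields
print(len(dataset.resources))             # one Resource per Data Model/allocation combination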
+ :param str section: Config file section the resource belongs + """ + logging.debug(msg.RESOURCE_INSTANTIATING_MODEL_START.format(datamodel=section)) + + self.section = section + self.license = ConfigManager.get_value(self.section, const.RESOURCE_LICENSE) + uri_host = ConfigManager.get_value(const.MAIN_SECTION, const.URI_HOST) + uri_structure = ConfigManager.get_value(const.MAIN_SECTION, const.URI_STRUCTURE, const.URI_STRUCTURE_DEFAULT) + self.uri = Helpers.generate_uri(uri_host, uri_structure, Model.RESOURCE)[0] + self.description = msg.RESOURCE_DESCRIPTION + self.title = '' + self.url = '' + + logging.debug(msg.RESOURCE_INSTANTIATING_MODEL_FINISHED) + + @staticmethod + def create_resources(dataset, datamodels, allocation): + """ + Creates a collection of resources based on the configuration set. + :param Dataset dataset: Parent of the resources created + :param list[str] datamodels: Collection of the Data Models to take into account in the generation + :param str allocation: Literal that indicates how the filter will be done + :return: Collection of instantiated resources + :rtype: list[Resource] + """ + logging.debug(msg.RESOURCE_CREATE_RESOURCES.format(allocation=allocation, datamodels=', '.join(datamodels))) + + resources = [] + for datamodel in datamodels: + if allocation == Allocation.CATEGORY.value: + resources.append(Resource.create_resource_by_category(dataset, datamodel)) + elif allocation == Allocation.LOCATION.value: + locations = ConfigManager.get_value(dataset.section, const.RESOURCE_LOCATIONS).split('%') + Validators.is_informed(const.RESOURCE_LOCATIONS, locations) + for location in locations: + resources.append(Resource.create_resource_by_location(dataset, datamodel, location.strip())) + + return resources + + @staticmethod + def create_resource_by_category(dataset, category): + """ + Creates a resource based on a filter by category. + :param Dataset dataset: The dataset owning the resource to instantiate. + :param str category: Data Model by which the URL will filter. + :return: Instantiated dataset. + :rtype: Resource + """ + resource = Resource(dataset.section) + resource.title = Helpers.split_uppercase(category) + filters = {'entity': category} + resource.url = APIBuilder.build_resource_url(dataset.service, dataset.service_path, **filters) + logging.debug(msg.RESOURCE_CREATE_RESOURCE_ENTITY.format(name=resource.title, datamodel=category)) + return resource + + @staticmethod + def create_resource_by_location(dataset, category, location): + """ + Creates a resource based on a filter by location. + :param Dataset dataset: The dataset owning the resource to instantiate. + :param str category: Data Model by which the URL will filter. + :param str location: Geographical area by which the URL will filter. + :return: Instantiated dataset. 
+ :rtype: Resource + """ + datamodel = Helpers.split_uppercase(category) + resource = Resource(dataset.section) + resource.title = msg.RESOURCE_TITLE_LOCATION.format(datamodel=datamodel, location=location) + filters = {'entity': category, 'location': location} + resource.url = APIBuilder.build_resource_url(dataset.service, dataset.service_path, **filters) + logging.debug( + msg.RESOURCE_CREATE_RESOURCE_LOCATION.format(name=resource.title, datamodel=category, location=location)) + return resource diff --git a/cb_edp/rdf/__init__.py b/cb_edp/rdf/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/cb_edp/rdf/serializer.py b/cb_edp/rdf/serializer.py new file mode 100644 index 0000000..951fa38 --- /dev/null +++ b/cb_edp/rdf/serializer.py @@ -0,0 +1,570 @@ +import copy +import logging +import re +import xml.etree.ElementTree as ET +from datetime import datetime +from xml.dom import minidom + +import cb_edp.conf.constants as const +import cb_edp.utils.messages as msg +from cb_edp.errors.rdf import DatasetNotFoundError +from cb_edp.errors.rdf import RDFFileNotFoundError +from cb_edp.errors.rdf import RDFParserError +from cb_edp.errors.rdf import WritingRDFError +from cb_edp.model.catalogue import Catalogue +from cb_edp.model.dataset import Dataset +from cb_edp.model.resource import Resource +from cb_edp.utils.helpers import Helpers +from cb_edp.utils.validators import Validators + + +class Serializer: + """ + The Serializer class provides methods to transform the models defined in the application to a well-formatted RDF/XML file. + This transformation is done having DCAT-AP 1.1 version in mind. The metadata present in result file will be, at + best, like the already defined in template.xml file. + + :param dict[str] namespaces: Namespaces needed for the building of the RDF/XML file + """ + namespaces = {} + + @staticmethod + def serialize_rdf_create(catalogue): + """ + Serializes an entire catalogue (its datasets and resources and itself) into an RDF/XML format file. 
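A sketch of the Resource factory in isolation; it assumes the Allocation.CATEGORY literal is 'category' (as the template.ini comments suggest) and reuses the hypothetical Dataset section from the earlier sketch.

from cb_edp.model.dataset import Dataset
from cb_edp.model.resource import Resource

dataset = Dataset('datamodel.parking')                        # hypothetical section
resources = Resource.create_resources(dataset, ['OffStreetParking'], 'category')
for resource in resources:
    print(resource.title, resource.url)                       # e.g. 'Off Street Parking' plus the API filter URL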
+ :param Catalogue catalogue: Catalogue model instance + :return: Tree representing serialized RDF/XML + :rtype: ET.ElementTree + """ + logging.info(msg.SERIALIZER_RDF_CREATION_START) + tree = Serializer._load_tree(Helpers.get_rdf_template_path()) + rdf = tree.getroot() + + Serializer.serialize_catalogue(rdf, catalogue) + + logging.info(msg.SERIALIZER_DATASETS_SERIALIZE_START) + for dataset in catalogue.datasets: + Serializer.serialize_dataset(rdf, dataset) + logging.info(msg.SERIALIZER_DATASETS_SERIALIZE_FINISHED) + + logging.info(msg.SERIALIZER_RESOURCES_SERIALIZE_START) + for dataset in catalogue.datasets: + for resource in dataset.resources: + Serializer.serialize_resource(rdf, resource) + logging.info(msg.SERIALIZER_RESOURCES_SERIALIZE_FINISHED) + + logging.info(msg.SERIALIZER_PUBLISHERS_SERIALIZE_START) + publishers = [ + (catalogue.publisher_uri, catalogue.publisher_name, catalogue.publisher_type, catalogue.publisher_homepage)] + for dataset in catalogue.datasets: + publishers.append( + (dataset.publisher_uri, dataset.publisher_name, dataset.publisher_type, dataset.publisher_homepage)) + publishers = set(publishers) + for publisher in publishers: + Serializer.serialize_publishers(rdf, publisher[0], publisher[1], publisher[2], publisher[3]) + logging.info(msg.SERIALIZER_PUBLISHERS_SERIALIZE_FINISHED) + + Serializer._remove_template_nodes(rdf) + + logging.info(msg.SERIALIZER_RDF_CREATION_FINISHED) + return tree + + @staticmethod + def serialize_rdf_update(dataset, rdf_local_tree=None): + """ + Updates the serialized RDF adding a new dataset (and its resources) into a new RDF/XML file. + :param Dataset dataset: Dataset model instance + :param ET.ElementTree rdf_local_tree: Tree representing locally stored serialized RDF/XML + :return: Tree representing the updated RDF/XML file + :rtype: ET.ElementTree + """ + logging.info(msg.SERIALIZER_RDF_UPDATE_START.format(dataset=dataset.section)) + + if not rdf_local_tree: + rdf_local_tree = Serializer._load_tree(Helpers.get_rdf_path()) + rdf_local_root = rdf_local_tree.getroot() + + rdf_template = Serializer._load_tree(Helpers.get_rdf_template_path()).getroot() + + Serializer._update_dataset_node(rdf_template, rdf_local_root, dataset) + + Serializer._update_catalogue_date(rdf_local_root, rdf_template) + Serializer._remove_template_nodes(rdf_template) + + for descendant in rdf_template.findall('*'): + rdf_local_root.append(descendant) + + catalogue_rdf = rdf_local_root.find(const.RDF_CATALOGUE, namespaces=Serializer.namespaces) + xpath = const.RDF_ATTRIBUTE_XPATH.format(element=const.RDF_CATALOGUE_DATASET, + attribute=const.RDF_ATTRIBUTE_RESOURCE, value=dataset.uri) + if catalogue_rdf.find(xpath, namespaces=Serializer.namespaces) is None: + logging.info(msg.SERIALIZER_RDF_UPDATE_NEW_DATASET.format(dataset=dataset.section)) + Serializer._set_value(catalogue_rdf, const.RDF_CATALOGUE_DATASET, dataset.uri, + attribute=const.RDF_ATTRIBUTE_RESOURCE, duplicate=True) + else: + Serializer._remove_dataset_node(dataset.section, rdf_local_root, dataset.id, False, updating=True) + + logging.info(msg.SERIALIZER_RDF_UPDATE_FINISHED.format(dataset=dataset.section)) + return rdf_local_tree + + @staticmethod + def serialize_rdf_remove(dataset_section, dataset_uri, rdf_local_tree=None): + """ + Updates the serialized RDF removing a dataset (and its resources) from an RDF/XML file. 
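The low-level equivalent of EDP.integrate(), shown as a sketch (section names are placeholders).

from cb_edp.model.catalogue import Catalogue
from cb_edp.rdf.serializer import Serializer

catalogue = Catalogue(['datamodel.parking', 'datamodel.weather'])   # hypothetical sections
tree = Serializer.serialize_rdf_create(catalogue)
Serializer.write_rdf(tree)                                          # pretty-prints to /api/catalogue.rdf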
+ :param str dataset_section: Name of the section in config file which the dataset belongs to + :param str dataset_uri: URI of the dataset + :param ET.ElementTree or None rdf_local_tree: Tree representing locally stored serialized RDF/XML + :return: Tree representing the updated RDF/XML file + :rtype: ET.ElementTree + """ + logging.info(msg.SERIALIZER_RDF_REMOVE_START.format(dataset=dataset_section)) + + if not rdf_local_tree: + rdf_local_tree = Serializer._load_tree(Helpers.get_rdf_path()) + local_rdf_root = rdf_local_tree.getroot() + + Serializer._remove_dataset_node(dataset_section, local_rdf_root, dataset_uri, remove_from_catalogue=True) + + rdf_template = Serializer._load_tree(Helpers.get_rdf_template_path()).getroot() + Serializer._update_catalogue_date(local_rdf_root, rdf_template) + + logging.info(msg.SERIALIZER_RDF_REMOVE_FINISHED.format(dataset=dataset_section)) + return rdf_local_tree + + @staticmethod + def serialize_catalogue(rdf, catalogue): + """ + Serializes an instanced catalogue model into its RDF/XML representation. + :param ET.Element rdf: Root element of RDF/XML + :param Catalogue catalogue: Catalogue model instance + :return: XML element representing serialized catalogue + :rtype: ET.Element + """ + logging.info(msg.SERIALIZER_CATALOGUE_SERIALIZE_START.format(datamodels=', '.join(catalogue.sections))) + + catalogue_rdf = Serializer._clone_node(rdf, const.RDF_CATALOGUE) + + Serializer._set_node_attribute(catalogue_rdf, const.RDF_ATTRIBUTE_ABOUT, catalogue.uri) + Serializer._set_value(catalogue_rdf, const.RDF_TITLE, catalogue.title) + Serializer._set_value(catalogue_rdf, const.RDF_DESCRIPTION, catalogue.description) + Serializer._set_value(catalogue_rdf, const.RDF_HOMEPAGE, catalogue.homepage, + attribute=const.RDF_ATTRIBUTE_RESOURCE, remove=True) + Serializer._set_value(catalogue_rdf, const.RDF_ISSUED, Helpers.format_datetime(datetime.utcnow())) + Serializer._remove_node(catalogue_rdf, const.RDF_MODIFIED) + Serializer._set_value(catalogue_rdf, const.RDF_PUBLISHER, catalogue.publisher_uri, const.RDF_ATTRIBUTE_RESOURCE) + uris = [dataset.uri for dataset in catalogue.datasets] + Serializer._set_multiple_values(catalogue_rdf, const.RDF_CATALOGUE_DATASET, uris, + attribute=const.RDF_ATTRIBUTE_RESOURCE) + + rdf.append(catalogue_rdf) + + logging.debug(msg.SERIALIZER_CATALOGUE_SERIALIZE_FINISHED) + return catalogue_rdf + + @staticmethod + def serialize_dataset(rdf, dataset, updated=False): + """ + Serializes an instanced dataset model into its RDF/XML representation. 
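And the update path used by EDP.modify(), which threads a single tree through successive calls; it assumes a catalogue.rdf from a previous integration already exists on disk.

from cb_edp.model.dataset import Dataset
from cb_edp.rdf.serializer import Serializer

rdf = None
for section in ('datamodel.parking', 'datamodel.weather'):          # hypothetical sections
    rdf = Serializer.serialize_rdf_update(Dataset(section), rdf)    # loads the local RDF only on the first pass
Serializer.write_rdf(rdf)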
+ :param ET.Element rdf: Root element of RDF/XML + :param Dataset dataset: Dataset model instance + :param bool updated: If it is a new or an updated dataset + :return: XML element representing serialized dataset + :rtype: ET.Element + """ + logging.info(msg.SERIALIZER_DATASET_SERIALIZE_START.format(datamodel=dataset.section)) + + dataset_rdf = Serializer._clone_node(rdf, const.RDF_DATASET) + + Serializer._set_node_attribute(dataset_rdf, const.RDF_ATTRIBUTE_ABOUT, dataset.uri) + Serializer._set_value(dataset_rdf, const.RDF_TITLE, dataset.title) + Serializer._set_value(dataset_rdf, const.RDF_DESCRIPTION, dataset.description) + Serializer._set_multiple_values(dataset_rdf, const.RDF_KEYWORD, dataset.keywords) + Serializer._set_value(dataset_rdf, const.RDF_PUBLISHER, dataset.publisher_uri, const.RDF_ATTRIBUTE_RESOURCE) + Serializer._set_multiple_values(dataset_rdf, const.RDF_THEME, dataset.themes, + attribute=const.RDF_ATTRIBUTE_RESOURCE) + if dataset.contact_point: + Serializer._set_value(dataset_rdf, const.RDF_ELEMENT_XPATH.format(element=const.RDF_CONTACT_POINT_NAME), + dataset.publisher_name) + Serializer._set_value(dataset_rdf, const.RDF_ELEMENT_XPATH.format(element=const.RDF_CONTACT_POINT_EMAIL), + 'mailto:{email}'.format(email=dataset.contact_point), + attribute=const.RDF_ATTRIBUTE_RESOURCE) + else: + Serializer._remove_node(dataset_rdf, const.RDF_CONTACT_POINT) + Serializer._set_value(dataset_rdf, const.RDF_PERIODICITY, dataset.periodicity, + attribute=const.RDF_ATTRIBUTE_RESOURCE, remove=True) + Serializer._set_value(dataset_rdf, const.RDF_IDENTIFIER, dataset.id) + Serializer._set_value(dataset_rdf, const.RDF_ISSUED, Helpers.get_issued_date(dataset.id)) + if updated: + Serializer._set_value(dataset_rdf, const.RDF_MODIFIED, Helpers.format_datetime(datetime.utcnow())) + else: + Serializer._remove_node(dataset_rdf, const.RDF_MODIFIED) + Serializer._set_value(dataset_rdf, const.RDF_RIGHTS, dataset.access_rights, + attribute=const.RDF_ATTRIBUTE_RESOURCE, remove=True) + Serializer._set_value(dataset_rdf, const.RDF_LANDING_PAGE, dataset.landing_page, + attribute=const.RDF_ATTRIBUTE_RESOURCE, remove=True) + if dataset.spatial: + geometry_nodes = dataset_rdf.findall(const.RDF_ELEMENT_XPATH.format(element=const.RDF_SPATIAL_GEOMETRY), + Serializer.namespaces) + for i, node in enumerate(geometry_nodes): + node.text = dataset.spatial[i] + else: + Serializer._remove_node(dataset_rdf, const.RDF_SPATIAL) + uris = [resource.uri for resource in dataset.resources] + Serializer._set_multiple_values(dataset_rdf, const.RDF_DATASET_RESOURCE, uris, + attribute=const.RDF_ATTRIBUTE_RESOURCE) + + rdf.append(dataset_rdf) + + logging.debug(msg.SERIALIZER_DATASET_SERIALIZE_FINISHED.format(datamodel=dataset.section)) + return dataset_rdf + + @staticmethod + def serialize_resource(rdf, resource): + """ + Serializes an instanced resource model into its RDF/XML representation. 
+ :param ET.Element rdf: Root element of RDF/XML + :param Resource resource: Resource model instance + :return: XML element representing serialized resource + :rtype: ET.Element + """ + logging.debug(msg.SERIALIZER_RESOURCE_SERIALIZE_START) + + resource_rdf = Serializer._clone_node(rdf, const.RDF_RESOURCE) + Serializer._set_node_attribute(resource_rdf, const.RDF_ATTRIBUTE_ABOUT, resource.uri) + Serializer._set_value(resource_rdf, const.RDF_ACCESS_URL, resource.url, attribute=const.RDF_ATTRIBUTE_RESOURCE) + Serializer._set_value(resource_rdf, const.RDF_DESCRIPTION, resource.description) + Serializer._set_value(resource_rdf, const.RDF_TITLE, resource.title) + Serializer._set_value(resource_rdf, const.RDF_DOWNLOAD_URL, resource.url, + attribute=const.RDF_ATTRIBUTE_RESOURCE) + Serializer._set_value(resource_rdf, const.RDF_LICENSE, resource.license, attribute=const.RDF_ATTRIBUTE_RESOURCE, + remove=True) + + rdf.append(resource_rdf) + + logging.debug(msg.SERIALIZER_RESOURCE_SERIALIZE_FINISHED) + return resource_rdf + + @staticmethod + def serialize_publishers(rdf, publisher_uri, publisher_name, publisher_type, publisher_homepage): + """ + Serializes a publisher to a Organization node in an RDF/XML file. + :param ET.Element rdf: Root node of RDF file + :param str publisher_uri: URI used to reference dataset's publisher + :param str publisher_name: Name given to dataset's publisher + :param str publisher_type: URI indicating which kind of publisher is it + :param str publisher_homepage: Homepage of the publisher + :return: XML element representing serialized publisher + :rtype: ET.Element + """ + logging.debug(msg.SERIALIZER_PUBLISHER_SERIALIZE_START.format(name=publisher_name)) + + publisher_rdf = Serializer._clone_node(rdf, const.RDF_ORGANIZATION) + Serializer._set_node_attribute(publisher_rdf, const.RDF_ATTRIBUTE_ABOUT, publisher_uri) + Serializer._set_value(publisher_rdf, const.RDF_ORGANIZATION_NAME, publisher_name) + Serializer._set_value(publisher_rdf, const.RDF_TYPE, publisher_type, attribute=const.RDF_ATTRIBUTE_RESOURCE, + remove=True) + Serializer._set_value(publisher_rdf, const.RDF_HOMEPAGE, publisher_homepage, + attribute=const.RDF_ATTRIBUTE_RESOURCE, remove=True) + + rdf.append(publisher_rdf) + + logging.debug(msg.SERIALIZER_PUBLISHER_SERIALIZE_FINISHED.format(name=publisher_name)) + return publisher_rdf + + @staticmethod + def write_rdf(rdf): + """ + Writes the RDF into a file locally based on constants. + :param ET.ElementTree rdf: Tree containing RDF catalogue + :return: None + :raises WritingRDFError: + """ + root_str = ET.tostring(rdf.getroot()).decode('utf8') + root_str = re.sub('(>|>)(\t|\n|\r|\s)*(<|<)', '\g<1>\g<3>', root_str) + rdf_str = minidom.parseString(root_str).toprettyxml(indent='\t', encoding='utf-8') + try: + with open(Helpers.get_rdf_path(), 'w+') as file: + file.write(rdf_str.decode('utf8')) + except: + raise WritingRDFError(Helpers.get_rdf_path()) + + @staticmethod + def _set_value(parent, node_name, value, attribute=None, duplicate=False, remove=False): + """ + Sets a provided value to a specific node depending on if it goes as a text or as an attribute. + This manages too the possibility of duplicate the node or remove it based on what is needed. 
+ :param ET.Element parent: Parent node of the node to modify + :param str node_name: Name of the node to modify + :param str value: Value to set to specified node + :param str or None attribute: Indicates the attribute that will be set with the value provided + :param bool duplicate: If the node has to be cloned and set later + :param bool remove: If the node must be removed if the value is invalid + :return: None + """ + if remove and not value: + Serializer._remove_node(parent, node_name) + return + + element = parent.find(node_name, namespaces=Serializer.namespaces) + if duplicate: + element = Serializer._clone_node(parent, node_name) + + if attribute: + Serializer._set_node_attribute(element, attribute, value) + else: + Serializer._set_node_text(element, value) + + if duplicate: + parent.append(element) + + @staticmethod + def _set_multiple_values(parent, node_name, values, attribute=None): + """ + Sets a provided collection of values to a specific node. + This node will be cloned from an existing one and then set with the corresponding value. + :param ET.Element parent: Parent node of the nodes to modify + :param str node_name: Name of the node to clone and modify + :param list[str] values: Values to set in different nodes + :param str or None attribute: Indicates the attribute that will be set with the value provided + :return: None + """ + for value in values: + Serializer._set_value(parent, node_name, value, attribute=attribute, duplicate=True) + Serializer._remove_node(parent, node_name) + + @staticmethod + def _set_node_text(element, value): + """ + To a given element, sets its text value to the provided. + :param ET.Element element: Node to modify + :param str value: Text value assigned to the element + :return: None + """ + element.text = value + + @staticmethod + def _set_node_attribute(element, attribute, value): + """ + To a given element, sets the value provided to the attribute informed. + :param ET.Element element: Node to modify + :param str attribute: Attribute that will be written (contains its name and namespace) + :param str value: Text value assigned to element's attribute + :return: None + """ + element.set(Serializer._transform_attribute(attribute), value) + + @staticmethod + def _remove_node(parent, name): + """ + Removes the first appearance of a node matching the filter provided. + :param ET.Element parent: Parent node of the node to remove + :param str name: Name of the node to remove + :return: None + """ + node = parent.find(name, namespaces=Serializer.namespaces) + parent.remove(node) + + @staticmethod + def _clone_node(parent, name): + """ + Makes a copy of specified node. + :param ET.Element parent: Parent node of the node to clone + :param str name: Name of the node to clone + :return: Copy of the node specified + :rtype: ET.Element + """ + node = parent.find(name, namespaces=Serializer.namespaces) + return copy.deepcopy(node) + + @staticmethod + def _transform_attribute(attribute): + """ + Given an attribute, it transforms it to a format usable for ElementTree library. + :param str attribute: XML attribute in namespace:attribute format + :return: Attribute in {namespace-uri}attribute format + :rtype: str + """ + prefix, suffix = attribute.split(':') + uri = Serializer.namespaces[prefix] + return '{{{uri}}}{suffix}'.format(uri=uri, suffix=suffix) + + @staticmethod + def _load_tree(xml_path): + """ + Loads the ElementTree object from a XML + :param str xml_path: Path to RDF/XML file. 
+ :return: Parsed XML file + :rtype: ET.ElementTree + """ + logging.debug(msg.SERIALIZER_LOAD_TREE.format(path=xml_path)) + + Validators.is_file_at_path(xml_path) + Serializer.namespaces = Serializer._get_rdf_namespaces(xml_path) + Serializer._register_namespaces(Serializer.namespaces) + try: + return ET.parse(xml_path) + except ET.ParseError: + raise RDFParserError(xml_path) + except FileNotFoundError: + raise RDFFileNotFoundError(xml_path) + + @staticmethod + def _update_dataset_node(rdf_template, rdf_local, dataset): + """ + Adds a new dataset and every child depending on it. + :param ET.Element rdf_template: Root node of RDF template file + :param ET.Element rdf_local: Root node of RDF local file + :param Dataset dataset: Dataset to add to the RDF + :return: None + """ + logging.debug(msg.SERIALIZER_UPDATE_DATASET_NODE.format(datamodel=dataset.section)) + + Serializer.serialize_dataset(rdf_template, dataset, updated=True) + for resource in dataset.resources: + Serializer.serialize_resource(rdf_template, resource) + + if not Serializer._dataset_publisher_node_appearances(rdf_local, dataset.publisher_uri): + Serializer.serialize_publishers(rdf_template, dataset.publisher_uri, dataset.publisher_name, + dataset.publisher_type, dataset.publisher_homepage) + + @staticmethod + def _remove_dataset_node(datamodel, rdf, uuid, remove_from_catalogue, updating=False): + """ + Removes a dataset and every node referenced by it from an actual RDF/XML file. + :param str datamodel: Data Model trying to remove + :param ET.Element rdf: Root node of already generated RDF file + :param str uuid: Dataset's identifier + :param bool remove_from_catalogue: Boolean that indicates if the reference to this dataset in Catalogue node must be removed too + :param bool updating: Boolean that indicates if this method is called during RDF updating + :return: None + :raises DatasetNotFoundError: + """ + logging.debug(msg.SERIALIZER_REMOVE_DATASET_NODE.format(datamodel=datamodel)) + + from cb_edp.conf.constants import Model + from cb_edp.conf.manager import ConfigManager + uri_host = ConfigManager.get_value(const.MAIN_SECTION, const.URI_HOST) + uri_structure = ConfigManager.get_value(const.MAIN_SECTION, const.URI_STRUCTURE, const.URI_STRUCTURE_DEFAULT) + uri = Helpers.generate_uri(uri_host, uri_structure, Model.DATASET, uuid)[0] + + dataset = rdf.find( + const.RDF_ATTRIBUTE_XPATH.format(element=const.RDF_DATASET, attribute=const.RDF_ATTRIBUTE_ABOUT, value=uri), + namespaces=Serializer.namespaces) + if dataset is None: + if updating: + logging.debug(msg.SERIALIZER_REMOVE_DATASET_NODE_NOT_PRESENT) + return + else: + raise DatasetNotFoundError(datamodel) + + Validators.is_last_dataset(rdf, Serializer.namespaces, const.RDF_DATASET) + + dataset_resources = dataset.findall(const.RDF_DATASET_RESOURCE, namespaces=Serializer.namespaces) + for dataset_resource in dataset_resources: + attribute = dataset_resource.attrib[Serializer._transform_attribute(const.RDF_ATTRIBUTE_RESOURCE)] + resource = rdf.find( + const.RDF_ATTRIBUTE_XPATH.format(element=const.RDF_RESOURCE, attribute=const.RDF_ATTRIBUTE_ABOUT, + value=attribute), namespaces=Serializer.namespaces) + rdf.remove(resource) + + publisher = dataset.find(const.RDF_PUBLISHER, namespaces=Serializer.namespaces) + publisher_uri = publisher.attrib[Serializer._transform_attribute(const.RDF_ATTRIBUTE_RESOURCE)] + if Serializer._dataset_publisher_node_appearances(rdf, publisher_uri) == 1: + publisher = dataset.find(const.RDF_PUBLISHER, namespaces=Serializer.namespaces) + publisher_uri = 
publisher.attrib[Serializer._transform_attribute(const.RDF_ATTRIBUTE_RESOURCE)] + organization = rdf.find( + const.RDF_ATTRIBUTE_XPATH.format(element=const.RDF_ORGANIZATION, attribute=const.RDF_ATTRIBUTE_ABOUT, + value=publisher_uri), namespaces=Serializer.namespaces) + rdf.remove(organization) + + rdf.remove(dataset) + + if remove_from_catalogue: + catalogue = rdf.find(const.RDF_CATALOGUE, Serializer.namespaces) + dataset = catalogue.find(const.RDF_ATTRIBUTE_XPATH.format(element=const.RDF_CATALOGUE_DATASET, + attribute=const.RDF_ATTRIBUTE_RESOURCE, + value=uri), namespaces=Serializer.namespaces) + catalogue.remove(dataset) + + @staticmethod + def _update_catalogue_date(rdf, rdf_template): + """ + Updates the modified date of the catalogue to the current one. + :param ET.Element rdf: Root node of already generated RDF file + :param ET.Element rdf_template: Root node of RDF template file + :return: None + """ + catalogue = rdf_template.find(const.RDF_CATALOGUE, namespaces=Serializer.namespaces) + template_modified_node = Serializer._clone_node(catalogue, const.RDF_MODIFIED) + + catalogue = rdf.find(const.RDF_CATALOGUE, namespaces=Serializer.namespaces) + local_modified_node = catalogue.find(const.RDF_MODIFIED, namespaces=Serializer.namespaces) + date = Helpers.format_datetime(datetime.utcnow()) + if local_modified_node is not None: + logging.debug(msg.SERIALIZER_CATALOGUE_DATE_NOT_EXISTS.format(date=date)) + Serializer._set_node_text(local_modified_node, date) + else: + logging.debug(msg.SERIALIZER_CATALOGUE_DATE_ALREADY_EXISTS.format(date=date)) + Serializer._set_node_text(template_modified_node, date) + catalogue.append(template_modified_node) + + @staticmethod + def _dataset_publisher_node_appearances(rdf, uri): + """ + Counts how many times a publisher node from a specific dataset is referenced in entire RDF file. + :param ET.Element rdf: Root node of already generated RDF file + :param str uri: URI of the publisher node + :return: Number of times dataset's publisher node is referenced + """ + publisher_attribute_xpath = const.RDF_ATTRIBUTE_XPATH.format(element=const.RDF_PUBLISHER, + attribute=const.RDF_ATTRIBUTE_RESOURCE, value=uri) + publisher_parent_xpath = const.RDF_ELEMENT_XPATH.format(element=const.RDF_DATASET) + + publishers_xpath = '{element}/{attribute}'.format(element=publisher_parent_xpath, + attribute=publisher_attribute_xpath) + publisher_nodes = rdf.findall(publishers_xpath, namespaces=Serializer.namespaces) + + logging.debug(msg.SERIALIZER_PUBLISHER_NODE_APPEARANCES.format(times=len(publisher_nodes))) + return len(publisher_nodes) + + @staticmethod + def _remove_template_nodes(rdf): + """ + Removes from the RDF supplied template's original nodes. + :param ET.Element rdf: Informed template RDF + :return: None + """ + Serializer._remove_node(rdf, const.RDF_CATALOGUE) + Serializer._remove_node(rdf, const.RDF_DATASET) + Serializer._remove_node(rdf, const.RDF_RESOURCE) + Serializer._remove_node(rdf, const.RDF_ORGANIZATION) + + @staticmethod + def _register_namespaces(namespaces): + """ + Registers the namespaces contained in the RDF file in the ElementTree module imported. + :param dict[str] namespaces: Path to RDF/XML file + :return: None + """ + for namespace in namespaces: + ET.register_namespace(namespace, namespaces[namespace]) + + @staticmethod + def _get_rdf_namespaces(rdf_file_path): + """ + Reads an RDF/XML file and obtains the namespaces present in it. 
+ :param str rdf_file_path: Path to RDF/XML file + :return: A dictionary containing the different namespaces + :rtype: dict[str] + """ + try: + with open(rdf_file_path) as rdf: + raw_rdf = rdf.read() + except FileNotFoundError: + raise RDFFileNotFoundError(rdf_file_path) + + matches = re.findall(r'xmlns:(\w+)=\"([\w./:#-]+)\"', raw_rdf) + namespaces = {} + for match in matches: + namespaces[match[0]] = match[1] + return namespaces diff --git a/cb_edp/rdf/template.xml b/cb_edp/rdf/template.xml new file mode 100644 index 0000000..8cc0ced --- /dev/null +++ b/cb_edp/rdf/template.xml @@ -0,0 +1,60 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/cb_edp/utils/__init__.py b/cb_edp/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/cb_edp/utils/helpers.py b/cb_edp/utils/helpers.py new file mode 100644 index 0000000..3b67a58 --- /dev/null +++ b/cb_edp/utils/helpers.py @@ -0,0 +1,235 @@ +import base64 +import json +import logging +import os +import re +import uuid +from pathlib import Path + +from time_uuid import TimeUUID + +import cb_edp.conf.constants as const +import cb_edp.utils.messages as msg +from cb_edp.conf.constants import Model +from cb_edp.errors.config import NotInformedFieldError +from cb_edp.utils.validators import Validators + + +class Helpers(object): + """ + Utilities class. + """ + + @staticmethod + def instantiate_logger(): + """ + Loads the logger.yml file with the configuration for the different loggers to be used. + """ + import logging.config + import yaml + + package_path = os.path.dirname(os.path.abspath(__file__)) + with open(os.path.join(package_path, 'logger.yml'), 'r') as file: + config = yaml.safe_load(file.read()) + logging.config.dictConfig(config) + with open(config['handlers']['file']['filename']) as log: + add_line = sum(1 for _ in log) + if add_line: + with open(config['handlers']['file']['filename'], 'a') as log: + log.write('\n') + + @staticmethod + def generate_uri(host, structure, dataset_type, uuid=None): + """ + Builds a URI from the structure and host defined in config file. + :param str host: Host value for the URI + :param str structure: Structure used to form the final URL + :param Model dataset_type: For which element the URI will be generated + :param str or None uuid: ID in case that already exists. Default: None + :return: Tuple with well formatted URI and UUID + :rtype: (str, str) + """ + Validators.is_informed(const.URI_HOST, host) + + if not uuid: + uuid = str(TimeUUID.with_utcnow()) + + if dataset_type is Model.CATALOGUE: + return 'http://{host}/'.format(host=host), uuid + + Validators.is_expected_value(const.URI_STRUCTURE, '{host}', structure) + if structure[-1] is not '/': + structure += '/' + structure += '{type}/' + + return structure.format(host=host, type=dataset_type.value) + uuid, uuid + + @staticmethod + def get_issued_date(dataset_id): + """ + Cleans the value provided to get the issued date of the dataset. + :param str dataset_id: Dataset's stored identifier. + :return: The issued date in UTC ISO 8601 format. + :rtype: str + """ + time_uuid = TimeUUID.convert(uuid.UUID('{%s}' % dataset_id)) + return Helpers.format_datetime(time_uuid.get_datetime()) + + @staticmethod + def format_datetime(datetime): + """ + Transforms a given date in UTC ISO 8601 format. + :param datetime.datetime datetime: Date to format. + :return: UTC ISO 8601 formatted date. 
+ :rtype: str + """ + return datetime.strftime('%Y-%m-%dT%H:%M:%SZ') + + @staticmethod + def split_uppercase(value): + """ + Splits a string based on its upper case characters. + :param str value: The string to be split. + :return: A string with blanks between its upper case characters. + :rtype: str + """ + return re.sub(r'([A-Z])', r' \1', value).strip() + + @staticmethod + def transform_themes(themes): + """ + From the dataset.themes field from the config file, transforms them to the URIs defined in DCAT-AP. + :param list[str] themes: String collection of themes from a dataset. + :return: Expected theme URIs. + :rtype: list + """ + transformed_themes = [] + if len(themes) > 1 and themes[0]: + for theme in themes: + transformed_themes.append( + Helpers.transform_vocabulary(const.DATASET_THEMES, theme, const.DATASET_THEMES_RELATION)) + return transformed_themes + + @staticmethod + def transform_vocabulary(field, value, vocabulary): + """ + Translates a value used in the config file to a value from a vocabulary. + :param str field: Name of the field from the configuration file. + :param str value: Value for the field. + :param dict vocabulary: Equivalence between config file and DCAT-AP values. + :return: The value transformed. + :rtype: str + """ + if not value: + return '' + Validators.is_expected_value(field, value, list(vocabulary.keys())) + return vocabulary[value] + + @staticmethod + def get_spatial_polygon(path): + """ + Loads the geometry from a GeoJSON. + :param str path: GeoJSON file location. + :return: Tuple which first value is a geometry-like object and the second one a string with geometry coordinates JSON formatted + :rtype: (str, str) + :raises NotInformedFieldError: + """ + try: + if os.path.isdir('/'.join(path.split('/')[:-1])): + if not os.path.exists(path): + return + else: + return + + with open(path) as file: + geojson = json.load(file) + if len(geojson['features']) > 1: + logging.warning(msg.HELPERS_SPATIAL_GEOJSON_NODES.format(path=path)) + + geometry = geojson['features'][0]['geometry'] + type = geometry['type'] + geometry = str(geometry['coordinates'])[1:][:-1] + + geometry_json = '"type":"{type}","coordinates":[{coordinates}'.format(type=type, + coordinates=geometry[:-1]) + geometry_json = '{' + geometry_json + ']]}' + + coordinates = re.sub(r'(,?)\s*\[(-?[\d\.]+),\s*(-?[\d\.]+)\]', '\g<1>\g<2> \g<3>', geometry) + coordinates = coordinates.replace('[', '(').replace(']', ')') + geometry_object = '{type}({coordinates})'.format(type=type.upper(), coordinates=coordinates) + + return geometry_object, geometry_json + except OSError: + raise NotInformedFieldError(None, message=msg.HELPERS_SPATIAL_GEOJSON_FILE_NOT_FOUND.format(path=path)) + + @staticmethod + def encode_base64_url(url): + """ + Encodes a URL in Base64 replacing then those chars that may cause troubles when used on a well-formatted URL. + :param url: URL to encode + :return: Base64 url + :rtype: str + """ + url = base64.b64encode(url.encode('ascii')) + url = url.replace(b'+', b'.').replace(b'=', b'').replace(b'/', b'_') + return url.decode('utf8') + + @staticmethod + def decode_base64_url(url): + """ + Decodes a URL in Base64 replacing first delicate chars put before by encode_base64_url() method. 
+ :param url: URL to decode + :return: Usable and readable url + :rtype: str + """ + url = url.replace('.', '+').replace('_', '/') + missing_padding = len(url) % 4 + if missing_padding: + url += '=' * (4 - missing_padding) + url = base64.b64decode(url.encode('ascii')) + return url.decode('utf8') + + @staticmethod + def get_project_root(): + """ + Returns project root folder path. + :return: Path to project's root + :rtype: str + """ + return str(Path(__file__).parent.parent) + + @staticmethod + def get_rdf_template_path(): + """ + Returns RDF template file folder path. + :return: Path to RDF template file + :rtype: str + """ + return Helpers.get_project_root() + const.RDF_FILE_TEMPLATE_PATH + + @staticmethod + def get_datasets_ids_file_path(): + """ + Returns dataset IDs file path. + :return: Path to config file + :rtype: str + """ + return Helpers.get_project_root() + const.CONFIG_FILE_DATASETS_IDS_PATH + + @staticmethod + def get_config_file_template_path(): + """ + Returns configuration file template folder path. + :return: Path to config file template + :rtype: str + """ + return Helpers.get_project_root() + const.CONFIG_FILE_TEMPLATE_PATH + + @staticmethod + def get_rdf_path(): + """ + Returns output RDF file folder path. + :return: Path to RDF file + :rtype: str + """ + return Helpers.get_project_root() + const.RDF_FILE_PATH diff --git a/cb_edp/utils/logger.yml b/cb_edp/utils/logger.yml new file mode 100644 index 0000000..4094619 --- /dev/null +++ b/cb_edp/utils/logger.yml @@ -0,0 +1,25 @@ +version: 1 +formatters: + brief: + format: '%(asctime)s %(levelname)-8s [%(module)s] %(message)s' + datefmt: '%H:%M:%S' + precise: + format: '%(asctime)s %(levelname)s [%(pathname)s] %(message)s' + datefmt: '%Y-%m-%d %H:%M:%S' +handlers: + console: + class: logging.StreamHandler + formatter: brief + level: INFO + stream: ext://sys.stdout + file: + class: logging.handlers.RotatingFileHandler + formatter: precise + filename: /var/log/cb_edp.log + maxBytes: 5242880 + backupCount: 5 + level: DEBUG +root: + level: DEBUG + handlers: [console, file] + propagate: no \ No newline at end of file diff --git a/cb_edp/utils/messages.py b/cb_edp/utils/messages.py new file mode 100644 index 0000000..032433f --- /dev/null +++ b/cb_edp/utils/messages.py @@ -0,0 +1,105 @@ +# commands.py +COMMANDS_INTEGRATE_PROMPT = 'You have already generated an RDF file. Do you want to continue?' +COMMANDS_NEW_CONFIG_PROMPT = 'You have already generated a configuration file. Do you want to continue?' +COMMANDS_HELP_CONFIG_FILE = 'The configuration file path. [optional]' +COMMANDS_HELP_DATAMODELS = 'Name of the Data Models to integrate separated by blanks. Use "{command}" to modify every Data Model.' +COMMANDS_HELP_OVERWRITE = 'Ignore confirmation and overwrite existing file.' +COMMANDS_SHOW_INTEGRATED_DATAMODELS = 'Data Models available in the RDF file:' +COMMANDS_SHOW_INTEGRATED_DATAMODELS_EMPTY = 'You have not integrated any Data Models yet.' + +# edp.py +EDP_INITIALIZING = 'Initializing CB-EDP integration process (instantiating EDP core class)' +EDP_READING_CONFIG = 'Reading config file from {path}' +EDP_CHECK_API_STATUS = 'Checking if solution\'s API at {host} is up...' +EDP_API_STATUS_DOWN = '{host} seems to be down. 
Check if solution\'s API host is set correctly in config file or if the server is up' +EDP_INTEGRATION_START = 'Starting integration process for {datamodels} Data Model/s' +EDP_INTEGRATION_FINISHED_OK = 'Integration process finished successfully' +EDP_INTEGRATION_FINISHED_KO = 'Integration process finished with errors' +EDP_MODIFICATION_START = 'Starting integration modification process for {datamodels} Data Model/s' +EDP_MODIFICATION_FINISHED_OK = 'Integration modification process finished successfully' +EDP_MODIFICATION_FINISHED_KO = 'Integration modification process finished with errors' +EDP_DELETE_START = 'Starting integration removal process for {datamodels} Data Model/s' +EDP_DELETE_FINISHED_OK = 'Integration removal process finished successfully' +EDP_DELETE_FINISHED_KO = 'Integration removal process finished with errors' +EDP_CONFIG_FILE_GENERATION = 'Configuration file created successfully at {path}' +EDP_CONFIG_FILE_GENERATION_FAILED = 'Cannot create configuration file: Permission denied' +EDP_ERROR_INSTANTIATING_LOGGER = "{date} ERROR [{script}] Permission denied: you must run cb-edp as sudoer" + +# /api/main.py +API_STATUS_OK = 'CB-EDP API service running' + +# /errors/api.py +API_COULD_NOT_READ_RDF_SHORT_ERROR = 'Error trying to access RDF file' +API_COULD_NOT_READ_RDF_ERROR = 'There was an error trying to access the RDF/XML: file not found in filesystem.' +API_PROCESS_FAILED_SHORT_ERROR = 'Error during query processing' +API_PROCESS_FAILED_ERROR = 'There was an error processing your query. Check API service logs or contact application administrator.' + +# /errors/config.py +CONFIG_FILE_PATH_ERROR = 'There was a problem with the path to config file: {path}' +NOT_INFORMED_FIELD_ERROR = 'Field "{field}" is not informed' +WRONG_FORMAT_ERROR = 'Field "{field}" value is not well-formatted: {value}' +SECTION_KEY_ERROR = 'Key "{key}" is not present in section "{section}" from config file' +NOT_EXPECTED_VALUE_ERROR = 'Field "{field}" value ({value}) not expected. Possible values: {choices}' +NOT_ID_FOR_DATAMODEL_ERROR = 'ID for Data Model "{datamodel}" not found' + +# /errors/rdf.py +WRITING_RDF_ERROR = 'There was an error trying to write the RDF file in disk: {path}' +RDF_FILE_NOT_FOUND_ERROR = 'RDF/XML file not found when trying to load XML tree at: {path}' +RDF_PARSING_ERROR = 'There was a problem parsing the RDF/XML file at "{path}". Maybe it is malformed?' +DATASET_NOT_FOUND_ERROR = 'Dataset for "{datamodel}" Data Model not found in RDF file' +LAST_DATASET_ERROR = 'Removing last dataset in RDF file. 
This will remove entire RDF file' + +# /model/catalogue.py +CATALOGUE_INSTANTIATING_MODEL_START = 'Instantiating new Catalogue model for {datamodels} Data Models from config file info' +CATALOGUE_INSTANTIATING_MODEL_FINISHED = 'New Catalogue model instantiating process finalized successfully' +CATALOGUE_DATASETS_CREATED = '{datasets} dataset/s created' + +# /model/dataset.py +DATASET_INSTANTIATING_MODEL_START = 'Instantiating new Dataset model for "{datamodel}" Data Model from config file info' +DATASET_INSTANTIATING_MODEL_FINISHED = 'New Dataset model instantiating process finalized successfully' +DATASET_SAVING_ID = 'Saving {datamodel} dataset ID: {id}' +DATASET_RESOURCES_CREATED = '{resources} distribution/s created for "{datamodel}" dataset' + +# /model/resource.py +RESOURCE_INSTANTIATING_MODEL_START = 'Instantiating new Resource model for "{datamodel}" Data Model from config file info' +RESOURCE_INSTANTIATING_MODEL_FINISHED = 'New Resource model instantiating process finalized successfully' +RESOURCE_CREATE_RESOURCES = 'Building resources collection by {allocation} for {datamodels} Data Models' +RESOURCE_CREATE_RESOURCE_ENTITY = 'New resource "{name}" for "{datamodel}" created' +RESOURCE_CREATE_RESOURCE_LOCATION = 'New resource "{name}" for "{datamodel}" and {location} location created' +RESOURCE_DESCRIPTION = 'Results can be paginated using "offset" and "limit" as URL parameters' +RESOURCE_TITLE_LOCATION = '{datamodel} in {location}' + +# /rdf/serializer.py +SERIALIZER_RDF_CREATION_START = 'Creating new RDF/XML file from scratch' +SERIALIZER_RDF_CREATION_FINISHED = 'RDF/XML file creation process finished successfully' +SERIALIZER_RDF_UPDATE_START = 'Updating already created RDF file with "{dataset}" dataset' +SERIALIZER_RDF_UPDATE_NEW_DATASET = 'Adding new dataset to RDF file: {dataset}' +SERIALIZER_RDF_UPDATE_FINISHED = 'RDF file update with "{dataset}" dataset finished successfully' +SERIALIZER_RDF_REMOVE_START = 'Removing "{dataset}" dataset from already created RDF file' +SERIALIZER_RDF_REMOVE_FINISHED = '"{dataset}" dataset removal from RDF file process finished successfully' +SERIALIZER_LOAD_TREE = 'Loading tree from {path} XML file' +SERIALIZER_CATALOGUE_SERIALIZE_START = 'Serializing new catalogue for {datamodels} Data Models' +SERIALIZER_CATALOGUE_SERIALIZE_FINISHED = 'Catalogue serialization finished successfully' +SERIALIZER_DATASETS_SERIALIZE_START = 'Serializing catalogue\'s datasets...' +SERIALIZER_DATASETS_SERIALIZE_FINISHED = 'Catalogue\'s datasets serialization process finished successfully' +SERIALIZER_DATASET_SERIALIZE_START = 'Serializing "{datamodel}" Data Model dataset' +SERIALIZER_DATASET_SERIALIZE_FINISHED = 'Dataset serialization finished: "{datamodel}" dataset added to RDF' +SERIALIZER_RESOURCES_SERIALIZE_START = 'Serializing datasets\' distributions...' +SERIALIZER_RESOURCES_SERIALIZE_FINISHED = 'Dataset\'s distributions serialization process finished successfully' +SERIALIZER_RESOURCE_SERIALIZE_START = 'Serializing new distribution' +SERIALIZER_RESOURCE_SERIALIZE_FINISHED = 'Distribution serialization finished successfully' +SERIALIZER_PUBLISHERS_SERIALIZE_START = 'Serializing catalogue\'s publishers...' 
+SERIALIZER_PUBLISHERS_SERIALIZE_FINISHED = 'Catalogue\'s publishers serialization process finished successfully' +SERIALIZER_PUBLISHER_SERIALIZE_START = 'Serializing new publisher: {name}' +SERIALIZER_PUBLISHER_SERIALIZE_FINISHED = '{name} publisher serialization finished successfully' +SERIALIZER_UPDATE_DATASET_NODE = 'Adding new dataset version for "{datamodel}" Data Model to RDF file' +SERIALIZER_REMOVE_DATASET_NODE = 'Removing "{datamodel}" dataset from RDF file' +SERIALIZER_REMOVE_DATASET_NODE_NOT_PRESENT = 'Dataset not present in RDF file (ignoring removal)' +SERIALIZER_CATALOGUE_DATE_NOT_EXISTS = ' not exists for (creating with {date})' +SERIALIZER_CATALOGUE_DATE_ALREADY_EXISTS = ' already exists for (modifying with {date})' +SERIALIZER_PUBLISHER_NODE_APPEARANCES = ' node appears {times} times' + +# /utils/helpers.py +HELPERS_SPATIAL_GEOJSON_NODES = 'There is more than one drawable object (feature) in the GeoJSON provided at {path}. Using only the first one' +HELPERS_SPATIAL_GEOJSON_NOT_FOUND = 'GeoJSON could not be located in {path} file. Is it in JSON file provided? It should be at first place' +HELPERS_SPATIAL_GEOJSON_FILE_NOT_FOUND = 'There was an error trying to access GeoJSON file specified in {path}' diff --git a/cb_edp/utils/validators.py b/cb_edp/utils/validators.py new file mode 100644 index 0000000..e034f04 --- /dev/null +++ b/cb_edp/utils/validators.py @@ -0,0 +1,101 @@ +import os +import re + +from cb_edp.errors.config import NotExpectedValueError +from cb_edp.errors.config import NotInformedFieldError +from cb_edp.errors.config import WrongFormatError +from cb_edp.errors.rdf import RDFFileNotFoundError +from cb_edp.errors.rdf import LastDatasetError + + +class Validators(object): + """ + Utilities class that implements static validators methods to use in the project. + """ + + @staticmethod + def is_informed(field, value): + """ + Checks if a value is informed. If not, raises an exception. + :param str field: Name of the field in configuration file + :param str or list[str] value: Value of the field set in configuration file + :return: None + :raises NotInformedFieldError: + """ + if not value: + raise NotInformedFieldError(field) + + @staticmethod + def is_valid_url(field, value): + """ + Checks if a given host is a properly formed URL. If not, raises an exception. + :param str field: Field name which the host belongs to + :param str value: Host to check (could be domain, localhost or an IP) + :return: None + :raises WrongFormatError: + """ + if not value: + return + + regex = re.compile( + r'^(?:http|ftp)s?://' + r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+(?:[A-Z]{2,6}\.?|[A-Z0-9-]{2,}\.?)|' + r'localhost|' + r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})' + r'(?::\d+)?' + r'(?:/?|[/?]\S+)$', re.IGNORECASE) + + if re.match(regex, value) is None: + raise WrongFormatError(field, value) + + @staticmethod + def is_valid_path(field, value): + """ + Checks if a given value is a properly formed path. If not, raises an exception. + :param str field: Field name which the path belongs to + :param str value: Path to check + :return: None + :raises WrongFormatError: + """ + import os + if not os.path.exists(value): + raise WrongFormatError(field, value) + + @staticmethod + def is_expected_value(field, value, choices): + """ + Check if a specific value appears inside a collection of possible choices. If not, raises an exception. 
+ :param str field: Name of the field in configuration file + :param str value: Value of the field set in configuration file + :param str or list[str] or dict choices: Available possibilities for the field + :return: None + :raises NotExpectedValueError: + """ + if value and value not in choices: + raise NotExpectedValueError(field, value, choices) + + @staticmethod + def is_file_at_path(path): + """ + Check if a file on a given path exists in the filesystem. If not, raises an exception. + :param str path: Path where the file to check is located + :return: None + :raises RDFFileNotFoundError: + """ + if os.path.isdir('/'.join(path.split('/')[:-1])): + if os.path.exists(path): + return + raise RDFFileNotFoundError(path) + + @staticmethod + def is_last_dataset(rdf, namespaces, dataset_tag): + """ + Check if there is more than one datasets in a given RDF/XML file. If not, raises an exception. + :param xml.etree.ElementTree.Element rdf: Parsed RDF file + :param dict namespaces: Namespaces used by the RDF/XML file + :param str dataset_tag: Tag identifier for dataset element + :return: None + :raises LastDatasetError: + """ + if len(rdf.findall(dataset_tag, namespaces=namespaces)) == 1: + raise LastDatasetError diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 0000000..d4bb2cb --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = . +BUILDDIR = _build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/conf.py b/docs/conf.py new file mode 100644 index 0000000..9d96fc6 --- /dev/null +++ b/docs/conf.py @@ -0,0 +1,56 @@ +# Configuration file for the Sphinx documentation builder. +# +# This file only contains a selection of the most common options. For a full +# list see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Path setup -------------------------------------------------------------- + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +# +import os +import sys +sys.path.insert(0, os.path.abspath('.')) + + +# -- Project information ----------------------------------------------------- + +project = 'CB-EDP' +copyright = '2019, CEF Digital' +author = 'CEF Digital' + +# The full version, including alpha/beta/rc tags +release = '1.0' + + +# -- General configuration --------------------------------------------------- + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = [ + 'sphinx.ext.autodoc' +] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. 
+# This pattern also affects html_static_path and html_extra_path. +exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] + + +# -- Options for HTML output ------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +# +html_theme = 'default' + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['_static'] diff --git a/docs/dev/cb_edp.api.rst b/docs/dev/cb_edp.api.rst new file mode 100644 index 0000000..1dc68bf --- /dev/null +++ b/docs/dev/cb_edp.api.rst @@ -0,0 +1,38 @@ +cb\_edp.api package +=================== + +Submodules +---------- + +cb\_edp.api.builder module +-------------------------- + +.. automodule:: cb_edp.api.builder + :members: + :undoc-members: + :show-inheritance: + +cb\_edp.api.main module +----------------------- + +.. automodule:: cb_edp.api.main + :members: + :undoc-members: + :show-inheritance: + +cb\_edp.api.wsgi module +----------------------- + +.. automodule:: cb_edp.api.wsgi + :members: + :undoc-members: + :show-inheritance: + + +Module contents +--------------- + +.. automodule:: cb_edp.api + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/dev/cb_edp.conf.rst b/docs/dev/cb_edp.conf.rst new file mode 100644 index 0000000..2d92b6e --- /dev/null +++ b/docs/dev/cb_edp.conf.rst @@ -0,0 +1,30 @@ +cb\_edp.conf package +==================== + +Submodules +---------- + +cb\_edp.conf.constants module +----------------------------- + +.. automodule:: cb_edp.conf.constants + :members: + :undoc-members: + :show-inheritance: + +cb\_edp.conf.manager module +--------------------------- + +.. automodule:: cb_edp.conf.manager + :members: + :undoc-members: + :show-inheritance: + + +Module contents +--------------- + +.. automodule:: cb_edp.conf + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/dev/cb_edp.errors.rst b/docs/dev/cb_edp.errors.rst new file mode 100644 index 0000000..50b7fd0 --- /dev/null +++ b/docs/dev/cb_edp.errors.rst @@ -0,0 +1,38 @@ +cb\_edp.errors package +====================== + +Submodules +---------- + +cb\_edp.errors.api module +------------------------- + +.. automodule:: cb_edp.errors.api + :members: + :undoc-members: + :show-inheritance: + +cb\_edp.errors.config module +---------------------------- + +.. automodule:: cb_edp.errors.config + :members: + :undoc-members: + :show-inheritance: + +cb\_edp.errors.rdf module +------------------------- + +.. automodule:: cb_edp.errors.rdf + :members: + :undoc-members: + :show-inheritance: + + +Module contents +--------------- + +.. automodule:: cb_edp.errors + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/dev/cb_edp.model.rst b/docs/dev/cb_edp.model.rst new file mode 100644 index 0000000..db9ca27 --- /dev/null +++ b/docs/dev/cb_edp.model.rst @@ -0,0 +1,38 @@ +cb\_edp.model package +===================== + +Submodules +---------- + +cb\_edp.model.catalogue module +------------------------------ + +.. automodule:: cb_edp.model.catalogue + :members: + :undoc-members: + :show-inheritance: + +cb\_edp.model.dataset module +---------------------------- + +.. automodule:: cb_edp.model.dataset + :members: + :undoc-members: + :show-inheritance: + +cb\_edp.model.resource module +----------------------------- + +.. 
automodule:: cb_edp.model.resource + :members: + :undoc-members: + :show-inheritance: + + +Module contents +--------------- + +.. automodule:: cb_edp.model + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/dev/cb_edp.rdf.rst b/docs/dev/cb_edp.rdf.rst new file mode 100644 index 0000000..0609525 --- /dev/null +++ b/docs/dev/cb_edp.rdf.rst @@ -0,0 +1,22 @@ +cb\_edp.rdf package +=================== + +Submodules +---------- + +cb\_edp.rdf.serializer module +----------------------------- + +.. automodule:: cb_edp.rdf.serializer + :members: + :undoc-members: + :show-inheritance: + + +Module contents +--------------- + +.. automodule:: cb_edp.rdf + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/dev/cb_edp.rst b/docs/dev/cb_edp.rst new file mode 100644 index 0000000..f10047a --- /dev/null +++ b/docs/dev/cb_edp.rst @@ -0,0 +1,42 @@ +cb\_edp package +=============== + +Subpackages +----------- + +.. toctree:: + + cb_edp.api + cb_edp.conf + cb_edp.errors + cb_edp.model + cb_edp.rdf + cb_edp.utils + +Submodules +---------- + +cb\_edp.commands module +----------------------- + +.. automodule:: cb_edp.commands + :members: + :undoc-members: + :show-inheritance: + +cb\_edp.edp module +------------------ + +.. automodule:: cb_edp.edp + :members: + :undoc-members: + :show-inheritance: + + +Module contents +--------------- + +.. automodule:: cb_edp + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/dev/cb_edp.utils.rst b/docs/dev/cb_edp.utils.rst new file mode 100644 index 0000000..3cc50fe --- /dev/null +++ b/docs/dev/cb_edp.utils.rst @@ -0,0 +1,38 @@ +cb\_edp.utils package +===================== + +Submodules +---------- + +cb\_edp.utils.helpers module +---------------------------- + +.. automodule:: cb_edp.utils.helpers + :members: + :undoc-members: + :show-inheritance: + +cb\_edp.utils.messages module +----------------------------- + +.. automodule:: cb_edp.utils.messages + :members: + :undoc-members: + :show-inheritance: + +cb\_edp.utils.validators module +------------------------------- + +.. automodule:: cb_edp.utils.validators + :members: + :undoc-members: + :show-inheritance: + + +Module contents +--------------- + +.. automodule:: cb_edp.utils + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/dev/modules.rst b/docs/dev/modules.rst new file mode 100644 index 0000000..0988660 --- /dev/null +++ b/docs/dev/modules.rst @@ -0,0 +1,7 @@ +cb_edp +====== + +.. toctree:: + :maxdepth: 4 + + cb_edp diff --git a/docs/index.rst b/docs/index.rst new file mode 100644 index 0000000..b612224 --- /dev/null +++ b/docs/index.rst @@ -0,0 +1,20 @@ +.. CB-EDP documentation master file, created by + sphinx-quickstart on Tue Sep 17 16:17:51 2019. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +Welcome to CB-EDP's documentation! +================================== + +.. toctree:: + :maxdepth: 2 + :caption: Contents: + + + +Indices and tables +================== + +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` diff --git a/docs/make.bat b/docs/make.bat new file mode 100644 index 0000000..922152e --- /dev/null +++ b/docs/make.bat @@ -0,0 +1,35 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=. +set BUILDDIR=_build + +if "%1" == "" goto help + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. 
Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.http://sphinx-doc.org/ + exit /b 1 +) + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% + +:end +popd diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..75adf29 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,6 @@ +Click==7.0 +configobj==5.0.6 +Flask==1.0.2 +gunicorn==19.9.0 +requests==2.22.0 +time_uuid==0.2.0 \ No newline at end of file diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..3fe21ab --- /dev/null +++ b/setup.py @@ -0,0 +1,21 @@ +from setuptools import setup, find_packages + +config = { + 'description': 'FIWARE Context Broker instance integration with the EDP', + 'author': 'CEF Digital', + 'url': 'https://github.com/ConnectingEurope/ContextBroker-EDP', + 'version': '1.0', + 'install_requires': ['Click', 'configobj', 'Flask', 'gunicorn', 'requests', 'time_uuid'], + 'packages': find_packages(exclude=['ez_setup', 'tests', 'tests.*']), + 'package_data': {'': ['logger.yml', 'integrated.ini', 'template.ini', 'template.xml', 'api/templates/error.html']}, + 'include_package_data': True, + 'py_modules': ['cb_edp'], + 'name': 'cb-edp', + 'entry_points': ''' + [console_scripts] + cb-edp=cb_edp.commands:cli + ''' +} + +# Add in any extra build steps for cython, etc. +setup(**config)
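The URL-safe Base64 helpers in cb_edp/utils/helpers.py swap the characters that break URL path segments ('+', '=' and '/') for '.', nothing and '_', and decode_base64_url() restores the stripped padding before decoding. A minimal round-trip sketch, assuming the package is importable; the broker query URL below is made up purely for illustration:

from cb_edp.utils.helpers import Helpers

# Hypothetical NGSI query URL; any ASCII string works, it is only sample input.
original = 'http://broker.example.org:1026/v2/entities?type=AirQualityObserved&limit=100'

encoded = Helpers.encode_base64_url(original)   # '+' -> '.', '/' -> '_', '=' padding stripped
decoded = Helpers.decode_base64_url(encoded)    # substitutions reversed, padding re-added

assert decoded == original

Because the padding is dropped on encode, decode_base64_url() has to pad the string back to a multiple of four characters before handing it to base64.b64decode().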
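The serializer discovers namespace prefixes by scanning the raw RDF/XML text with a regular expression rather than walking the parsed tree. A short sketch of that same pattern applied to an inline root element; the sample namespaces are illustrative input, not output produced by the tool:

import re

# Mirrors what Serializer._get_rdf_namespaces() reads from the RDF file on disk.
raw_rdf = ('<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" '
           'xmlns:dcat="http://www.w3.org/ns/dcat#" '
           'xmlns:dct="http://purl.org/dc/terms/">')

# Same expression as in serializer.py: capture the prefix and the namespace URI.
namespaces = dict(re.findall(r'xmlns:(\w+)=\"([\w./:#-]+)\"', raw_rdf))

print(namespaces['dcat'])  # http://www.w3.org/ns/dcat#

Passing this dictionary to ET.register_namespace(), as _register_namespaces() does, is what keeps the serialized output using the original rdf:, dcat: and dct: prefixes instead of ElementTree's auto-generated ns0:-style prefixes.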