@stats.command("migrate_zenodo")
@with_appcontext
def _migrate():
    """Migrate the statistics from zenodo.

    Every index matching ``legacy*`` (except the bookmarks index) is
    reindexed into the index whose name is obtained by replacing
    ``legacy-zenodo`` with ``zenodo-prod``. Documents flagged
    ``is_parent: true`` are left out. While copying, a painless script
    stores ``conceptrecid`` as ``parent_recid``, stamps
    ``updated_timestamp`` with the time the command started, and removes
    the obsolete fields listed below.

    An index is skipped when its target already holds as many documents as
    the source has documents eligible for migration. A failure on one index
    is reported and the command carries on with the next one.
    """
    click.echo("Checking if there are any `legacy` indices")
    my_date = datetime.utcnow().isoformat()

    # Fields of the legacy events that are dropped from the migrated copy.
    obsolete_fields = (
        "conceptdoi",
        "resource_type",
        "access_right",
        "referrer",
        "size",
        "conceptrecid",
        "doi",
        "is_parent",
        "owners",
        "communities",
    )
    # parent_recid is assigned before conceptrecid is removed from the source.
    painless = (
        "ctx._source.parent_recid=ctx._source.conceptrecid;"
        f'ctx._source.updated_timestamp="{my_date}";'
    ) + "".join(f'ctx._source.remove("{name}");' for name in obsolete_fields)

    # Shared by the reindex request and the "already migrated?" check so
    # both look at exactly the same set of source documents.
    non_parent_query = {"bool": {"must_not": [{"term": {"is_parent": True}}]}}

    legacy_indices = current_search_client.cat.indices(
        index="legacy*", format="json"
    )
    # Drop the bookmarks index up front so the progress counter only
    # counts indices that are actually processed.
    index_names = sorted(
        entry["index"]
        for entry in legacy_indices
        if entry["index"] != "legacy-zenodo-stats-bookmarks"
    )
    total = len(index_names)

    for position, source in enumerate(index_names, start=1):
        click.echo("%i/%i Doing index: %s" % (position, total, source))
        target = source.replace("legacy-zenodo", "zenodo-prod")
        if target == source:
            # Without the expected prefix the reindex would read from and
            # write to the same index, which the search engine rejects.
            click.echo("\tNot a legacy-zenodo index. Skipping")
            continue

        try:
            # Compare document counts, not the raw responses: the responses
            # also carry shard statistics that say nothing about content.
            old = current_search_client.count(
                body={"query": non_parent_query}, index=source
            )["count"]
            new = current_search_client.count(index=target)["count"]
        except search.exceptions.NotFoundError:
            # The target does not exist yet: nothing to compare, migrate.
            pass
        else:
            if old == new:
                click.echo("\tThe target has the same number of entries. Skipping")
                continue

        reindex_body = {
            # "create" plus "proceed" leaves documents already present in
            # the target alone instead of aborting, so reruns are safe.
            "conflicts": "proceed",
            "source": {"index": source, "query": non_parent_query},
            "dest": {"index": target, "op_type": "create"},
            "script": {"lang": "painless", "source": painless},
        }
        click.echo(reindex_body)
        try:
            current_search_client.reindex(body=reindex_body)
        except Exception as error:
            # Best effort: report the failure and continue with the
            # remaining indices rather than aborting the whole migration.
            click.echo("NOPE")
            click.echo(error)
b/invenio_stats/contrib/file_download/v7/file-download-v1.json @@ -54,6 +54,9 @@ }, "size": { "type": "double" + }, + "parent_recid": { + "type": "keyword" } } }, diff --git a/invenio_stats/contrib/record_view/os-v1/record-view-v1.json b/invenio_stats/contrib/record_view/os-v1/record-view-v1.json index 4cee71cb..c311cfc0 100644 --- a/invenio_stats/contrib/record_view/os-v1/record-view-v1.json +++ b/invenio_stats/contrib/record_view/os-v1/record-view-v1.json @@ -40,6 +40,9 @@ }, "unique_session_id": { "type": "keyword" + }, + "parent_recid": { + "type": "keyword" } } }, diff --git a/invenio_stats/contrib/record_view/os-v2/record-view-v1.json b/invenio_stats/contrib/record_view/os-v2/record-view-v1.json index 4cee71cb..ec1ce3eb 100644 --- a/invenio_stats/contrib/record_view/os-v2/record-view-v1.json +++ b/invenio_stats/contrib/record_view/os-v2/record-view-v1.json @@ -7,7 +7,7 @@ }, "mappings": { "date_detection": false, - "dynamic": false, + "dynamic": "strict", "numeric_detection": false, "properties": { "timestamp": { @@ -35,11 +35,26 @@ "is_robot": { "type": "boolean" }, + "machine": { + "type": "boolean" + }, "unique_id": { "type": "keyword" }, "unique_session_id": { "type": "keyword" + }, + "communities": { + "type": "keyword" + }, + "countries": { + "type": "keyword" + }, + "owners": { + "type": "keyword" + }, + "parent_recid": { + "type": "keyword" } } }, diff --git a/invenio_stats/contrib/record_view/v7/record-view-v1.json b/invenio_stats/contrib/record_view/v7/record-view-v1.json index 4cee71cb..c311cfc0 100644 --- a/invenio_stats/contrib/record_view/v7/record-view-v1.json +++ b/invenio_stats/contrib/record_view/v7/record-view-v1.json @@ -40,6 +40,9 @@ }, "unique_session_id": { "type": "keyword" + }, + "parent_recid": { + "type": "keyword" } } },