From a1f0050b4309f4b0401169c03a552f1ecf7e9cbc Mon Sep 17 00:00:00 2001
From: Pablo Saiz
Date: Wed, 13 Sep 2023 13:05:50 +0200
Subject: [PATCH] migrate: Migration of the stats.

---
 invenio_stats/cli.py                          | 69 +++++++++++++++++++
 .../file_download/os-v1/file-download-v1.json |  3 +
 .../file_download/os-v2/file-download-v1.json |  8 ++-
 .../file_download/v7/file-download-v1.json    |  3 +
 .../record_view/os-v1/record-view-v1.json     |  3 +
 .../record_view/os-v2/record-view-v1.json     | 17 ++++-
 .../record_view/v7/record-view-v1.json        |  3 +
 7 files changed, 104 insertions(+), 2 deletions(-)

diff --git a/invenio_stats/cli.py b/invenio_stats/cli.py
index bd49086c..68a9ebdd 100644
--- a/invenio_stats/cli.py
+++ b/invenio_stats/cli.py
@@ -13,7 +13,10 @@
 
 import click
 from dateutil.parser import parse as dateutil_parse
+from flask import current_app
 from flask.cli import with_appcontext
+from invenio_search.engine import search
+from invenio_search.proxies import current_search_client
 from werkzeug.local import LocalProxy
 
 from .proxies import current_stats
@@ -167,3 +167,69 @@ def _aggregations_list_bookmarks(
         click.echo("{}:".format(a))
         for b in bookmarks:
             click.echo(" - {}".format(b.date))
+
+
+@stats.command("migrate_zenodo")
+@with_appcontext
+def _migrate():
+    """Migrate the statistics from zenodo.
+
+    Each index matching ``legacy*`` is reindexed into the index whose name is
+    the legacy name without its first ``legacy-``. An index is skipped when
+    its target already holds the same number of documents, so the command
+    can be run again after a partial failure. A failed reindex is reported
+    on stderr and the remaining indices are still processed.
+    """
+    # Fields stripped from every document by the reindex script.
+    removed_fields = (
+        "conceptdoi",
+        "resource_type",
+        "access_right",
+        "bucket_id",
+        "file_key",
+        "referrer",
+        "size",
+        "file_id",
+        "conceptrecid",
+        "recid",
+        "doi",
+    )
+    remove_script = ";".join(
+        'ctx._source.remove("{}")'.format(field) for field in removed_fields
+    )
+    legacy_indices = sorted(
+        current_search_client.cat.indices("legacy*", format="json"),
+        key=lambda d: d["index"],
+    )
+    total = len(legacy_indices)
+    # Count from 1 so that the last index is reported as "total/total".
+    for position, legacy_index in enumerate(legacy_indices, start=1):
+        source = legacy_index["index"]
+        target = source.replace("legacy-", "", 1)
+        click.echo("{}/{} Doing index: {}".format(position, total, source))
+        try:
+            # Compare the document counts only: the rest of the response
+            # (shard statistics) can differ between two equivalent indices.
+            old = current_search_client.count(index=source)["count"]
+            new = current_search_client.count(index=target)["count"]
+        except search.exceptions.NotFoundError:
+            # An index is missing (presumably the target, not created yet).
+            pass
+        else:
+            if old == new:
+                click.echo("\tThe target has the same number of entries. Skipping")
+                continue
+        try:
+            current_search_client.reindex(
+                body={
+                    "source": {"index": source},
+                    "dest": {"index": target},
+                    "script": {"lang": "painless", "source": remove_script},
+                }
+            )
+        except Exception as error:
+            # Best effort: report the failure and carry on with the next index.
+            click.echo(
+                "\tReindexing {} into {} failed: {}".format(source, target, error),
+                err=True,
+            )
diff --git a/invenio_stats/contrib/file_download/os-v1/file-download-v1.json b/invenio_stats/contrib/file_download/os-v1/file-download-v1.json
index b7965963..95392809 100644
--- a/invenio_stats/contrib/file_download/os-v1/file-download-v1.json
+++ b/invenio_stats/contrib/file_download/os-v1/file-download-v1.json
@@ -54,6 +54,9 @@
       },
       "size": {
         "type": "double"
+      },
+      "parent_recid": {
+        "type": "keyword"
       }
     }
   },
diff --git a/invenio_stats/contrib/file_download/os-v2/file-download-v1.json b/invenio_stats/contrib/file_download/os-v2/file-download-v1.json
index b7965963..f1d9a31d 100644
--- a/invenio_stats/contrib/file_download/os-v2/file-download-v1.json
+++ b/invenio_stats/contrib/file_download/os-v2/file-download-v1.json
@@ -18,7 +18,7 @@
       }
     ],
     "date_detection": false,
-    "dynamic": false,
+    "dynamic": "strict",
     "numeric_detection": false,
     "properties": {
       "timestamp": {
@@ -54,6 +54,12 @@
       },
       "size": {
         "type": "double"
+      },
+      "user_id": {
+        "type": "double"
+      },
+      "parent_recid": {
+        "type": "keyword"
       }
     }
   },
diff --git a/invenio_stats/contrib/file_download/v7/file-download-v1.json b/invenio_stats/contrib/file_download/v7/file-download-v1.json
index b7965963..95392809 100644
--- a/invenio_stats/contrib/file_download/v7/file-download-v1.json
+++ b/invenio_stats/contrib/file_download/v7/file-download-v1.json
@@ -54,6 +54,9 @@
       },
       "size": {
         "type": "double"
+      },
+      "parent_recid": {
+        "type": "keyword"
       }
     }
   },
diff --git a/invenio_stats/contrib/record_view/os-v1/record-view-v1.json b/invenio_stats/contrib/record_view/os-v1/record-view-v1.json
index 4cee71cb..c311cfc0 100644
--- a/invenio_stats/contrib/record_view/os-v1/record-view-v1.json
+++ b/invenio_stats/contrib/record_view/os-v1/record-view-v1.json
@@ -40,6 +40,9 @@
       },
       "unique_session_id": {
         "type": "keyword"
+      },
+      "parent_recid": {
+        "type": "keyword"
       }
     }
   },
diff --git a/invenio_stats/contrib/record_view/os-v2/record-view-v1.json b/invenio_stats/contrib/record_view/os-v2/record-view-v1.json
index 4cee71cb..ec1ce3eb 100644
--- a/invenio_stats/contrib/record_view/os-v2/record-view-v1.json
+++ b/invenio_stats/contrib/record_view/os-v2/record-view-v1.json
@@ -7,7 +7,7 @@
   },
   "mappings": {
     "date_detection": false,
-    "dynamic": false,
+    "dynamic": "strict",
     "numeric_detection": false,
     "properties": {
       "timestamp": {
@@ -35,11 +35,26 @@
       "is_robot": {
         "type": "boolean"
       },
+      "machine": {
+        "type": "boolean"
+      },
       "unique_id": {
         "type": "keyword"
       },
       "unique_session_id": {
         "type": "keyword"
+      },
+      "communities": {
+        "type": "keyword"
+      },
+      "countries": {
+        "type": "keyword"
+      },
+      "owners": {
+        "type": "keyword"
+      },
+      "parent_recid": {
+        "type": "keyword"
       }
     }
   },
diff --git a/invenio_stats/contrib/record_view/v7/record-view-v1.json b/invenio_stats/contrib/record_view/v7/record-view-v1.json
index 4cee71cb..c311cfc0 100644
--- a/invenio_stats/contrib/record_view/v7/record-view-v1.json
+++ b/invenio_stats/contrib/record_view/v7/record-view-v1.json
@@ -40,6 +40,9 @@
       },
       "unique_session_id": {
         "type": "keyword"
+      },
+      "parent_recid": {
+        "type": "keyword"
       }
     }
   },