Skip to content

Commit

Permalink
migrate: Migration of the stats.
Browse files Browse the repository at this point in the history
  • Loading branch information
psaiz committed Sep 13, 2023
1 parent 850ac44 commit 76ca506
Show file tree
Hide file tree
Showing 7 changed files with 74 additions and 2 deletions.
39 changes: 39 additions & 0 deletions invenio_stats/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,10 @@

import click
from dateutil.parser import parse as dateutil_parse
from flask import current_app
from flask.cli import with_appcontext
from invenio_search.engine import search
from invenio_search.proxies import current_search_client
from werkzeug.local import LocalProxy

from .proxies import current_stats
Expand Down Expand Up @@ -164,3 +167,39 @@ def _aggregations_list_bookmarks(
click.echo("{}:".format(a))
for b in bookmarks:
click.echo(" - {}".format(b.date))


# Prefix that marks a search index as a legacy copy awaiting migration.
_LEGACY_INDEX_PREFIX = "legacy-"

# Painless script run on every reindexed document: it drops the listed
# fields from the document source before the document is written to the
# destination index.
_LEGACY_FIELD_CLEANUP_SCRIPT = (
    'ctx._source.remove("conceptdoi");'
    'ctx._source.remove( "resource_type");'
    'ctx._source.remove("access_right");'
    'ctx._source.remove("bucket_id");'
    'ctx._source.remove("file_key");'
    'ctx._source.remove("referrer");'
    'ctx._source.remove("size");'
    'ctx._source.remove("file_id");'
    'ctx._source.remove("conceptrecid");'
    'ctx._source.remove("recid");'
    'ctx._source.remove("doi")'
)


@stats.command("migrate_zenodo")
@with_appcontext
def _migrate():
    """Migrate the statistics from Zenodo.

    Lists every search index matching ``legacy*`` and, in index-name order,
    reindexes each one into the index of the same name without the
    ``legacy-`` prefix, removing a fixed set of fields from every document
    on the way.

    An index is skipped when the target already holds the same number of
    documents as the source. A failure while reindexing one index is
    reported and the command carries on with the next index.
    """
    legacy_indices = sorted(
        current_search_client.cat.indices("legacy*", format="json"),
        key=lambda entry: entry["index"],
    )
    total = len(legacy_indices)

    # Count from 1 so the progress line reads "1/N" ... "N/N".
    for position, entry in enumerate(legacy_indices, start=1):
        source = entry["index"]
        click.echo("%i/%i Doing index: %s" % (position, total, source))

        # The "legacy*" pattern also matches names without the dash; for
        # those there is no distinct target to migrate into.
        if not source.startswith(_LEGACY_INDEX_PREFIX):
            click.echo(
                "\tThe index name does not start with '%s'. Skipping"
                % _LEGACY_INDEX_PREFIX
            )
            continue
        # Strip only the leading prefix, not every occurrence of it.
        target = source[len(_LEGACY_INDEX_PREFIX):]

        try:
            # Compare the document counts only: the full count response
            # also carries shard metadata that can differ between indices.
            old = current_search_client.count(body={}, index=source)["count"]
            new = current_search_client.count(body={}, index=target)["count"]
        except search.exceptions.NotFoundError:
            # An index is missing (typically the target has not been
            # created yet): go ahead and reindex.
            pass
        else:
            if old == new:
                click.echo(
                    "\tThe target has the same number of entries. Skipping"
                )
                continue

        try:
            current_search_client.reindex(
                body={
                    "source": {"index": source},
                    "dest": {"index": target},
                    "script": {
                        "lang": "painless",
                        "source": _LEGACY_FIELD_CLEANUP_SCRIPT,
                    },
                }
            )
        except Exception as error:
            # Deliberately best-effort: one failing index must not abort
            # the migration of the remaining ones.
            click.echo("NOPE")
            click.echo(error)
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,9 @@
},
"size": {
"type": "double"
},
"parent_recid": {
"type": "keyword"
}
}
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
}
],
"date_detection": false,
"dynamic": false,
"dynamic": "strict",
"numeric_detection": false,
"properties": {
"timestamp": {
Expand Down Expand Up @@ -54,6 +54,12 @@
},
"size": {
"type": "double"
},
"user_id": {
"type": "double"
},
"parent_recid": {
"type": "keyword"
}
}
},
Expand Down
3 changes: 3 additions & 0 deletions invenio_stats/contrib/file_download/v7/file-download-v1.json
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,9 @@
},
"size": {
"type": "double"
},
"parent_recid": {
"type": "keyword"
}
}
},
Expand Down
3 changes: 3 additions & 0 deletions invenio_stats/contrib/record_view/os-v1/record-view-v1.json
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@
},
"unique_session_id": {
"type": "keyword"
},
"parent_recid": {
"type": "keyword"
}
}
},
Expand Down
17 changes: 16 additions & 1 deletion invenio_stats/contrib/record_view/os-v2/record-view-v1.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
},
"mappings": {
"date_detection": false,
"dynamic": false,
"dynamic": "strict",
"numeric_detection": false,
"properties": {
"timestamp": {
Expand Down Expand Up @@ -35,11 +35,26 @@
"is_robot": {
"type": "boolean"
},
"machine": {
"type": "boolean"
},
"unique_id": {
"type": "keyword"
},
"unique_session_id": {
"type": "keyword"
},
"communities": {
"type": "keyword"
},
"countries": {
"type": "keyword"
},
"owners": {
"type": "keyword"
},
"parent_recid": {
"type": "keyword"
}
}
},
Expand Down
3 changes: 3 additions & 0 deletions invenio_stats/contrib/record_view/v7/record-view-v1.json
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@
},
"unique_session_id": {
"type": "keyword"
},
"parent_recid": {
"type": "keyword"
}
}
},
Expand Down

0 comments on commit 76ca506

Please sign in to comment.