Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Sort Library documents by date added #170

Merged
merged 19 commits into from
Oct 25, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 16 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,16 +48,30 @@ docker run -d -e POSTGRES_USER="postgres" -e POSTGRES_PASSWORD="postgres" -p 543
docker exec -it postgres bash -c "psql -c \"CREATE ROLE biblib_service WITH LOGIN PASSWORD 'biblib_service';\""
docker exec -it postgres bash -c "psql -c \"CREATE DATABASE biblib_service;\""
docker exec -it postgres bash -c "psql -c \"GRANT CREATE ON DATABASE biblib_service TO biblib_service;\""
python3 manage.py createdb
```

Once the database has been created, `alembic` can be used to upgrade the database to the correct alembic revision:
```bash
#In order for alembic to have access to the models metadata, the biblib-service directory must be added to the PYTHONPATH
export PYTHONPATH=$(pwd):$PYTHONPATH
alembic upgrade head
```

A new revision can be created by doing the following:
```bash
#In order for alembic to have access to the models metadata, the biblib-service directory must be added to the PYTHONPATH
export PYTHONPATH=$(pwd):$PYTHONPATH
alembic revision -m "<revision-name>" --autogenerate
```

A test version of the microservice can then be deployed using:
```bash
python3 wsgi.py
```

## deployment

The only thing to take care of when making a deployment is the migration of the backend database. Libraries uses specific features of PostgreSQL, such as `UUID` and `JSON`-store, so you should think carefully if you wish to change the backend. The use of `flask-migrate` for database migrations has been replaced by directly calling `alembic`.
The only thing to take care of when making a deployment is the migration of the backend database. Libraries uses specific features of PostgreSQL, such as `UUID` and `JSON`-store, so you should think carefully if you wish to change the backend. **The use of `flask-migrate` for database migrations has been replaced by directly calling `alembic`.**

## Feature additions

Expand Down
5 changes: 3 additions & 2 deletions alembic/env.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from alembic import context
from sqlalchemy import engine_from_config, pool
from logging.config import fileConfig
import biblib.models
import os
import sys

Expand All @@ -20,8 +21,8 @@
#from flask import current_app
#config.set_main_option('sqlalchemy.url', current_app.config.get('SQLALCHEMY_BINDS')['libraries'])
#target_metadata = current_app.extensions['migrate'].db.metadata
target_metadata = None

target_metadata = biblib.models.Base.metadata
tjacovich marked this conversation as resolved.
Show resolved Hide resolved
#target_metadata = None
# other values from the config, defined by the needs of env.py,
# can be acquired:
# my_important_option = config.get_main_option("my_important_option")
Expand Down
2 changes: 1 addition & 1 deletion biblib/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,7 +231,7 @@ def add_bibcodes(self, bibcodes):
"""
if not self.bibcode:
self.bibcode = {}
[self.bibcode.setdefault(item, {}) for item in bibcodes]
[self.bibcode.setdefault(item, {"timestamp": datetime.timestamp(datetime.now())}) for item in bibcodes]
tjacovich marked this conversation as resolved.
Show resolved Hide resolved

def remove_bibcodes(self, bibcodes):
"""
Expand Down
2 changes: 1 addition & 1 deletion biblib/tests/unit_tests/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def test_adding_bibcode_if_not_commited_to_library(self):
session.add(lib)
session.commit()

self.assertEqual(lib.bibcode, {k: {} for k in bibcodes_list})
self.assertEqual(lib.bibcode, {k: {"timestamp":lib.bibcode[k]["timestamp"]} for k in bibcodes_list})
self.assertUnsortedEqual(lib.get_bibcodes(), bibcodes_list)

def test_removing_bibcodes_from_library(self):
Expand Down
7 changes: 6 additions & 1 deletion biblib/tests/unit_tests/test_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -1713,6 +1713,11 @@ def test_user_can_add_to_library(self):
library = session.query(Library).filter(Library.id == library_id).all()
for _lib in library:
self.assertIn(list(self.stub_library_2.bibcode.keys())[0], _lib.bibcode)
#Check that timestamps have been assigned
for _lib in library:
for bib in _lib.bibcode:
self.assertIn("timestamp", _lib.bibcode[bib].keys())
self.assertEqual(type(_lib.bibcode[bib]["timestamp"]), float)

def test_user_cannot_duplicate_same_document_in_library(self):
"""
Expand Down Expand Up @@ -3591,7 +3596,7 @@ def test_can_upsert_a_library_into_database(self):

with self.app.session_scope() as session:
library = session.query(Library).filter(Library.name == self.stub_library.name).one()
self.assertEqual(library.bibcode, self.stub_library.bibcode)
self.assertEqual(library.bibcode.keys(), self.stub_library.bibcode.keys())

def test_can_upsert_a_library_when_the_names_match(self):
"""
Expand Down
80 changes: 80 additions & 0 deletions biblib/tests/unit_tests/test_webservices.py
Original file line number Diff line number Diff line change
Expand Up @@ -1092,6 +1092,86 @@ def test_remove_document_from_library(self):
self.assertTrue(len(response.json['documents']) == 0,
response.json['documents'])

def test_timestamp_sort_returns_correct_order(self):
    """
    Test the /libraries/<> end point with GET, sorting the documents by the
    time they were added to the library ('time asc' and 'time desc').

    Documents are added to a single library in two separate POST requests.
    The 'time asc' listing is expected to equal the order in which the
    bibcodes were posted, and the 'time desc' listing the exact reverse.

    :return: no return
    """

    # Stub data
    stub_user = UserShop()
    stub_library = LibraryShop()

    # Make the library
    url = url_for('userview')
    response = self.client.post(
        url,
        data=stub_library.user_view_post_data_json,
        headers=stub_user.headers
    )
    self.assertEqual(response.status_code, 200)
    for key in ['name', 'id']:
        self.assertIn(key, response.json)

    # Get the library ID
    library_id = response.json['id']
    # Add the first batch of documents to the library
    url = url_for('documentview', library=library_id)
    with MockSolrQueryService(canonical_bibcode = json.loads(stub_library.document_view_post_data_json('add')).get('bibcode')) as SQ:
        response = self.client.post(
            url,
            data=stub_library.document_view_post_data_json('add'),
            headers=stub_user.headers
        )
    self.assertEqual(response.json['number_added'],
                     len(stub_library.bibcode))
    self.assertEqual(response.status_code, 200)
    # Expected order of documents: starts with the bibcodes of the first POST
    full_bibcodes=json.loads(stub_library.document_view_post_data_json('add')).get('bibcode')

    # Second stub library (nb_codes=2); its documents are posted to the same
    # library as above (url is unchanged), after the first batch
    stub_library = LibraryShop(nb_codes=2)

    with MockSolrQueryService(canonical_bibcode = json.loads(stub_library.document_view_post_data_json('add')).get('bibcode')) as SQ:
        response = self.client.post(
            url,
            data=stub_library.document_view_post_data_json('add'),
            headers=stub_user.headers
        )
    # The second batch is appended, so full_bibcodes is in posting order
    full_bibcodes+=json.loads(stub_library.document_view_post_data_json('add')).get('bibcode')
    self.assertEqual(response.json['number_added'],
                     len(stub_library.bibcode))
    self.assertEqual(response.status_code, 200)

    # Fetch the library sorted by time added, oldest first
    url = url_for('libraryview', library=library_id)

    # NOTE(review): stub_library is the second stub at this point, so the
    # bigquery mock only receives the second batch's bibcodes - confirm
    # this is intended.
    with MockSolrBigqueryService(
            canonical_bibcode=stub_library.bibcode) as BQ, \
            MockEmailService(stub_user, end_type='uid') as ES:
        response = self.client.get(
            url,
            headers=stub_user.headers,
            query_string={'sort': 'time asc'}
        )

    # 'time asc' must return the documents in the order they were posted
    self.assertEqual(response.status_code, 200, response)
    self.assertEqual(full_bibcodes,
                     response.json['documents'])

    # Same request, newest first
    with MockSolrBigqueryService(
            canonical_bibcode=stub_library.bibcode) as BQ, \
            MockEmailService(stub_user, end_type='uid') as ES:
        response = self.client.get(
            url,
            headers=stub_user.headers,
            query_string={"sort": 'time desc'}
        )

    # 'time desc' must return the exact reverse of the posting order
    self.assertEqual(response.status_code, 200, response)
    self.assertEqual(list(reversed(full_bibcodes)),
                     response.json['documents'])

def test_add_query_to_library(self):
"""
Test the /query/<> end point with POST to add a document
Expand Down
20 changes: 20 additions & 0 deletions biblib/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
"""

from collections import Counter
from datetime import datetime
import json

def get_GET_params(request, types={}):
Expand Down Expand Up @@ -79,6 +80,25 @@ def uniquify(input_list):
seen_add = seen.add
return [item for item in input_list if not (item in seen or seen_add(item))]

def convert_timestamp_datetime(timestamp, return_string=False):
    """
    Takes a POSIX timestamp and converts it back to a naive datetime object
    expressed in UTC.
    input:
    -------
    timestamp (int or float): seconds since the Unix epoch
    return_string (bool): returns the date as an ISO 8601 string if True

    return:
    -------
    datetime_obj (datetime): naive (tzinfo is None) datetime object in UTC,
                             returned when return_string is False
    date_string (str): string repr of the datetime object of the form
                       YYYY-MM-DDTHH:MM:SS[.ffffff]; the fractional part is
                       omitted when the microseconds are zero
    """
    # Imported here so this function does not rely on a module-level import
    # beyond the existing `from datetime import datetime`.
    from datetime import timezone

    # datetime.utcfromtimestamp() is deprecated since Python 3.12. Build an
    # aware UTC datetime and strip the tzinfo so callers still receive the
    # same naive value (and the same isoformat() output) as before.
    datetime_obj = datetime.fromtimestamp(timestamp, tz=timezone.utc).replace(tzinfo=None)
    if return_string:
        return datetime_obj.isoformat()
    return datetime_obj


def assert_unsorted_equal(s, t):
"""
Expand Down
69 changes: 66 additions & 3 deletions biblib/views/library_view.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,11 @@
from biblib.models import User, Library, Permissions
from biblib.client import client
from biblib.views.base_view import BaseView
from datetime import datetime
from flask import request, current_app
from flask_discoverer import advertise
from sqlalchemy import Boolean
from sqlalchemy.orm.attributes import flag_modified
from biblib.views.http_errors import MISSING_USERNAME_ERROR, SOLR_RESPONSE_MISMATCH_ERROR, \
MISSING_LIBRARY_ERROR, NO_PERMISSION_ERROR, BAD_LIBRARY_ID_ERROR

Expand Down Expand Up @@ -138,6 +140,34 @@ def read_access(cls, service_uid, library_id):

return False

@staticmethod
def timestamp_sort(solr, library_id, reverse=False):
    """
    Take a solr response and sort its documents based on the timestamps
    stored for each bibcode in the library.
    :input: solr: response from SOLR bigquery; the documents are read from
                  solr['response']['docs'] and each one is looked up by its
                  'bibcode' key
    :input: library_id: id of the library whose timestamps are used
    :input: reverse: returns library by `time desc` if true, `time asc` otherwise.

    :return: solr: the same response object. Its docs are re-ordered by when
                   each item was added; they are left in their original
                   order if the response contains an error or the
                   timestamps cannot be retrieved.
    """
    if "error" in solr['response']:
        # The response is a mapping here, so report the error payload itself
        # (it has no status_code attribute to read).
        current_app.logger.warning(
            "SOLR bigquery returned an error: {}. Skipping timestamp sort.".format(solr['response']['error'])
        )
        return solr

    try:
        with current_app.session_scope() as session:
            # Find the specified library
            library = session.query(Library).filter_by(id=library_id).one()
            stored_bibcodes = library.bibcode
            # sorted() builds a new list, so if any document has no stored
            # timestamp the lookup raises before the assignment and the
            # original ordering is kept untouched.
            solr['response']['docs'] = sorted(
                solr['response']['docs'],
                key=lambda doc: stored_bibcodes[doc['bibcode']]['timestamp'],
                reverse=reverse
            )
    except Exception as e:
        # Deliberate best-effort: sorting problems must not fail the request.
        # Log with library_id because `library` is unbound when the query
        # itself is what failed.
        current_app.logger.warning(
            "Failed to retrieve timestamps for {} with exception: {}. Returning default sorting.".format(library_id, e)
        )

    return solr

@staticmethod
def solr_update_library(library_id, solr_docs):
"""
Expand Down Expand Up @@ -173,8 +203,13 @@ def solr_update_library(library_id, solr_docs):

with current_app.session_scope() as session:
library = session.query(Library).filter(Library.id == library_id).one()
for bibcode in library.bibcode:
default_timestamp = datetime.timestamp(library.date_created)

for bibcode in library.bibcode:
if "timestamp" not in library.bibcode[bibcode].keys():
update = True
library.bibcode[bibcode]["timestamp"] = default_timestamp

# Skip if its already canonical
if bibcode in canonical_bibcodes:
new_bibcode[bibcode] = library.bibcode[bibcode]
Expand All @@ -201,6 +236,7 @@ def solr_update_library(library_id, solr_docs):
# Update the database
library.bibcode = new_bibcode
session.add(library)
flag_modified(library, "bibcode")
tjacovich marked this conversation as resolved.
Show resolved Hide resolved
session.commit()

updates = dict(
Expand Down Expand Up @@ -276,6 +312,12 @@ def get(self, library):
- sort: 'date desc'
- fl: 'bibcode'

Additional Pagination options:
------------
- sort:
- "time asc" sort by time added to library with the least recently added documents being listed first.
- "time desc" sort by time added to library with the most recently added documents being listed first.

"""
try:
user = int(request.headers[USER_ID_KEYWORD])
Expand All @@ -301,6 +343,17 @@ def get(self, library):
raw_library = False

sort = request.args.get('sort', 'date desc')
#timestamp sorting is handled in biblib so we need to change the sort to something SOLR understands.
if sort in ['time asc', 'time desc']:
current_app.logger.debug("sort order is set to{}".format(sort))
if sort == 'time desc':
add_sort = True
else:
add_sort = False
sort = 'date desc'

else: add_sort = None

fl = request.args.get('fl', 'bibcode')
current_app.logger.info('User gave pagination parameters:'
'start: {}, '
Expand Down Expand Up @@ -355,6 +408,8 @@ def get(self, library):
library_id=library.id,
solr_docs=solr['response']['docs']
)
if add_sort:
solr = self.timestamp_sort(solr, library.id, reverse=add_sort)

documents = [i['bibcode'] for i in solr['response']['docs']]
else:
Expand All @@ -364,8 +419,16 @@ def get(self, library):
current_app.logger.warning('Problem with solr response: {0}'
.format(solr))
updates = {}
documents = library.get_bibcodes()
documents.sort()
if add_sort != None:
with current_app.session_scope() as session:
# Find the specified library (we have to do this to have full access to the library)
temp_library = session.query(Library).filter_by(id=library.id).one()
sortable_list = [(bibcode, library.bibcode[bibcode]["timestamp"]) for bibcode in temp_library.get_bibcodes()]
sortable_list.sort(key = lambda stamped: stamped[1], reverse=add_sort)
documents = [doc[0] for doc in sortable_list]
else:
documents = library.get_bibcodes()
documents.sort()
documents = documents[start:start+rows]

else:
Expand Down
Loading