From 6169c4554680f7c9c6dab85dc83ec09980b4c05e Mon Sep 17 00:00:00 2001
From: Rebecca Sutton Koeser
Date: Thu, 29 Feb 2024 14:30:57 -0500
Subject: [PATCH 01/71] search within cluster revisions (#593)

* Don't include keyword search term when linking to cluster search fixes #545
* Adjust result language so it is accurate when searching within a cluster fixes #545
* Update unit tests for change to language and cluster search link
---
 ppa/archive/templates/archive/snippets/cluster_work.html | 4 ++--
 ppa/archive/templates/archive/snippets/search_form.html  | 2 +-
 ppa/archive/tests/test_views.py                          | 8 ++++++--
 3 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/ppa/archive/templates/archive/snippets/cluster_work.html b/ppa/archive/templates/archive/snippets/cluster_work.html
index 20881b55..8a0fdad2 100644
--- a/ppa/archive/templates/archive/snippets/cluster_work.html
+++ b/ppa/archive/templates/archive/snippets/cluster_work.html
@@ -1,6 +1,6 @@
 {# NOTE: expects cluster_id to be passed in #}
 {# TODO: include count if we can easily get it ; search and browse x digitized works within cluster #}
- {# ONLY include keyword search parameter, no other filters or sort options #}
- search and browse within cluster
+ {# do NOT include any search parameters, to avoid unintentionally hiding cluster results #}
+ search and browse within cluster
diff --git a/ppa/archive/templates/archive/snippets/search_form.html b/ppa/archive/templates/archive/snippets/search_form.html
index 706db13c..85ac6d61 100644
--- a/ppa/archive/templates/archive/snippets/search_form.html
+++ b/ppa/archive/templates/archive/snippets/search_form.html
@@ -82,7 +82,7 @@
- Displaying {{ paginator.count|intcomma }} digitized work{{ paginator.count|pluralize }} or clusters of works
+ Displaying {{ paginator.count|intcomma }} digitized work{{ paginator.count|pluralize }}{% if not search_form.cluster.value %} or clusters of works{% endif %}
  Work citations can be exported to Zotero
diff --git a/ppa/archive/tests/test_views.py b/ppa/archive/tests/test_views.py index 3091f207..2d5ec96a 100644 --- a/ppa/archive/tests/test_views.py +++ b/ppa/archive/tests/test_views.py @@ -631,10 +631,10 @@ def test_keyword_search(self): self.assertContains(response, "search and browse within cluster", count=1) - # link preserves keyword arg only but not any other parameters + # cluster link should not preserve ANY search parameters self.assertContains( response, - "search and browse within cluster", # noqa: E501 + "search and browse within cluster", # noqa: E501 html=True, ) self.assertNotContains( @@ -763,6 +763,10 @@ def test_search_within_cluster(self): self.assertContains( response, "You are searching and browsing within a cluster." ) + # this cluster only has one record + self.assertContains(response, "Displaying 1 digitized work") + # search within cluster should not report containing clusters of works + self.assertNotContains(response, "or clusters of works") # should link back to main archive search self.assertContains(response, reverse("archive:list")) From 9a25d258909e868fb6aa57fd1c4448688480ff3f Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Thu, 29 Feb 2024 14:53:16 -0500 Subject: [PATCH 02/71] Don't display uncategorized collection in search if facet count is zero fixes #542 --- ppa/archive/forms.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ppa/archive/forms.py b/ppa/archive/forms.py index 608233fa..37ba3e99 100644 --- a/ppa/archive/forms.py +++ b/ppa/archive/forms.py @@ -400,8 +400,8 @@ def set_choices_from_facets(self, facets): choices.append((itervalue, label)) # if there are any items not in a collection, add an option - # so they will be findable - if NO_COLLECTION_LABEL in facet_dict: + # so they will be findable; only include if facet count is non-zero + if facet_dict.get(NO_COLLECTION_LABEL, 0): choices.append( ( ModelMultipleChoiceFieldWithEmpty.EMPTY_ID, From ff397b5fc6ff374b251acd358c3dc4cf2c9cd430 Mon Sep 17 00:00:00 2001 From: Rebecca Sutton Koeser Date: Tue, 5 Mar 2024 15:12:40 -0500 Subject: [PATCH 03/71] Manage command to update hathitrust page counts (#594); do not save object in count_pages method * Manage command to update hathitrust page counts * Update ppa/archive/management/commands/update_hathi_pagecounts.py Co-authored-by: Laure Thompson <602628+laurejt@users.noreply.github.com> * Revise count_pages method so it does not automatically save the object * Clean up formatting and remove unused import per @laurejt feedback --------- Co-authored-by: Laure Thompson <602628+laurejt@users.noreply.github.com> --- ppa/archive/import_util.py | 7 +- .../management/commands/hathi_import.py | 25 ++++-- .../commands/update_hathi_pagecounts.py | 87 +++++++++++++++++++ ppa/archive/models.py | 13 ++- ppa/archive/tests/test_models.py | 5 +- 5 files changed, 118 insertions(+), 19 deletions(-) create mode 100644 ppa/archive/management/commands/update_hathi_pagecounts.py diff --git a/ppa/archive/import_util.py b/ppa/archive/import_util.py index 07180bdf..84f4e176 100644 --- a/ppa/archive/import_util.py +++ b/ppa/archive/import_util.py @@ -173,7 +173,8 @@ class HathiImporter(DigitizedWorkImporter): hathi.HathiItemForbidden: "Permission denied to download data.", RSYNC_ERROR: "Failed to sync data", # only saw this one on day, but this was what it was - JSONDecodeError: "HathiTrust catalog temporarily unavailable (malformed response).", + JSONDecodeError: "HathiTrust catalog temporarily unavailable " + + "(malformed response).", } ) @@ -256,7 
+257,6 @@ def rsync_data(self): # temporary preserve file for dev delete=False, ) as fp: - file_paths = list(self.pairtree_paths.values()) # sorting makes rsync more efficient file_paths.sort() @@ -337,6 +337,9 @@ def import_digitizedwork(self, htid, log_msg_src, user): if digwork: # populate page count digwork.count_pages() + # save the page count to the database + if digwork.has_changed("page_count"): + digwork.save() self.imported_works.append(digwork) self.results[htid] = self.SUCCESS diff --git a/ppa/archive/management/commands/hathi_import.py b/ppa/archive/management/commands/hathi_import.py index ce17fa7d..817fd6d0 100644 --- a/ppa/archive/management/commands/hathi_import.py +++ b/ppa/archive/management/commands/hathi_import.py @@ -122,9 +122,7 @@ def handle(self, *args, **kwargs): if self.options["progress"]: progbar = progressbar.ProgressBar( - redirect_stdout=True, - max_value=self.stats["total"], - max_error=False + redirect_stdout=True, max_value=self.stats["total"], max_error=False ) else: progbar = None @@ -148,7 +146,13 @@ def handle(self, *args, **kwargs): # count pages in the pairtree zip file and update digwork page count try: self.stats["pages"] += digwork.count_pages() - except (storage_exceptions.ObjectNotFoundException, IndexError): # IndexError on filepath + # update page count in the database + if digwork.has_changed("page_count"): + digwork.save() + except ( + storage_exceptions.ObjectNotFoundException, + IndexError, + ): # IndexError on filepath self.stderr.write("%s not found in datastore" % digwork.source_id) if progbar: @@ -156,7 +160,8 @@ def handle(self, *args, **kwargs): summary = ( "\nProcessed {:,d} item{} for import." - + "\nAdded {:,d}; updated {:,d}; skipped {:,d}; {:,d} error{}; imported {:,d} page{}." + + "\nAdded {:,d}; updated {:,d}; skipped {:,d}; " + + "{:,d} error{}; imported {:,d} page{}." ) summary = summary.format( self.stats["total"], @@ -172,7 +177,7 @@ def handle(self, *args, **kwargs): self.stdout.write(summary) def initialize_pairtrees(self): - """Initiaulize pairtree storage clients for each + """Initialize pairtree storage clients for each subdirectory in the configured **HATHI_DATA** path.""" # if the configured directory does not exist or is not @@ -192,8 +197,12 @@ def initialize_pairtrees(self): # may be in there, and so forth. 
if os.path.isdir(ht_data_dir): prefix = os.path.basename(ht_data_dir) - logger.debug(f'Initializing pair tree in ({ht_data_dir}) [prefix={prefix}]') - hathi_ptree = pairtree_client.PairtreeStorageClient(prefix, ht_data_dir) + logger.debug( + f"Initializing pair tree in ({ht_data_dir}) [prefix={prefix}]" + ) + hathi_ptree = pairtree_client.PairtreeStorageClient( + prefix, ht_data_dir + ) # store initialized pairtree client by prefix for later use self.hathi_pairtree[prefix] = hathi_ptree diff --git a/ppa/archive/management/commands/update_hathi_pagecounts.py b/ppa/archive/management/commands/update_hathi_pagecounts.py new file mode 100644 index 00000000..c1b172a0 --- /dev/null +++ b/ppa/archive/management/commands/update_hathi_pagecounts.py @@ -0,0 +1,87 @@ +from django.conf import settings +from django.core.management.base import BaseCommand +from django.contrib.admin.models import CHANGE, LogEntry +from django.contrib.auth.models import User +from django.contrib.contenttypes.models import ContentType +from pairtree import storage_exceptions +from parasolr.django.signals import IndexableSignalHandler + +from ppa.archive.models import DigitizedWork + + +class Command(BaseCommand): + """Update database page counts for non-excerpted HathiTrust digitized items. + By default, runs on all non-excerpted, public HathiTrust items. + """ + + help = __doc__ + + #: normal verbosity level + v_normal = 1 + #: verbosity level for the current run; defaults to 1 / normal + verbosity = v_normal + + def add_arguments(self, parser): + parser.add_argument( + "source_ids", nargs="*", help="List of specific items to update (optional)" + ) + + def handle(self, *args, **kwargs): + self.verbosity = kwargs.get("verbosity", self.verbosity) + source_ids = kwargs.get("source_ids", []) + # page count does not affect solr indexing, so disconnect signal handler + IndexableSignalHandler.disconnect() + + script_user = User.objects.get(username=settings.SCRIPT_USERNAME) + digwork_contentype = ContentType.objects.get_for_model(DigitizedWork) + + # find all non-excerpted, non-suppressed hathi volumes + hathi_vols = DigitizedWork.objects.filter( + source=DigitizedWork.HATHI, + item_type=DigitizedWork.FULL, + status=DigitizedWork.PUBLIC, + ) + # if source ids are specified, limit to those records only + if source_ids: + hathi_vols = hathi_vols.filter(source_id__in=source_ids) + + stats = {"updated": 0, "unchanged": 0, "missing_data": 0} + + for digwork in hathi_vols: + try: + # store the current page count + old_page_count = digwork.page_count + # recalculate page count from pairtree data + # NOTE: this method automatically saves if page count changes + digwork.page_count = digwork.count_pages() + if digwork.has_changed("page_count"): + digwork.save() + stats["updated"] += 1 + # create a log entry documenting page count change + LogEntry.objects.log_action( + user_id=script_user.pk, + content_type_id=digwork_contentype.pk, + object_id=digwork.pk, + object_repr=str(digwork), + change_message=f"Recalculated page count (was {old_page_count}, " + + f"now {digwork.page_count})", + action_flag=CHANGE, + ) + + else: + stats["unchanged"] += 1 + + except storage_exceptions.ObjectNotFoundException: + if self.verbosity >= self.v_normal: + self.stderr.write( + self.style.WARNING(f"Pairtree data for {digwork} not found") + ) + stats["missing_data"] += 1 + + # report a summary of what was done + if self.verbosity >= self.v_normal: + self.stdout.write( + f"Volumes with updated page count: {stats['updated']:,}" + + f"\n\tPage count unchanged: 
{stats['unchanged']:,}" + + f"\n\tMissing pairtree data: {stats['missing_data']:,}" + ) diff --git a/ppa/archive/models.py b/ppa/archive/models.py index 1e63123d..f7959722 100644 --- a/ppa/archive/models.py +++ b/ppa/archive/models.py @@ -252,7 +252,7 @@ def cluster_save(sender, instance, **kwargs): logger.debug( "cluster id has changed, reindexing %d works and %d pages", works.count(), - page_count["page_count"], + page_count.get("page_count", 0), ) DigitizedWork.index_items(works) # reindex pages (this may be slow...) @@ -907,7 +907,8 @@ def count_pages(self, ptree_client=None): number of files in the zipfile within the pairtree content (Hathi-specific). Raises :class:`pairtree.storage_exceptions.ObjectNotFoundException` if the data is not found in the pairtree storage. Returns page count - found; saves the object if the count changes.""" + found; updates the `page_count` attribute on the current instance, + but does NOT save the object.""" # if this item has a page span defined, calculate number of pages # based on the number of pages across all spans @@ -941,11 +942,9 @@ def count_pages(self, ptree_client=None): # NOTE: could also count pages via mets file, but that's slower # than counting via zipfile name list - # store page count in the database if changed - if self.page_count != page_count: - self.page_count = page_count - self.save() - + # update page count on the instance, but don't save changes + self.page_count = page_count + # return the total return page_count @property diff --git a/ppa/archive/tests/test_models.py b/ppa/archive/tests/test_models.py index 906ebfe2..2aaec020 100644 --- a/ppa/archive/tests/test_models.py +++ b/ppa/archive/tests/test_models.py @@ -633,8 +633,9 @@ def test_count_pages(self, mockzipfile): # return total and digitized work page counts updated assert page_count == 2 - digwork = DigitizedWork.objects.get(source_id=digwork.source_id) - assert digwork.page_count == 2 + # does NOT save automatically + db_digwork = DigitizedWork.objects.get(source_id=digwork.source_id) + assert db_digwork.page_count is None # should ignore non-text files page_files = ["0001.txt", "00002.txt", "00001.jp2", "00002.jp2"] From 9f514dd73fd75acef6ebf9692ced270db1caff89 Mon Sep 17 00:00:00 2001 From: Laure Thompson <602628+laurejt@users.noreply.github.com> Date: Tue, 5 Mar 2024 15:20:48 -0500 Subject: [PATCH 04/71] Fix for pairtree prefix and version file issue (#611) * Added code to create pairtree prefix and version files when the pairtree directories exist, but the files needed by the pairtree package do not. NOTE: This update only corrects the issue for HathiTrust imports via the admin interface but not bulk imports. * Refactored HathiObject for better handling of volume and pairtree identifiers. --- ppa/archive/hathi.py | 60 +++++++++++++++++++-------------- ppa/archive/tests/test_hathi.py | 59 +++++++++++++++++++++++++------- 2 files changed, 81 insertions(+), 38 deletions(-) diff --git a/ppa/archive/hathi.py b/ppa/archive/hathi.py index 4ff11beb..0ee723e2 100644 --- a/ppa/archive/hathi.py +++ b/ppa/archive/hathi.py @@ -242,36 +242,47 @@ class HathiObject: """An object for working with a HathiTrust item with data in a locally configured pairtree datastore.""" - hathi_id = None + # Pairtree version statement usd by pairtree package + pairtree_version_stmt = ( + "This directory conforms to Pairtree Version 0.1. 
Updated spec: " + + "http://www.cdlib.org/inside/diglib/pairtree/pairtreespec.html" + ) def __init__(self, hathi_id): + # HathiTrust record id self.hathi_id = hathi_id - - @cached_property - def pairtree_prefix(self): - """pairtree prefix (first portion of the hathi id, short-form - identifier for owning institution)""" - return self.hathi_id.split(".", 1)[0] - - @cached_property - def pairtree_id(self): - """pairtree identifier (second portion of source id)""" - return self.hathi_id.split(".", 1)[1] - - @cached_property - def content_dir(self): - """content directory for this work within the appropriate - pairtree""" - # contents are stored in a directory named based on a - # pairtree encoded version of the id - return pairtree_path.id_encode(self.pairtree_id) + # Identifiers for owning institution and volume which form the overall + # HathiTrust record id: [lib_id].[vol_id] + self.lib_id, self.vol_id = hathi_id.split(".", 1) + # Pairtree prefix + self.pairtree_prefix = f"{self.lib_id}." + # Content directory for this work within the appropriate pairtree + # which is based on a pairtree encoded version of the volume id + self.content_dir = pairtree_path.id_encode(self.vol_id) def pairtree_client(self): """Initialize a pairtree client for the pairtree datastore this - object belongs to, based on its Hathi prefix id.""" + object belongs to, based on its HathiTrust record id.""" + store_dir = os.path.join(settings.HATHI_DATA, self.lib_id) + + # Check if store_dir exists, check if pairtree files exist + if os.path.isdir(store_dir): + # Check if "pairtree_prefix" file exists. If not, create it. + pairtree_prefix_fn = os.path.join(store_dir, "pairtree_prefix") + if not os.path.isfile(pairtree_prefix_fn): + with open(pairtree_prefix_fn, mode='w') as writer: + writer.write(self.pairtree_prefix) + # Check if "pairtree_version0_1" file exists. If not, create it. + # Note: Mimicking paitree packages behavior. 
File contents are not + # actually verified + pairtree_vn_fn = os.path.join(store_dir, "pairtree_version0_1") + if not os.path.isfile(pairtree_vn_fn): + with open(pairtree_vn_fn, mode="w") as writer: + writer.write(self.pairtree_version_stmt) + return pairtree_client.PairtreeStorageClient( self.pairtree_prefix, - os.path.join(settings.HATHI_DATA, self.pairtree_prefix), + store_dir, ) def pairtree_object(self, ptree_client=None, create=False): @@ -287,13 +298,13 @@ def pairtree_object(self, ptree_client=None, create=False): ptree_client = self.pairtree_client() # return the pairtree object for current work - return ptree_client.get_object(self.pairtree_id, create_if_doesnt_exist=create) + return ptree_client.get_object(self.vol_id, create_if_doesnt_exist=create) def delete_pairtree_data(self): """Delete pairtree object from the pairtree datastore.""" logger.info("Deleting pairtree data for %s", self.hathi_id) try: - self.pairtree_client().delete_object(self.pairtree_id) + self.pairtree_client().delete_object(self.vol_id) except storage_exceptions.ObjectNotFoundException: # data is already gone; warn, but not an error logger.warning( @@ -314,7 +325,6 @@ def _content_path(self, ext, ptree_client=None): raise storage_exceptions.PartNotFoundException return os.path.join(pairtree_obj.id_to_dirpath(), self.content_dir, filepaths[0]) - def zipfile_path(self, ptree_client=None): """path to zipfile within the hathi contents for this work""" return self._content_path("zip", ptree_client=ptree_client) diff --git a/ppa/archive/tests/test_hathi.py b/ppa/archive/tests/test_hathi.py index 5b269143..3ef2aca2 100644 --- a/ppa/archive/tests/test_hathi.py +++ b/ppa/archive/tests/test_hathi.py @@ -1,5 +1,5 @@ import json -import os.path +import os import tempfile from datetime import date from unittest.mock import Mock, patch @@ -222,17 +222,50 @@ class TestHathiObject: ht_tempdir = tempfile.TemporaryDirectory(prefix="ht_text_pd") - def test_pairtree_prefix(self): + def test_init(self): hobj = hathi.HathiObject(hathi_id="uva.1234") - assert hobj.pairtree_prefix == "uva" + assert hobj.lib_id == "uva" + assert hobj.vol_id == "1234" + assert hobj.pairtree_prefix == "uva." 
+ assert hobj.content_dir == pairtree_path.id_encode(hobj.vol_id) - def test_pairtree_id(self): - hobj = hathi.HathiObject(hathi_id="uva.1234") - assert hobj.pairtree_id == "1234" - - def test_content_dir(self): + @override_settings(HATHI_DATA=ht_tempdir.name) + def test_pairtree_client(self): hobj = hathi.HathiObject(hathi_id="uva.1234") - assert hobj.content_dir == pairtree_path.id_encode(hobj.pairtree_id) + store_dir = os.path.join(settings.HATHI_DATA, hobj.lib_id) + + # Case 1: Initialize client without directory + ptree_client = hobj.pairtree_client() + + # assert file "pairtree_prefix" exists with correct contents + ptree_pfx_fn = os.path.join(store_dir, "pairtree_prefix") + with open(ptree_pfx_fn) as reader: + ptree_pfx_contents = reader.read() + assert ptree_pfx_contents == hobj.pairtree_prefix + + # assert file "pairtree_version0_1" exists with correct contents + ptree_vn_fn = os.path.join(store_dir, "pairtree_version0_1") + with open(ptree_vn_fn) as reader: + ptree_vn_contents = reader.read() + assert ptree_vn_contents == hobj.pairtree_version_stmt + + # Case 2: initialize client with directory but without files + os.remove(ptree_pfx_fn) + os.remove(ptree_vn_fn) + + ptree_client = hobj.pairtree_client() + + # assert file "pairtree_prefix" exists with correct contents + ptree_pfx_fn = os.path.join(store_dir, "pairtree_prefix") + with open(ptree_pfx_fn) as reader: + ptree_pfx_contents = reader.read() + assert ptree_pfx_contents == hobj.pairtree_prefix + + # assert file "pairtree_version0_1" exists with correct contents + ptree_vn_fn = os.path.join(store_dir, "pairtree_version0_1") + with open(ptree_vn_fn) as reader: + ptree_vn_contents = reader.read() + assert ptree_vn_contents == hobj.pairtree_version_stmt @patch("ppa.archive.hathi.pairtree_client") @override_settings(HATHI_DATA=ht_tempdir.name) @@ -243,11 +276,11 @@ def test_pairtree_object(self, mock_pairtree_client): # client initialized mock_pairtree_client.PairtreeStorageClient.assert_called_with( hobj.pairtree_prefix, - os.path.join(settings.HATHI_DATA, hobj.pairtree_prefix), + os.path.join(settings.HATHI_DATA, hobj.lib_id), ) # object retrieved mock_pairtree_client.PairtreeStorageClient.return_value.get_object.assert_called_with( - hobj.pairtree_id, create_if_doesnt_exist=False + hobj.vol_id, create_if_doesnt_exist=False ) # object returned assert ( @@ -263,7 +296,7 @@ def test_pairtree_object(self, mock_pairtree_client): mock_pairtree_client.PairtreeStorageClient.assert_not_called() # should get object from my client my_ptree_client.get_object.assert_called_with( - hobj.pairtree_id, create_if_doesnt_exist=False + hobj.vol_id, create_if_doesnt_exist=False ) @override_settings(HATHI_DATA=ht_tempdir.name) @@ -330,7 +363,7 @@ def test_delete_pairtree_data(self): mock_pairtree_client.assert_called() # should call delete boject mock_pairtree_client.return_value.delete_object.assert_called_with( - hobj.pairtree_id + hobj.vol_id ) # should not raise an exception if deletion fails From 85bbf8f336fd5e6d0edcaa0dd0e1a7447c22207c Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Thu, 7 Mar 2024 09:34:03 -0500 Subject: [PATCH 05/71] Update codecov action for javascript unit tests to v4 --- .github/workflows/unit-tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index 1e348837..40922993 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -28,7 +28,7 @@ jobs: ${{ runner.os }}-node- - run: npm ci - run: npm run 
test:unit - - uses: codecov/codecov-action@v3 + - uses: codecov/codecov-action@v4 with: flags: javascript From 6fc542041568f1b36fda1f79f87a78ff5ffbdbe6 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Thu, 7 Mar 2024 10:53:42 -0500 Subject: [PATCH 06/71] Add method to get first page in original page range for excerpts - make first page method explicit that it is digital - test both digital and original first page methods --- ppa/archive/hathi.py | 30 ++++++++++++++++++++++-------- ppa/archive/models.py | 29 ++++++++++++++++++++++++++--- ppa/archive/tests/test_models.py | 13 +++++++++++++ 3 files changed, 61 insertions(+), 11 deletions(-) diff --git a/ppa/archive/hathi.py b/ppa/archive/hathi.py index 0ee723e2..eeb7bc18 100644 --- a/ppa/archive/hathi.py +++ b/ppa/archive/hathi.py @@ -189,7 +189,7 @@ class StructMapPage(_METS): - """ + """ # noqa: E501 @cached_property def display_label(self): @@ -244,9 +244,9 @@ class HathiObject: # Pairtree version statement usd by pairtree package pairtree_version_stmt = ( - "This directory conforms to Pairtree Version 0.1. Updated spec: " + - "http://www.cdlib.org/inside/diglib/pairtree/pairtreespec.html" - ) + "This directory conforms to Pairtree Version 0.1. Updated spec: " + + "http://www.cdlib.org/inside/diglib/pairtree/pairtreespec.html" + ) def __init__(self, hathi_id): # HathiTrust record id @@ -264,13 +264,13 @@ def pairtree_client(self): """Initialize a pairtree client for the pairtree datastore this object belongs to, based on its HathiTrust record id.""" store_dir = os.path.join(settings.HATHI_DATA, self.lib_id) - + # Check if store_dir exists, check if pairtree files exist if os.path.isdir(store_dir): # Check if "pairtree_prefix" file exists. If not, create it. pairtree_prefix_fn = os.path.join(store_dir, "pairtree_prefix") if not os.path.isfile(pairtree_prefix_fn): - with open(pairtree_prefix_fn, mode='w') as writer: + with open(pairtree_prefix_fn, mode="w") as writer: writer.write(self.pairtree_prefix) # Check if "pairtree_version0_1" file exists. If not, create it. # Note: Mimicking paitree packages behavior. 
File contents are not @@ -320,10 +320,12 @@ def _content_path(self, ext, ptree_client=None): parts = pairtree_obj.list_parts(self.content_dir) # find the first zipfile in the list (should only be one) filepaths = [part for part in parts if part.endswith(ext)] - if not filepaths: + if not filepaths: # An error has occurred -- there is no zip file here in parts raise storage_exceptions.PartNotFoundException - return os.path.join(pairtree_obj.id_to_dirpath(), self.content_dir, filepaths[0]) + return os.path.join( + pairtree_obj.id_to_dirpath(), self.content_dir, filepaths[0] + ) def zipfile_path(self, ptree_client=None): """path to zipfile within the hathi contents for this work""" @@ -332,3 +334,15 @@ def zipfile_path(self, ptree_client=None): def metsfile_path(self, ptree_client=None): """path to mets xml file within the hathi contents for this work""" return self._content_path(".mets.xml", ptree_client=ptree_client) + + def mets_xml(self) -> MinimalMETS: + """load METS xml file from pairtree and initialize as an instance + of :class:`MinimalMETS` + + :rtype: :class:`MinimalMETS` + :raises: :class:`storage_exceptions.ObjectNotFoundException` if the + object is not found in pairtree storage + :raises: :class:`storage_exceptions.PartNotFoundException` if the + mets.xml flie is not found in pairtree storage for this object + """ + return xmlmap.load_xmlobject_from_file(self.metsfile_path(), MinimalMETS) diff --git a/ppa/archive/models.py b/ppa/archive/models.py index f7959722..569f7175 100644 --- a/ppa/archive/models.py +++ b/ppa/archive/models.py @@ -11,7 +11,6 @@ from django.core.exceptions import ValidationError from django.db import models from django.urls import reverse -from eulxml.xmlmap import load_xmlobject_from_file from flags import Flags from intspan import ParseError as IntSpanParseError from intspan import intspan @@ -24,7 +23,7 @@ from wagtail.snippets.models import register_snippet from ppa.archive.gale import GaleAPI, MARCRecordNotFound, get_marc_record -from ppa.archive.hathi import HathiBibliographicAPI, HathiObject, MinimalMETS +from ppa.archive.hathi import HathiBibliographicAPI, HathiObject logger = logging.getLogger(__name__) @@ -811,9 +810,32 @@ def populate_from_bibdata(self, bibdata): def first_page(self): """Number of the first page in range, if this is an excerpt""" + # return digital page for now; may be switching to original + # or this method may be going away + return self.first_page_digital() + + def first_page_digital(self): + """Number of the first page in range (digital pages / page index), + if this is an excerpt. + + :return: first page number for digital page range; None if no page range + :rtype: int, None + """ if self.pages_digital: return list(self.page_span)[0] + def first_page_original(self): + """Number of the first page in range (original page numbering) + if this is an excerpt + + :return: first page number for original page range; None if no page range + :rtype: str, None + """ + # use regex since it handles all cases (intspan only works for a subset) + match = re.match(r"([\da-z]+)([,-]|\b)", self.pages_orig) + if match: + return match.group(1) + def index_id(self): """use source id + first page in range (if any) as solr identifier""" first_page = self.first_page() @@ -949,6 +971,7 @@ def count_pages(self, ptree_client=None): @property def page_span(self): + # TODO: relabel to make it explicit that this is digital pages? 
# convert the specified page numbers into an intspan # if empty, returns an empty set return intspan(self.pages_digital) @@ -1140,7 +1163,7 @@ def hathi_page_index_data(cls, digwork): # load mets record to pull metadata about the images try: - mmets = load_xmlobject_from_file(digwork.hathi.metsfile_path(), MinimalMETS) + mmets = digwork.hathi.mets_xml() except storage_exceptions.ObjectNotFoundException: logger.error( "Pairtree data for %s not found but status is %s", diff --git a/ppa/archive/tests/test_models.py b/ppa/archive/tests/test_models.py index 2aaec020..b97a9ef5 100644 --- a/ppa/archive/tests/test_models.py +++ b/ppa/archive/tests/test_models.py @@ -778,6 +778,19 @@ def test_page_range_validation(self): work.clean_fields() assert "start value should exceed stop (355-35)" in str(err) + def test_first_page_digital(self): + assert DigitizedWork(pages_digital="133-135").first_page_digital() == 133 + + def test_first_page_original(self): + # citation-style page range (second number is incomplete) + assert DigitizedWork(pages_orig="133-5").first_page_original() == "133" + # single page number + assert DigitizedWork(pages_orig="133").first_page_original() == "133" + # discontinuous page range + assert DigitizedWork(pages_orig="133, 134").first_page_original() == "133" + # roman numreals + assert DigitizedWork(pages_orig="iii-xiv").first_page_original() == "iii" + def test_is_suppressed(self): work = DigitizedWork(source_id="chi.79279237") assert not work.is_suppressed From 63d9079e9d6bb4cea93ae4dcf3e3617e635778c9 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Thu, 7 Mar 2024 11:17:06 -0500 Subject: [PATCH 07/71] Preliminary manage command to check on excerpt page range mismatches ref #560 --- .../commands/check_hathi_excerpts.py | 128 ++++++++++++++++++ 1 file changed, 128 insertions(+) create mode 100644 ppa/archive/management/commands/check_hathi_excerpts.py diff --git a/ppa/archive/management/commands/check_hathi_excerpts.py b/ppa/archive/management/commands/check_hathi_excerpts.py new file mode 100644 index 00000000..ede456d4 --- /dev/null +++ b/ppa/archive/management/commands/check_hathi_excerpts.py @@ -0,0 +1,128 @@ +import csv + +from django.core.management.base import BaseCommand +from pairtree import storage_exceptions + +from intspan import intspan + +from ppa.archive.models import DigitizedWork +from ppa.archive.templatetags.ppa_tags import hathi_page_url + + +class Command(BaseCommand): + """Check page alignment for excerpted HathiTrust digitized items.""" + + help = __doc__ + + #: normal verbosity level + v_normal = 1 + #: verbosity level for the current run; defaults to 1 / normal + verbosity = v_normal + + def handle(self, *args, **kwargs): + self.verbosity = kwargs.get("verbosity", self.verbosity) + + # find all excerpted, non-suppressed hathi volumes + hathi_vols = DigitizedWork.objects.filter( + source=DigitizedWork.HATHI, + status=DigitizedWork.PUBLIC, + ).exclude(item_type=DigitizedWork.FULL) + + output_fields = [ + "source_id", + "unique_id", + "pages_orig", + "pages_digital", + "orig_label_match", + "pages_digital_corrected", + "old_hathi_start", + "new_hathi_start", + "notes", + ] + + with open("ppa-excerpt-pagecheck.csv", "w") as csvfile: + csvwriter = csv.DictWriter(csvfile, fieldnames=output_fields) + csvwriter.writeheader() + + for digwork in hathi_vols: + info = { + "source_id": digwork.source_id, + # source id + first page (currently digital, will be switching to original) + "unique_id": digwork.index_id(), + "pages_orig": digwork.pages_orig, + 
"pages_digital": digwork.pages_digital, + "old_hathi_start": hathi_page_url( + digwork.source_id, digwork.first_page_digital() + ), + } + # NOTE: mets loading copied from hathi_page_index_data method + # worth movint to a method on the hathi object? + try: + mmets = digwork.hathi.mets_xml() + except storage_exceptions.ObjectNotFoundException: + # document the error in the output csv, stop processing + info["notes"] = "pairtree data not found" + csvwriter.writerow(info) + continue + except storage_exceptions.PartNotFoundException: + info["notes"] = "error loading mets file (part not found)" + csvwriter.writerow(info) + continue + + # make a list of page labels and order from mets structmap + page_info = [ + {"order": page.order, "label": page.orderlabel} + # also have access to label (@LABEL vs @ORDERLABEL) + for page in mmets.structmap_pages + ] + + # use digital page range to get the first page in the mets + # that would be included with current digital range (1-based index) + try: + excerpt_first_page = page_info[digwork.first_page_digital() + 1] + except IndexError: + if digwork.first_page_digital() >= len(page_info): + excerpt_first_page[-1] + info["notes"] = "digital page out of range; trying last page" + + # some mets records don't have labels + # or, label attribute may be present but empty + # do we need to check if all pages are missing labels? + if ( + excerpt_first_page["label"] is None + or excerpt_first_page["label"].strip() == "" + ): + # add a note that mets doesn't have labels, stop processing + info["notes"] = "no page label in METS structmap" + csvwriter.writerow(info) + continue + + # check if METS page label for the first page in range + # matches the desired first original page + if excerpt_first_page["label"] != str(digwork.first_page_original()): + info["orig_label_match"] = "N" + # if they don't match, can we calculate the offset? + # (only works for numeric page labels) + try: + diff = int(digwork.first_page_original()) - int( + excerpt_first_page["label"] + ) + # calculate the expected new digital page range + # - apply the difference to each number in range, + # since we do have some discontinuous ranges + # - convert back to intspan so we can output in + # page range format (1-3 or 1-3,5) + new_range = [n + diff for n in digwork.page_span] + info["pages_digital_corrected"] = intspan(new_range) + info["new_hathi_start"] = hathi_page_url( + digwork.source_id, new_range[0] + ) + except ValueError as err: + info["notes"] = "could not calculate page offset (%s)" % err + + else: + info["orig_label_match"] = "Y" + info["notes"] = "page labels match" + + # either way, write out the info + csvwriter.writerow(info) From 0c89dd24043b625fff4b882416508f536354a00e Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Thu, 7 Mar 2024 12:49:14 -0500 Subject: [PATCH 08/71] Handle rsync for more records at once, add optional output dir param --- ppa/archive/import_util.py | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/ppa/archive/import_util.py b/ppa/archive/import_util.py index 84f4e176..734f8add 100644 --- a/ppa/archive/import_util.py +++ b/ppa/archive/import_util.py @@ -159,7 +159,14 @@ def import_digitizedwork(self, log_msg_src=None, user=None): class HathiImporter(DigitizedWorkImporter): """Logic for creating new :class:`~ppa.archive.models.DigitizedWork` - records from HathiTrust. For use in views and manage commands.""" + records from HathiTrust. For use in views and manage commands. 
+ + :param list source_ids: list of HathiTrust source ids (htid) to + synchronize (optional) + :param bool rsync_output: determines whether rsync itemized report + is enabled (default: False) + :param str output_dir: base directory for rsync output file (optional) + """ #: rsync error RSYNC_ERROR = 4 @@ -178,10 +185,11 @@ class HathiImporter(DigitizedWorkImporter): } ) - def __init__(self, source_ids=None, rsync_output=False): + def __init__(self, source_ids=None, rsync_output=False, output_dir=""): super().__init__(source_ids) # track whether (and how much) rsync output is desired self.rsync_output = rsync_output + self.output_dir = output_dir def filter_invalid_ids(self): """Remove any ids that don't look valid. At minimum, must @@ -247,7 +255,15 @@ def pairtree_paths(self): def rsync_data(self): """Use rsync to retrieve data for the volumes to be imported.""" - logger.info("rsyncing pairtree data for %s", ", ".join(self.source_ids)) + # limit the number of ids included in the log message + log_detail = "" + rsync_count = len(self.source_ids) + if rsync_count <= 12: + log_detail = ", ".join(self.source_ids) + else: + log_detail = "%d volumes" % rsync_count + + logger.info("rsyncing pairtree data for %s", log_detail) # create temp file with list of paths to synchronize with tempfile.NamedTemporaryFile( @@ -271,8 +287,9 @@ def rsync_data(self): # if rsync output requested, include itemize and log fileargs output_opts = "" if self.rsync_output: - outputfilename = "ppa_hathi_rsync_%s.log" % datetime.now().strftime( - "%Y%m%d-%H%M%S" + outputfilename = os.path.join( + self.output_dir, + "ppa_hathi_rsync_%s.log" % datetime.now().strftime("%Y%m%d-%H%M%S"), ) # output requested: always log content to a file output_opts = "--log-file=%s" % outputfilename @@ -292,7 +309,7 @@ def rsync_data(self): subprocess.run(args=rsync_cmd.split(), check=True) except subprocess.CalledProcessError as err: logger.error( - "HathiTrust rsync failed — %s / command: %s" + "HathiTrust rsync failed — %s / command: %s" % (self.RSYNC_RETURN_CODES[err.returncode], rsync_cmd) ) From 55559278f218651040ae7aa825316ad4fc70852b Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Thu, 7 Mar 2024 12:51:35 -0500 Subject: [PATCH 09/71] Update hathi_rsync command for bulk rsync, report on updated htids --- .../management/commands/hathi_rsync.py | 76 +++++++++++++++---- 1 file changed, 61 insertions(+), 15 deletions(-) diff --git a/ppa/archive/management/commands/hathi_rsync.py b/ppa/archive/management/commands/hathi_rsync.py index cc160564..94244405 100644 --- a/ppa/archive/management/commands/hathi_rsync.py +++ b/ppa/archive/management/commands/hathi_rsync.py @@ -1,4 +1,8 @@ -from django.core.management.base import BaseCommand, CommandError +import os.path +from datetime import datetime + +from django.core.management.base import BaseCommand +from pairtree import path2id from ppa.archive.import_util import HathiImporter from ppa.archive.models import DigitizedWork @@ -26,22 +30,64 @@ def handle(self, *args, **kwargs): # use ids specified via command line when present htids = kwargs.get("htids", []) - # if hathi ids not specified via command line, - # get all non-suppressed hathi records - if not htids: - htids = DigitizedWork.objects.filter( - status=DigitizedWork.PUBLIC, source=DigitizedWork.HATHI - ).values_list("source_id", flat=True) + # by default, sync data for all non-suppressed hathi source ids + digworks = DigitizedWork.objects.filter( + status=DigitizedWork.PUBLIC, source=DigitizedWork.HATHI + ) - # NOTE: if htid is specified, 
should we verify that it's - # in the db and not suppressed? (should import first if not) + # if htids are specified via parameter, use them to filter + # the queryset, to ensure we only sync records that are + # in the database and not suppressed + if htids: + digworks = digworks.filter(source_id__in=htids) + # NOTE: report here on any skipped ids? - self.stdout.write( - self.style.SUCCESS("Synchronizing data for %d records" % len(htids)) - ) - # even if verbosity is zero we want an output file + # generate a list of unique source ids from the queryset + hathi_ids = digworks.values_list("source_id", flat=True).distinct() + self.stdout.write("Synchronizing data for %d records" % len(hathi_ids)) + # we always want itemized rsync output, so we can report + # on which volumes were updated htimporter = HathiImporter( - source_ids=htids, rsync_output=self.verbosity or True + source_ids=hathi_ids, rsync_output=True, output_dir="/tmp" ) logfile = htimporter.rsync_data() - self.stdout.write(self.style.SUCCESS("rsync output is in %s" % logfile)) + + # read the rsync itemized output to identify records where file + # sizes changed + updated_ids = set() + with open(logfile) as rsync_output: + for line in rsync_output: + # if a line indicates that a file was updated due + # to a change in size, use the path to determine the hathi id + if " >f.s" in line: + # rsync itemized output is white-space delimited; + # last element is the filename that was updated + filename = line.rsplit()[-1].strip() + # we only care about zip files and mets.xml files + if not filename.endswith(".zip") and not filename.endswith(".xml"): + continue + # reconstruct the hathi id from the filepath + ht_prefix, pairtree_dir = filename.split("/pairtree_root/") + # get the directory one level up from the updated file + pairtree_id = os.path.dirname(os.path.dirname(pairtree_dir)) + # use pairtree to determine the id based on the path + # (handles special characters like those used in ARKs) + htid = f"{ht_prefix}.{path2id(pairtree_id)}" + updated_ids.add(htid) + + # should this behavior only be when updating all? + # if specific htids are specified on the command line, maybe report on them only? 
+ if updated_ids: + outfilename = "ppa_rsync_updated_htids_%s.txt" % datetime.now().strftime( + "%Y%m%d-%H%M%S" + ) + with open(outfilename, "w") as outfile: + outfile.write("\n".join(sorted(updated_ids))) + success_msg = ( + f"File sizes changed for {len(updated_ids)} hathi ids; " + + f"full list in {outfilename}" + ) + else: + success_msg = "rsync completed; no changes to report" + + self.stdout.write(self.style.SUCCESS(success_msg)) From 77f35d073a18c995c2e987d6c650b6464ff55312 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Thu, 7 Mar 2024 13:21:13 -0500 Subject: [PATCH 10/71] Update tests for change to where mets xml is loaded --- ppa/archive/tests/test_models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ppa/archive/tests/test_models.py b/ppa/archive/tests/test_models.py index b97a9ef5..75668791 100644 --- a/ppa/archive/tests/test_models.py +++ b/ppa/archive/tests/test_models.py @@ -1011,7 +1011,7 @@ def test_hathi_page_index_data(self, mockzipfile): mets = load_xmlobject_from_file(TestDigitizedWork.metsfile, hathi.MinimalMETS) with patch.object(DigitizedWork, "hathi") as mock_hathiobj: mock_hathiobj.zipfile_path.return_value = "/path/to/79279237.zip" - mock_hathiobj.metsfile_path.return_value = TestDigitizedWork.metsfile + mock_hathiobj.mets_xml.return_value = mets mock_hathiobj.content_dir = "data" page_data = Page.page_index_data(work) From 7be1c08af48acf9959e4dddf966512d53e928f7b Mon Sep 17 00:00:00 2001 From: Rebecca Sutton Koeser Date: Thu, 7 Mar 2024 14:50:29 -0500 Subject: [PATCH 11/71] Update ppa/archive/import_util.py Co-authored-by: Laure Thompson <602628+laurejt@users.noreply.github.com> --- ppa/archive/import_util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ppa/archive/import_util.py b/ppa/archive/import_util.py index 734f8add..4bcd5ad0 100644 --- a/ppa/archive/import_util.py +++ b/ppa/archive/import_util.py @@ -258,7 +258,7 @@ def rsync_data(self): # limit the number of ids included in the log message log_detail = "" rsync_count = len(self.source_ids) - if rsync_count <= 12: + if rsync_count <= 10: log_detail = ", ".join(self.source_ids) else: log_detail = "%d volumes" % rsync_count From 4108474ab08af15199628e6a40eac444215a7433 Mon Sep 17 00:00:00 2001 From: Rebecca Sutton Koeser Date: Thu, 7 Mar 2024 14:54:05 -0500 Subject: [PATCH 12/71] Update ppa/archive/management/commands/hathi_rsync.py Co-authored-by: Laure Thompson <602628+laurejt@users.noreply.github.com> --- ppa/archive/management/commands/hathi_rsync.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ppa/archive/management/commands/hathi_rsync.py b/ppa/archive/management/commands/hathi_rsync.py index 94244405..f337c6a0 100644 --- a/ppa/archive/management/commands/hathi_rsync.py +++ b/ppa/archive/management/commands/hathi_rsync.py @@ -67,7 +67,7 @@ def handle(self, *args, **kwargs): if not filename.endswith(".zip") and not filename.endswith(".xml"): continue # reconstruct the hathi id from the filepath - ht_prefix, pairtree_dir = filename.split("/pairtree_root/") + ht_prefix, pairtree_dir = filename.split("/pairtree_root/", 1) # get the directory one level up from the updated file pairtree_id = os.path.dirname(os.path.dirname(pairtree_dir)) # use pairtree to determine the id based on the path From 4d716788d5637c5246ff5ac3ea0c267fb68d5c39 Mon Sep 17 00:00:00 2001 From: Rebecca Sutton Koeser Date: Thu, 7 Mar 2024 15:45:48 -0500 Subject: [PATCH 13/71] Update ppa/archive/management/commands/hathi_rsync.py Co-authored-by: Laure Thompson 
<602628+laurejt@users.noreply.github.com> --- ppa/archive/management/commands/hathi_rsync.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ppa/archive/management/commands/hathi_rsync.py b/ppa/archive/management/commands/hathi_rsync.py index f337c6a0..127c42ee 100644 --- a/ppa/archive/management/commands/hathi_rsync.py +++ b/ppa/archive/management/commands/hathi_rsync.py @@ -43,7 +43,7 @@ def handle(self, *args, **kwargs): # NOTE: report here on any skipped ids? # generate a list of unique source ids from the queryset - hathi_ids = digworks.values_list("source_id", flat=True).distinct() + working_htids = digworks.values_list("source_id", flat=True).distinct() self.stdout.write("Synchronizing data for %d records" % len(hathi_ids)) # we always want itemized rsync output, so we can report # on which volumes were updated From c2333785430ad3296648f5359060b815d8974849 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Thu, 7 Mar 2024 16:21:34 -0500 Subject: [PATCH 14/71] Report skipped htid; use proper temp directory; report any changed files per feedback from @laurejt --- .../management/commands/hathi_rsync.py | 74 ++++++++++++++----- 1 file changed, 56 insertions(+), 18 deletions(-) diff --git a/ppa/archive/management/commands/hathi_rsync.py b/ppa/archive/management/commands/hathi_rsync.py index 127c42ee..4b9532ee 100644 --- a/ppa/archive/management/commands/hathi_rsync.py +++ b/ppa/archive/management/commands/hathi_rsync.py @@ -1,4 +1,6 @@ +import csv import os.path +import tempfile from datetime import datetime from django.core.management.base import BaseCommand @@ -40,29 +42,50 @@ def handle(self, *args, **kwargs): # in the database and not suppressed if htids: digworks = digworks.filter(source_id__in=htids) - # NOTE: report here on any skipped ids? 
# generate a list of unique source ids from the queryset working_htids = digworks.values_list("source_id", flat=True).distinct() - self.stdout.write("Synchronizing data for %d records" % len(hathi_ids)) + + # if htids were explicitly specified, report if any are skipped + if htids: + skipped_htids = set(htids) - set(working_htids) + if skipped_htids: + self.stdout.write( + self.style.NOTICE( + "Some ids not found in public HathiTrust volumes; skipping %s" + % " ".join(skipped_htids) + ) + ) + + # bail out if there's nothing to do + # (e.g., explicit htids only and none valid) + if not working_htids: + return + + self.stdout.write("Synchronizing data for %d records" % len(working_htids)) + + # create a tempdir for rsync logfile; will automatically be cleaned up + output_dir = tempfile.TemporaryDirectory(prefix="ppa-rsync_") # we always want itemized rsync output, so we can report - # on which volumes were updated + # on which htids have updated content htimporter = HathiImporter( - source_ids=hathi_ids, rsync_output=True, output_dir="/tmp" + source_ids=working_htids, rsync_output=True, output_dir=output_dir.name ) logfile = htimporter.rsync_data() - # read the rsync itemized output to identify records where file - # sizes changed - updated_ids = set() + # read the rsync itemized output to identify and report on changes + updated_files = [] with open(logfile) as rsync_output: for line in rsync_output: - # if a line indicates that a file was updated due - # to a change in size, use the path to determine the hathi id - if " >f.s" in line: - # rsync itemized output is white-space delimited; + # check for a line indicating that a file was updated + if " >f" in line: + # rsync itemized output is white-space delimited + parts = line.split() # last element is the filename that was updated - filename = line.rsplit()[-1].strip() + filename = parts[-1] + # itemized info flags preced the filename + flags = parts[-2] + # we only care about zip files and mets.xml files if not filename.endswith(".zip") and not filename.endswith(".xml"): continue @@ -73,19 +96,34 @@ def handle(self, *args, **kwargs): # use pairtree to determine the id based on the path # (handles special characters like those used in ARKs) htid = f"{ht_prefix}.{path2id(pairtree_id)}" - updated_ids.add(htid) + updated_files.append( + { + "htid": htid, + "filename": os.path.basename(filename), + # rsync itemized flags look like >f.st.... + # or >f+++++++ for new files + "size_changed": flags[3] == "s", + "modification_time": flags[4] == "t", + "rsync_flags": flags, + } + ) # should this behavior only be when updating all? # if specific htids are specified on the command line, maybe report on them only? 
- if updated_ids: - outfilename = "ppa_rsync_updated_htids_%s.txt" % datetime.now().strftime( + if updated_files: + outfilename = "ppa_rsync_changes_%s.csv" % datetime.now().strftime( "%Y%m%d-%H%M%S" ) + fields = updated_files[0].keys() + print(fields) with open(outfilename, "w") as outfile: - outfile.write("\n".join(sorted(updated_ids))) + csvwriter = csv.DictWriter(outfile, fieldnames=fields) + csvwriter.writeheader() + csvwriter.writerows(updated_files) + updated_htids = set([i["htid"] for i in updated_files]) success_msg = ( - f"File sizes changed for {len(updated_ids)} hathi ids; " - + f"full list in {outfilename}" + f"Updated {len(updated_files)} files for {len(updated_htids)} volumes; " + + f"full details in {outfilename}" ) else: success_msg = "rsync completed; no changes to report" From 1cab68d9852fe7318394ac6acfef43a8c398fffd Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Thu, 7 Mar 2024 16:50:15 -0500 Subject: [PATCH 15/71] Implement & test validation for rsync output directory --- ppa/archive/import_util.py | 16 ++++++++++++++-- ppa/archive/tests/test_import_util.py | 21 ++++++++++++++++++++- 2 files changed, 34 insertions(+), 3 deletions(-) diff --git a/ppa/archive/import_util.py b/ppa/archive/import_util.py index 4bcd5ad0..f22376aa 100644 --- a/ppa/archive/import_util.py +++ b/ppa/archive/import_util.py @@ -165,7 +165,10 @@ class HathiImporter(DigitizedWorkImporter): synchronize (optional) :param bool rsync_output: determines whether rsync itemized report is enabled (default: False) - :param str output_dir: base directory for rsync output file (optional) + :param str output_dir: base directory for rsync output file + (required if `rsync_output` is True) + :raises ValueError: if output_dir is unset when rsync_output is True or + if output_dir is not an existing directory """ #: rsync error @@ -185,10 +188,19 @@ class HathiImporter(DigitizedWorkImporter): } ) - def __init__(self, source_ids=None, rsync_output=False, output_dir=""): + def __init__(self, source_ids=None, rsync_output=False, output_dir=None): super().__init__(source_ids) # track whether (and how much) rsync output is desired self.rsync_output = rsync_output + # if rsync output is enabled, output directory is required + if self.rsync_output: + if output_dir is None: + raise ValueError("output_dir is required when rsync_output is enabled") + elif not os.path.isdir(output_dir): + raise ValueError( + "rsync output dir %s is not an existing directory", output_dir + ) + self.output_dir = output_dir def filter_invalid_ids(self): diff --git a/ppa/archive/tests/test_import_util.py b/ppa/archive/tests/test_import_util.py index 9b012328..86d37fcc 100644 --- a/ppa/archive/tests/test_import_util.py +++ b/ppa/archive/tests/test_import_util.py @@ -25,7 +25,6 @@ class TestHathiImporter(TestCase): fixtures = ["sample_digitized_works"] def test_filter_existing_ids(self): - digwork_ids = DigitizedWork.objects.values_list("source_id", flat=True) # all existing - all should be flagged as existing @@ -269,6 +268,26 @@ def test_rsync_data(self, mocksubprocess): assert "ppa_hathi_pathlist" in cmd_args[-3] +def test_hathiimporter_init(tmp_path_factory): + # no rsync output, no output dir + htimporter = HathiImporter(["hvd.1234", "nyp.334455"]) + assert htimporter.rsync_output is False + assert htimporter.output_dir is None + + # rsync output requested with no output dir + with pytest.raises(ValueError, match="output_dir is required"): + HathiImporter(rsync_output=True) + + # rsync output requested with non-existent output dir + with 
pytest.raises(ValueError, match="not an existing directory"): + # rsync output requested with invalid output dir + HathiImporter(rsync_output=True, output_dir="/tmp/foo/bar") + + # with valid output dir + tmpdir = tmp_path_factory.mktemp("output") + assert HathiImporter(rsync_output=True, output_dir=str(tmpdir)) + + class TestGaleImporter(TestCase): @patch("ppa.archive.import_util.GaleAPI") def test_add_items_noop(self, mock_gale_api): From 4cdfeaf7943249a4f82fb0de284fdc1d89519d6a Mon Sep 17 00:00:00 2001 From: Rebecca Sutton Koeser Date: Thu, 7 Mar 2024 17:43:59 -0500 Subject: [PATCH 16/71] Feature/excerpt revisions (#616) * When suppressing excerpt, only delete data if the last from that volume fixes #591 * Enable admin save as new to copy record #591 * Add help text to clarify when page count is calculated * Add help text for page count field --- ppa/archive/admin.py | 3 ++ ...0020_digitizedwork_page_count_help_text.py | 21 +++++++++++ ppa/archive/models.py | 36 ++++++++++++++----- ppa/archive/tests/test_models.py | 26 ++++++++++++++ 4 files changed, 77 insertions(+), 9 deletions(-) create mode 100644 ppa/archive/migrations/0020_digitizedwork_page_count_help_text.py diff --git a/ppa/archive/admin.py b/ppa/archive/admin.py index b9ac2fbd..6849fbe9 100644 --- a/ppa/archive/admin.py +++ b/ppa/archive/admin.py @@ -76,6 +76,9 @@ def get_queryset(self): class DigitizedWorkAdmin(ExportActionMixin, ExportMixin, admin.ModelAdmin): resource_class = DigitizedWorkResource # resource for export + # enable "save as new" button to copy and create a new record + save_as = True + list_display = ( "display_title", "subtitle", diff --git a/ppa/archive/migrations/0020_digitizedwork_page_count_help_text.py b/ppa/archive/migrations/0020_digitizedwork_page_count_help_text.py new file mode 100644 index 00000000..bc750bd6 --- /dev/null +++ b/ppa/archive/migrations/0020_digitizedwork_page_count_help_text.py @@ -0,0 +1,21 @@ +# Generated by Django 5.0.2 on 2024-03-07 22:15 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("archive", "0019_alter_cluster_options"), + ] + + operations = [ + migrations.AlterField( + model_name="digitizedwork", + name="page_count", + field=models.PositiveIntegerField( + blank=True, + help_text="Automatically calculated on import; recalculated on save when digital page range changes", + null=True, + ), + ), + ] diff --git a/ppa/archive/models.py b/ppa/archive/models.py index f7959722..c311ff59 100644 --- a/ppa/archive/models.py +++ b/ppa/archive/models.py @@ -380,8 +380,13 @@ class DigitizedWork(ModelIndexable, TrackChangesModel): publisher = models.TextField(blank=True) # Needs to be integer to allow aggregating max/min, filtering by date pub_date = models.PositiveIntegerField("Publication Date", null=True, blank=True) - #: number of pages in the work - page_count = models.PositiveIntegerField(null=True, blank=True) + #: number of pages in the work (or page range, for an excerpt) + page_count = models.PositiveIntegerField( + null=True, + blank=True, + help_text="Automatically calculated on import; " + + "recalculated on save when digital page range changes", + ) #: public notes field for this work public_notes = models.TextField( blank=True, @@ -559,12 +564,25 @@ def hathi(self): def save(self, *args, **kwargs): # if status has changed so that object is now suppressed, # do some cleanup - if self.has_changed("status") and self.status == self.SUPPRESSED: - # remove indexed page content from Solr - 
self.solr.update.delete_by_query('source_id:"%s"' % self.source_id) + if self.has_changed("status") and self.status == DigitizedWork.SUPPRESSED: + # remove indexed page content from Solr using index id + # (i.e., if excerpt, should only remove content for this excerpt, + # not all excerpts in this volume) + self.solr.update.delete_by_query('group_id_s:"%s"' % self.index_id()) # if this is a HathiTrust item, remove pairtree data if self.source == DigitizedWork.HATHI: - self.hathi.delete_pairtree_data() + # if this is a full work (not excerpted), remove + # if this is an excerpt, should only remove if there are no other + # public excerpts from this volume + if ( + self.item_type == DigitizedWork.FULL + or not DigitizedWork.objects.filter( + status=DigitizedWork.PUBLIC, source_id=self.source_id + ) + .exclude(pk=self.pk) + .exists() + ): + self.hathi.delete_pairtree_data() # Solr identifier is based on combination of source id and first page; # if either changes, remove the old record from Solr before saving @@ -582,10 +600,10 @@ def save(self, *args, **kwargs): self.pages_digital = new_pages_digital if self.has_changed("pages_digital"): - # if there is a page range set now, update page count and index + # update the page count if possible (i.e., not a Gale record) + self.page_count = self.count_pages() + # if there is a page range set, update page count and index if self.pages_digital: - # recalculate page total based on current range - self.page_count = self.count_pages() # update index to remove all pages that are no longer in range self.solr.update.delete_by_query( 'source_id:"%s" AND item_type:page NOT order:(%s)' diff --git a/ppa/archive/tests/test_models.py b/ppa/archive/tests/test_models.py index 2aaec020..926532c4 100644 --- a/ppa/archive/tests/test_models.py +++ b/ppa/archive/tests/test_models.py @@ -689,6 +689,32 @@ def test_save_suppress(self): work.save() mock_hathiobj.delete_pairtree_data.assert_not_called() + def test_save_suppress_excerpt(self): + work = DigitizedWork(source_id="chi.79279237", item_type=DigitizedWork.EXCERPT) + with patch.object(work, "hathi") as mock_hathiobj: + # no change in status - nothing should happen + work.save() + mock_hathiobj.delete_pairtree_data.assert_not_called() + + # change status to suppressed, no other excerpts in this volume + # - data should be deleted + work.status = work.SUPPRESSED + work.save() + assert mock_hathiobj.delete_pairtree_data.call_count == 1 + + # second public excerpt from the same valoume + DigitizedWork.objects.create( + source_id="chi.79279237", + item_type=DigitizedWork.EXCERPT, + pages_orig="3-5", + pages_digital="5-7", + ) + # reset mock so we can check it is not called + mock_hathiobj.delete_pairtree_data.reset_mock() + work.status = work.SUPPRESSED + work.save() + assert mock_hathiobj.delete_pairtree_data.call_count == 0 + def test_save_sourceid(self): # if source_id changes, old id should be removed from solr index work = DigitizedWork.objects.create( From 99c7059fa9f247c383d287a09d905d5e7ba339ec Mon Sep 17 00:00:00 2001 From: Rebecca Sutton Koeser Date: Fri, 8 Mar 2024 11:49:37 -0500 Subject: [PATCH 17/71] Update ppa/archive/management/commands/hathi_rsync.py Co-authored-by: Laure Thompson <602628+laurejt@users.noreply.github.com> --- ppa/archive/management/commands/hathi_rsync.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ppa/archive/management/commands/hathi_rsync.py b/ppa/archive/management/commands/hathi_rsync.py index 4b9532ee..5183ae69 100644 --- 
a/ppa/archive/management/commands/hathi_rsync.py +++ b/ppa/archive/management/commands/hathi_rsync.py @@ -83,7 +83,7 @@ def handle(self, *args, **kwargs): parts = line.split() # last element is the filename that was updated filename = parts[-1] - # itemized info flags preced the filename + # itemized info flags precede the filename flags = parts[-2] # we only care about zip files and mets.xml files From d4e714516c232dfc7d9c2127726b0108933fdb04 Mon Sep 17 00:00:00 2001 From: Rebecca Sutton Koeser Date: Fri, 8 Mar 2024 11:50:01 -0500 Subject: [PATCH 18/71] Update ppa/archive/import_util.py Co-authored-by: Laure Thompson <602628+laurejt@users.noreply.github.com> --- ppa/archive/import_util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ppa/archive/import_util.py b/ppa/archive/import_util.py index f22376aa..fa046a90 100644 --- a/ppa/archive/import_util.py +++ b/ppa/archive/import_util.py @@ -198,7 +198,7 @@ def __init__(self, source_ids=None, rsync_output=False, output_dir=None): raise ValueError("output_dir is required when rsync_output is enabled") elif not os.path.isdir(output_dir): raise ValueError( - "rsync output dir %s is not an existing directory", output_dir + f"rsync output dir {output_dir} is not an existing directory" ) self.output_dir = output_dir From b758fa22ddeaa9ab8956b6540d3543e05fb20070 Mon Sep 17 00:00:00 2001 From: Rebecca Sutton Koeser Date: Fri, 8 Mar 2024 11:50:20 -0500 Subject: [PATCH 19/71] Update ppa/archive/tests/test_import_util.py Co-authored-by: Laure Thompson <602628+laurejt@users.noreply.github.com> --- ppa/archive/tests/test_import_util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ppa/archive/tests/test_import_util.py b/ppa/archive/tests/test_import_util.py index 86d37fcc..6ea089ba 100644 --- a/ppa/archive/tests/test_import_util.py +++ b/ppa/archive/tests/test_import_util.py @@ -281,7 +281,7 @@ def test_hathiimporter_init(tmp_path_factory): # rsync output requested with non-existent output dir with pytest.raises(ValueError, match="not an existing directory"): # rsync output requested with invalid output dir - HathiImporter(rsync_output=True, output_dir="/tmp/foo/bar") + HathiImporter(rsync_output=True, output_dir="/foo/bar/baz") # with valid output dir tmpdir = tmp_path_factory.mktemp("output") From 380e04b61f7cb5af157502e533599e340b2f2101 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Fri, 8 Mar 2024 12:40:33 -0500 Subject: [PATCH 20/71] Improve readability and formatting based on suggestions from @laurejt Co-authored-by: Laure Thompson <602628+laurejt@users.noreply.github.com> --- ppa/archive/management/commands/hathi_rsync.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/ppa/archive/management/commands/hathi_rsync.py b/ppa/archive/management/commands/hathi_rsync.py index 5183ae69..1dad0f05 100644 --- a/ppa/archive/management/commands/hathi_rsync.py +++ b/ppa/archive/management/commands/hathi_rsync.py @@ -4,6 +4,7 @@ from datetime import datetime from django.core.management.base import BaseCommand +from django.template.defaultfilters import pluralize from pairtree import path2id from ppa.archive.import_util import HathiImporter @@ -52,17 +53,21 @@ def handle(self, *args, **kwargs): if skipped_htids: self.stdout.write( self.style.NOTICE( - "Some ids not found in public HathiTrust volumes; skipping %s" - % " ".join(skipped_htids) + f"{len(skipped_htids)} id{pluralize(skipped_htids)} " + + "not found in public HathiTrust volumes; " + + f"skipping {' '.join(skipped_htids)}" ) ) # 
bail out if there's nothing to do # (e.g., explicit htids only and none valid) if not working_htids: + self.stdout.write("No records to synchronize; stopping") return - self.stdout.write("Synchronizing data for %d records" % len(working_htids)) + self.stdout.write( + f"Synchronizing data for {len(working_htids)} record{pluralize(working_htids)}" + ) # create a tempdir for rsync logfile; will automatically be cleaned up output_dir = tempfile.TemporaryDirectory(prefix="ppa-rsync_") @@ -111,8 +116,8 @@ def handle(self, *args, **kwargs): # should this behavior only be when updating all? # if specific htids are specified on the command line, maybe report on them only? if updated_files: - outfilename = "ppa_rsync_changes_%s.csv" % datetime.now().strftime( - "%Y%m%d-%H%M%S" + outfilename = "ppa_rsync_changes_{time}.csv".format( + time=datetime.now().strftime("%Y%m%d-%H%M%S") ) fields = updated_files[0].keys() print(fields) From 11a2815a7ba2c8a3d4153e486811b12bc115563d Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Fri, 8 Mar 2024 12:45:15 -0500 Subject: [PATCH 21/71] Remove debug print statement; document how csv header row is populated --- ppa/archive/management/commands/hathi_rsync.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ppa/archive/management/commands/hathi_rsync.py b/ppa/archive/management/commands/hathi_rsync.py index 1dad0f05..bbf66cf2 100644 --- a/ppa/archive/management/commands/hathi_rsync.py +++ b/ppa/archive/management/commands/hathi_rsync.py @@ -119,8 +119,8 @@ def handle(self, *args, **kwargs): outfilename = "ppa_rsync_changes_{time}.csv".format( time=datetime.now().strftime("%Y%m%d-%H%M%S") ) + # use keys from the first row to populate csv header row fields = updated_files[0].keys() - print(fields) with open(outfilename, "w") as outfile: csvwriter = csv.DictWriter(outfile, fieldnames=fields) csvwriter.writeheader() From 50d67b08fbc0883716319fb713525e242afc393f Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Fri, 8 Mar 2024 13:58:07 -0500 Subject: [PATCH 22/71] Remove extraneous tabs in page count script summary output --- ppa/archive/management/commands/update_hathi_pagecounts.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ppa/archive/management/commands/update_hathi_pagecounts.py b/ppa/archive/management/commands/update_hathi_pagecounts.py index c1b172a0..0bf945cf 100644 --- a/ppa/archive/management/commands/update_hathi_pagecounts.py +++ b/ppa/archive/management/commands/update_hathi_pagecounts.py @@ -82,6 +82,6 @@ def handle(self, *args, **kwargs): if self.verbosity >= self.v_normal: self.stdout.write( f"Volumes with updated page count: {stats['updated']:,}" - + f"\n\tPage count unchanged: {stats['unchanged']:,}" - + f"\n\tMissing pairtree data: {stats['missing_data']:,}" + + f"\nPage count unchanged: {stats['unchanged']:,}" + + f"\nMissing pairtree data: {stats['missing_data']:,}" ) From 3538f5db81cbc2edaac03c33572c6d171f71cdb3 Mon Sep 17 00:00:00 2001 From: Rebecca Sutton Koeser Date: Tue, 19 Mar 2024 12:45:20 -0400 Subject: [PATCH 23/71] Implement and test 303 redirect for multiple cluster params (#621) * Implement and test 303 redirect for multiple cluster params fixes #619 * Fix incorrect mock patch target --- ppa/archive/tests/test_views.py | 14 +++++++++++++- ppa/archive/views.py | 28 ++++++++++++++++------------ 2 files changed, 29 insertions(+), 13 deletions(-) diff --git a/ppa/archive/tests/test_views.py b/ppa/archive/tests/test_views.py index 2d5ec96a..8cfef2b0 100644 --- a/ppa/archive/tests/test_views.py +++ 
b/ppa/archive/tests/test_views.py @@ -329,7 +329,7 @@ def test_nonhathi_display(self): self.assertNotContains(response, "View external record") # search term should be ignored for items without fulltext - with patch("ppa.archive.views.SolrQuerySet") as mock_solrq: + with patch("ppa.archive.views.PageSearchQuerySet") as mock_solrq: response = self.client.get(thesis.get_absolute_url(), {"query": "lady"}) # not called at all assert mock_solrq.call_count == 0 @@ -1193,6 +1193,18 @@ def test_get_queryset(self): mock_qs.order_by.assert_called_with("sort_title") # default sort mock_qs.work_filter.assert_called_with(author="Robert") + def test_too_many_clusters(self): + archive_list_url = reverse("archive:list") + response = self.client.get(archive_list_url, {"cluster": ["one", "two"]}) + # if there is more than one cluster param, + # should redirect to archive search with a 303 See Other status code + assert response.status_code == 303 + assert response["Location"] == archive_list_url + # single cluster should be fine + assert self.client.get(archive_list_url, {"cluster": "one"}).status_code == 200 + # no cluster should also be fine + assert self.client.get(archive_list_url).status_code == 200 + class TestImportView(TestCase): superuser = {"username": "super", "password": str(uuid.uuid4())} diff --git a/ppa/archive/views.py b/ppa/archive/views.py index a9f37a4c..4bbf965a 100644 --- a/ppa/archive/views.py +++ b/ppa/archive/views.py @@ -1,9 +1,4 @@ -import csv import logging -from collections import OrderedDict, defaultdict -from http import HTTPStatus -from json.decoder import JSONDecodeError -from pprint import pprint import requests from django.contrib import messages @@ -12,18 +7,14 @@ from django.core.paginator import Paginator from django.http import ( Http404, - HttpResponse, HttpResponsePermanentRedirect, - HttpResponseRedirect, ) from django.shortcuts import get_object_or_404, redirect, render from django.urls import reverse from django.utils.http import urlencode -from django.utils.timezone import now from django.views.generic import DetailView, ListView from django.views.generic.base import RedirectView, TemplateView from django.views.generic.edit import FormView -from parasolr.django import SolrQuerySet from parasolr.django.views import SolrLastModifiedMixin from ppa.archive.forms import ( @@ -59,6 +50,18 @@ class DigitizedWorkListView(AjaxTemplateMixin, SolrLastModifiedMixin, ListView): # keyword query; assume no search terms unless set query = None + def get(self, *args, **kwargs): + # a bug used to allow aggregation of multiple cluster params, + # which is not supported; if detected, redirect to archive search + cluster_param = self.request.GET.getlist("cluster") + if cluster_param and len(cluster_param) > 1: + response = HttpResponsePermanentRedirect(reverse("archive:list")) + response.status_code = 303 # See other + return response + + # otherwise, process response normally + return super(DigitizedWorkListView, self).get(*args, **kwargs) + def get_queryset(self, **kwargs): form_opts = self.request.GET.copy() # if relevance sort is requested but there is no keyword search @@ -243,8 +246,8 @@ def get_context_data(self, **kwargs): # or an error status set on the response context["error"] = "Something went wrong." 
- page_groups_keys = set(page_groups.keys()) - page_highlights_keys = set(page_highlights.keys()) + set(page_groups.keys()) + set(page_highlights.keys()) context.update( { "search_form": self.form, @@ -354,7 +357,8 @@ def get_context_data(self, **kwargs): # only return fields needed for page result display, # configure highlighting on page text content solr_pageq = ( - PageSearchQuerySet() # NOTE: Addition of an aliased queryset changes the _s keys below + # NOTE: Addition of an aliased queryset changes the _s keys below + PageSearchQuerySet() .search(content="(%s)" % query) .filter(group_id='"%s"' % digwork.index_id(), item_type="page") .highlight("content", snippets=3, method="unified") From 468496e41af3c02a7575d5250531b238c4fdc7cb Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Tue, 19 Mar 2024 13:55:19 -0400 Subject: [PATCH 24/71] Fix 1-based indexing when checking excerpt page ranges Provide output to notify filename of report --- ppa/archive/management/commands/check_hathi_excerpts.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/ppa/archive/management/commands/check_hathi_excerpts.py b/ppa/archive/management/commands/check_hathi_excerpts.py index ede456d4..5838566b 100644 --- a/ppa/archive/management/commands/check_hathi_excerpts.py +++ b/ppa/archive/management/commands/check_hathi_excerpts.py @@ -40,7 +40,8 @@ def handle(self, *args, **kwargs): "notes", ] - with open("ppa-excerpt-pagecheck.csv", "w") as csvfile: + report_filename = "ppa-excerpt-pagecheck.csv" + with open(report_filename, "w") as csvfile: csvwriter = csv.DictWriter(csvfile, fieldnames=output_fields) csvwriter.writeheader() @@ -78,8 +79,9 @@ def handle(self, *args, **kwargs): # use digital page range to get the first page in the mets # that would be included with current digital range (1-based index) + try: - excerpt_first_page = page_info[digwork.first_page_digital() + 1] + excerpt_first_page = page_info[digwork.first_page_digital() - 1] except IndexError: if digwork.first_page_digital() >= len(page_info): excerpt_first_page[-1] @@ -126,3 +128,5 @@ def handle(self, *args, **kwargs): # either way, write out the info csvwriter.writerow(info) + + self.stdout.write(f"Excerpt page check report available in {report_filename}") From 0fdb842f1f1c6724d656b9f9a1dc7009ceebf70e Mon Sep 17 00:00:00 2001 From: Laure Thompson <602628+laurejt@users.noreply.github.com> Date: Thu, 21 Mar 2024 16:22:53 -0400 Subject: [PATCH 25/71] Feature/collect version labels (#624) Adds script for collecting version labels of HathiTrust records --------- Co-authored-by: Rebecca Sutton Koeser --- scripts/README.md | 24 + scripts/get_version_labels.py | 94 ++++ scripts/ht-excerpts-2023-09-20.txt | 517 +++++++++++++++++ .../version-labels-2024-03-20.tsv | 518 ++++++++++++++++++ .../version-labels-2024-03-21.tsv | 518 ++++++++++++++++++ 5 files changed, 1671 insertions(+) create mode 100644 scripts/README.md create mode 100644 scripts/get_version_labels.py create mode 100644 scripts/ht-excerpts-2023-09-20.txt create mode 100644 scripts/version-labels/version-labels-2024-03-20.tsv create mode 100644 scripts/version-labels/version-labels-2024-03-21.tsv diff --git a/scripts/README.md b/scripts/README.md new file mode 100644 index 00000000..8a999eb6 --- /dev/null +++ b/scripts/README.md @@ -0,0 +1,24 @@ +# PPA Scripts + +This directory contains stand-alone scripts associated with the Princeton +Prosody Archive that are not part of the web application proper. + +At this time, these scripts do not have any additional requirements. 
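+
+As a quick usage sketch for the script described in the next section (assuming the
+`scripts/` directory is on the Python path and that `requests` is importable), the
+single-record helper can be called directly from a Python shell; the htid below is
+copied from the default input list:
+
+```python
+# minimal sketch: look up the version timestamp for a single HathiTrust volume
+from get_version_labels import get_version_label
+
+# htid taken from ht-excerpts-2023-09-20.txt; prints a timestamp string,
+# or None if the request fails or no versionLabel is found in the page
+print(get_version_label("hvd.32044090278565"))
+```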
+ +## HathiTrust "Version" Timestamps +This script extracts and saves the version timestamp information from the +public HathiTrust interface for a set of HathiTrust volumes. By default, +the set of volumes corresponds to PPA excerpt records (based on an exported +report). + +- `get_version_labels.py`: The script to run. This script extracts HathiTrust +volume identifiers (htids) from a text file containing one htid per line. By +default, the input file is `ht-excerpts-2023-09-20.txt`, but an alternative file +can be specified as input. It writes its output as a tsv +with columns corresponding to htids and their extracted version timestamps. + - input: Input `.txt` file. If none specified, + `ht-excerpts-2023-09-20.txt`. + - output: `version-labels/version-labels-[current date].tsv`. If this file + already exists, then the output file corresponds to a new (non-existing) + file `version-labels/version-labels-[current date]-[i].tsv` where `i` + is the smallest possible, postive integer. diff --git a/scripts/get_version_labels.py b/scripts/get_version_labels.py new file mode 100644 index 00000000..4c10a878 --- /dev/null +++ b/scripts/get_version_labels.py @@ -0,0 +1,94 @@ +""" +Extract version labels from HathiTrust volume pages. +""" +import sys +import os.path +import re +import time +import datetime +import requests + + +def get_version_label(htid): + """ + Extract the HathiTrust "version label" from a record's catalog page. + Returns the corresponding timestamp, returns None if the HTTP request fails. + """ + # a script block in HT record page includes a number of HT.params including version timestamp + re_pattern = r'HT.params.versionLabel = "([^"]+)";' + catalog_url = f"https://hdl.handle.net/2027/{htid}" + try: + r = requests.get(catalog_url, timeout=5) + except requests.exceptions.Timeout: + # Handle timeouts gracefully (catch and continue) + print(f"Warning: request timed out for '{htid}'") + return + if r.status_code == requests.codes['ok']: + # Extract version_label from response text + version_label = re.findall(re_pattern, r.text) + if version_label: + return version_label[0] + else: + print(f"Warning: {htid} missing versionLabel!") + else: + print(f"Warning: bad/unexpected response for '{htid}'") + + +def get_version_labels(htids, wait_time=1): + """ + Extracts the HathiTrust "version label" for each record within htids. + Returns a list of the extracted htid-timestamp pairs. 
+ """ + version_pairs = [] + n_skipped = 0 + n_htids = len(htids) + for i, htid in enumerate(htids): + if i: + # Wait wait_time seconds between requests + time.sleep(wait_time) + # show progress + if i % 10 == 0: + print(f"Progress: {i}/{n_htids}") + version_label = get_version_label(htid) + if version_label: + version_pairs.append((htid, version_label)) + else: + n_skipped += 1 + if n_skipped: + print(f"Warning: Failed to gather versions for {n_skipped} volumes") + return version_pairs + + +if __name__ == "__main__": + if len(sys.argv) not in [1,2]: + print("Usage: ([htids list])") + sys.exit(1) + + # Check if an input file has been provided + in_tsv = "ht-excerpts-2023-09-20.txt" # Default value + if len(sys.argv) == 2: + in_tsv = sys.argv[1] + + # Determine output file + out_pfx = os.path.join("version-labels", + f"version-labels-{datetime.date.today()}") + out_tsv = f"{out_pfx}.tsv" + i = 0 + while os.path.isfile(out_tsv): + # File exists, so update increment and add index + i += 1 + out_tsv = f"{out_pfx}-{i}.tsv" + + # Get htids + htids = [] + with open(in_tsv) as reader: + for line in reader: + htid = line.strip() + htids.append(htid) + version_pairs = get_version_labels(htids) + + # Write version labels to file + with open(out_tsv, mode='w') as writer: + writer.write(f"htid\tversion_label\n") + for htid, version_label in version_pairs: + writer.write(f"{htid}\t{version_label}\n") diff --git a/scripts/ht-excerpts-2023-09-20.txt b/scripts/ht-excerpts-2023-09-20.txt new file mode 100644 index 00000000..1a5f096b --- /dev/null +++ b/scripts/ht-excerpts-2023-09-20.txt @@ -0,0 +1,517 @@ +hvd.32044090278565 +nyp.33433081683744 +uc1.b3924132 +mdp.39015026482151 +uiug.30112106245936 +hvd.32044009576562 +nyp.33433067294433 +coo.31924065856167 +uc1.ax0002627784 +wu.89001946482 +uc1.b3311895 +hvd.32044048963128 +njp.32101076199213 +coo.31924051399685 +njp.32101076472800 +njp.32101076472859 +nyp.33433074380688 +hvd.32044019842491 +uc1.32106001559381 +mdp.39015024071642 +hvd.hx28d7 +uva.x002111617 +uc1.$b161790 +uc1.$b683534 +nyp.33433076066723 +miun.ajd7522.0001.001 +hvd.hxv9b7 +coo.31924065856167 +uc1.$b275098 +nyp.33433081676979 +coo.31924066177589 +hvd.32044011432754 +njp.32101076384609 +pst.000020068974 +mdp.39015087700681 +njp.32101076403078 +njp.32101075673655 +udel.31741113248746 +hvd.hn34f5 +inu.30000066028642 +uc1.b3794203 +uc1.b3794204 +inu.30000099671491 +hvd.hnqbsu +mdp.39015020696541 +njp.32101075672541 +mdp.39015060441675 +hvd.32044031554363 +hvd.32044048963185 +njp.32101076403300 +coo.31924057525382 +coo.31924065580551 +mdp.39015008833884 +njp.32101010945275 +njp.32101076201159 +inu.30000099671632 +yale.39002004065844 +hvd.32044098627870 +inu.30000092253941 +mdp.39015060429308 +mdp.39015060429464 +uc1.b3385165 +uiug.30112042710548 +uc1.b3548551 +uc1.b3850894 +hvd.hnqbts +mdp.39015024071824 +hvd.32044050827351 +coo.31924065585840 +mdp.39015048893195 +mdp.39015053252139 +mdp.39015059489032 +uc1.b3385486 +inu.30000099671665 +mdp.39015060425942 +coo.31924066146733 +uc1.b2905408 +hvd.32044010335081 +coo.31924057525861 +njp.32101076201167 +hvd.32044014419220 +mdp.39015053262393 +mdp.39015060429746 +uc1.b2972967 +umn.31951000742933f +hvd.hwqu51 +mdp.39015060425751 +uiuo.ark:/13960/t4qk01n82 +njp.32101063578718 +mdp.39015009286215 +nyp.33433082488895 +hvd.32044098628217 +hvd.32044043851013 +njp.32101076199171 +mdp.39015060426742 +nyp.33433082488911 +njp.32101075672806 +mdp.39015033845549 +nyp.33433076055809 +njp.32101076425980 +coo.31924057522082 +mdp.39015004858224 +coo.31924065856167 
+uiug.30112042290434 +uc1.b3011277 +mdp.39015003346247 +mdp.39015049192902 +njp.32101076379989 +njp.32101076533932 +chi.78323978 +chi.55229744 +njp.32101073758805 +uc2.ark:/13960/t4bp05b0f +mdp.39015059488877 +chi.78323841 +mdp.39015005484020 +hvd.32044010332070 +njp.32101047468010 +uc1.b3627386 +hvd.32044092658095 +hvd.32044014692362 +nyp.33433000183008 +njp.32101080222720 +njp.32101074834787 +uc1.31158010000023 +nyp.33433067366678 +hvd.hwa2b7 +hvd.32044103001111 +umn.31951t00020309x +mdp.39015008880067 +chi.78023993 +hvd.32044092738798 +nyp.33433081658886 +mdp.39015043572422 +njp.32101007684655 +uc1.32106020079791 +hvd.32044092797216 +mdp.39015059846678 +mdp.39015036664038 +chi.78013704 +mdp.39015059395619 +uc1.b3627386 +hvd.32044048963011 +nyp.33433081646642 +nyp.33433076055809 +mdp.39015060429415 +hvd.hxkepr +uiug.30112001676896 +mdp.39015060429423 +chi.12153205 +mdp.39015026482151 +mdp.39015063933546 +inu.30000099860342 +njp.32101076425485 +umn.31951p00293997r +wu.89001946482 +uc1.b3311895 +hvd.32044092624287 +uc1.b3627386 +uc1.$b31654 +inu.30000099671764 +hvd.32044094024825 +njp.32101020794176 +hvd.32044012913166 +njp.32101076451556 +njp.32101076472966 +mdp.39015060429597 +coo.31924066328299 +coo.31924066328299 +njp.32101076180460 +mdp.39015012991363 +njp.32101075673754 +njp.32101075672665 +msu.31293018462196 +njp.32101077262895 +hvd.32044092797182 +mdp.39015043572588 +nyp.33433081659017 +uc1.b2900825 +mdp.39015060424127 +njp.32101075672905 +hvd.32044010396893 +mdp.39015010329087 +njp.32101076890142 +mdp.39015010328121 +uc1.b2972398 +hvd.ah3rmv +mdp.39015048909868 +njp.32101074443415 +mdp.39015078153817 +hvd.32044040731473 +pst.000068744151 +njp.32101065266668 +njp.32101065266668 +hvd.32044086720679 +coo.31924057531109 +mdp.39015060429332 +chi.19606141 +hvd.32044010495000 +hvd.32044024285587 +njp.32101076201175 +uva.x030453236 +mdp.39015043572661 +uc1.b2905409 +mdp.39015060429555 +mdp.39015060429498 +mdp.39015008601109 +nyp.33433074853965 +njp.32101068158847 +hvd.hnlia3 +loc.ark:/13960/t3gx4tr12 +hvd.tz1l4c +mdp.39015060430108 +hvd.tz1l4w +njp.32101010945226 +mdp.39015031048211 +njp.32101077276523 +njp.32101076782703 +hvd.32044050831999 +mdp.39015012330885 +uc1.b3546679 +njp.32101075673481 +mdp.39015020441104 +coo.31924106553286 +njp.32101076472792 +mdp.39015043572372 +hvd.32044050827351 +mdp.39015063944592 +njp.32101063578650 +njp.32101076433125 +mdp.39015048893252 +coo.31924065856167 +njp.32101075716934 +coo.31924057525606 +coo.31924062186204 +uc1.b3385173 +mdp.39015059402357 +njp.32101075672624 +hvd.ah3kfk +njp.32101076795333 +njp.32101023869397 +uc1.$b312189 +njp.32101007684614 +hvd.32044092624352 +uva.x000240890 +nyp.33433067294433 +nyp.33433067294433 +coo.31924007186517 +njp.32101047468002 +hvd.32044009907841 +hvd.32044021008149 +njp.32101076201183 +njp.32101076457744 +mdp.39015060430082 +uc1.b3385513 +mdp.39015053252139 +inu.30000092253925 +pst.000008820648 +mdp.39015008570205 +coo.31924075116701 +nyp.33433089908747 +hvd.32044086791217 +njp.32101072577347 +njp.32101071985772 +mdp.39015056480562 +umn.31951002792969k +nyp.33433087345637 +mdp.39015022469087 +inu.30000084048762 +njp.32101023869397 +mdp.39015008095047 +njp.32101077260600 +njp.32101077260618 +nyp.33433074380704 +inu.30000104007657 +njp.32101076457702 +mdp.39015043800013 +nyp.33433004518415 +hvd.32044038399135 +njp.32101077288247 +njp.32101076199130 +njp.32101076530979 +uc1.c2608792 +njp.32101076530979 +nyp.33433074380662 +hvd.32044011856838 +uc1.b3919785 +wu.89001946482 +uc1.b3311895 +uc2.ark:/13960/t8w95458t 
+mdp.39015013094217 +mdp.39015008305289 +njp.32101076530979 +hvd.hwp8ba +njp.32101076457066 +coo1.ark:/13960/t4bp0n867 +njp.32101076530979 +uc1.b2972949 +njp.32101045352828 +njp.32101047467988 +hvd.32044086759800 +uc1.b3885866 +mdp.39015060429357 +njp.32101077288569 +aeu.ark:/13960/t1pg22p71 +nyp.33433082488887 +njp.32101064475831 +uc1.31175035197097 +njp.32101076880150 +coo.31924008821047 +nyp.33433074380720 +hvd.32044038400958 +mdp.39015030932753 +nyp.33433074380696 +njp.32101076889508 +njp.32101077288239 +hvd.32044098627268 +hvd.32044092634013 +hvd.32044014683114 +hvd.32044058190059 +umn.31951002804000l +uc1.$b661479 +hvd.32044092645134 +mdp.39015060429357 +mdp.39015074687149 +mdp.39015035805772 +pst.000068744458 +mdp.39015016898432 +uc1.b3924130 +uc1.b3924129 +njp.32101075672509 +uc1.b3293449 +loc.ark:/13960/t1fj37n7j +loc.ark:/13960/t9280zf6z +njp.32101076200664 +njp.32101076403425 +mdp.39015060429340 +mdp.39015003348201 +mdp.39015067091739 +mdp.39015059896285 +mdp.39015060430397 +inu.30000099860565 +njp.32101073025528 +hvd.32044090276395 +hvd.hwilnp +dul1.ark:/13960/t6d23816n +njp.32101075716934 +njp.32101075716934 +njp.32101076199239 +hvd.hnqbsv +nyp.33433081647616 +njp.32101037023239 +njp.32101037601646 +njp.32101063578791 +hvd.32044012418034 +ucm.5326809190 +mdp.39015054289338 +njp.32101074443332 +njp.32101074443399 +njp.32101074443415 +njp.32101021580343 +inu.30000099860326 +njp.32101076041084 +njp.32101075672749 +njp.32101075729960 +hvd.32044098627433 +uc1.b3885859 +mdp.39015060429530 +njp.32101077288213 +mdp.39076000323746 +inu.32000000683138 +mdp.39015027588287 +mdp.39015073107768 +coo.31924057525671 +hvd.32044092711480 +uc1.b3924126 +hvd.32044038400958 +hvd.32044092797232 +njp.32101076384435 +njp.32101076378189 +hvd.32044009957044 +njp.32101076378536 +hvd.hnqbsx +hvd.32044012418034 +nyp.33433081756896 +chi.55220547 +njp.32101076889979 +chi.79213384 +uiuo.ark:/13960/t4qk01n82 +hvd.32044048962955 +hvd.32044092677376 +uc1.32106001646766 +uc1.32106015528877 +nyp.33433081672853 +njp.32101076426079 +njp.32101076040946 +mdp.39015048893823 +uc1.b2974316 +uc2.ark:/13960/t8ff3wr3q +hvd.hxkepr +njp.32101076457728 +coo.31924069259624 +hvd.32044092640663 +njp.32101075673622 +mdp.39015049192910 +hvd.32044092796093 +mdp.39015030866506 +hvd.32044092797190 +chi.12755443 +hvd.32044048963029 +hvd.hnqbsr +mdp.39015048893831 +hvd.32044048963136 +hvd.32044103001129 +hvd.hxe6bx +uiug.30112001676896 +inu.30000099671525 +inu.30000099671624 +uiug.30112046384886 +hvd.hnqbtn +njp.32101065266304 +njp.32101076201183 +mdp.39015060429381 +njp.32101075672871 +hvd.32044010396893 +uc1.b3385477 +nyp.33433082219621 +njp.32101064467036 +hvd.32044031571342 +hvd.32044054989868 +inu.30000099671566 +mdp.39015060429399 +mdp.39015033845689 +hvd.32044098628274 +mdp.39015059397953 +nyp.33433075914071 +mdp.39015008095104 +njp.32101065270892 +hvd.32044092711431 +coo.31924066518758 +uc1.ax0003129954 +njp.32101076472909 +mdp.39015060429480 +mdp.39015030936325 +hvd.32044098641632 +njp.32101075672855 +chi.79279237 +hvd.32044011590692 +njp.32101076472917 +mdp.39015059402340 +umn.31951002792970z +njp.32101076472958 +mdp.39015060430116 +njp.32101076472933 +mdp.39015060430058 +mdp.39015060430371 +mdp.39015014523602 +hvd.32044098641343 +njp.32101007893256 +njp.32101063551608 +hvd.32044092754100 +mdp.39015041879613 +nyp.33433074829270 +njp.32101075672608 +hvd.hnqbst +hvd.hxe6bz +hvd.32044010396893 +nyp.33433082219902 +coo1.ark:/13960/t3st84m4q +mdp.39015036664079 +loc.ark:/13960/t0xp7hp6s +uc1.c2641998 +hvd.32044092796085 
+hvd.32044092797208 +mdp.39015008095153 +mdp.39015060429506 +uc1.$b272656 +hvd.hnle8h +nyp.33433000182992 +inu.30000099671723 +mdp.39015060429589 +hvd.hnqbtj +mdp.39015060429522 +hvd.hxe6c3 +nyp.33433074894126 +njp.32101075672632 +mdp.39015043572539 +hvd.hx2hrd +njp.32101047467996 +njp.32101047468002 +coo1.ark:/13960/t70v9287j +njp.32101077879508 +mdp.39015060429449 +mdp.39015087701341 +hvd.32044074313453 +njp.32101076457785 +mdp.39015060429548 +chi.78013677 +mdp.39015015383279 +nyp.33433076071004 +hvd.hnqbtr +inu.30000099671541 +coo1.ark:/13960/t9n30f16x +mdp.39015073107529 +njp.32101063578627 +coo.31924062189661 +njp.32101076471414 +coo.31924066146733 +mdp.39015060429431 +mdp.39015049192894 +uc1.b2972410 +mdp.39015062280055 +inu.30000099671558 +inu.30000104005750 +njp.32101077262788 +mdp.39015049192928 +njp.32101075672616 diff --git a/scripts/version-labels/version-labels-2024-03-20.tsv b/scripts/version-labels/version-labels-2024-03-20.tsv new file mode 100644 index 00000000..9cc72944 --- /dev/null +++ b/scripts/version-labels/version-labels-2024-03-20.tsv @@ -0,0 +1,518 @@ +htid version_label +hvd.32044090278565 2022-12-04 12:12 UTC +nyp.33433081683744 2022-11-10 15:09 UTC +uc1.b3924132 2022-06-16 11:33 UTC +mdp.39015026482151 2023-07-22 18:55 UTC +uiug.30112106245936 2024-02-28 15:57 UTC +hvd.32044009576562 2023-10-07 22:52 UTC +nyp.33433067294433 2023-05-14 08:02 UTC +coo.31924065856167 2024-02-18 07:26 UTC +uc1.ax0002627784 2023-10-27 18:39 UTC +wu.89001946482 2023-07-21 14:01 UTC +uc1.b3311895 2023-05-24 06:50 UTC +hvd.32044048963128 2023-10-08 22:39 UTC +njp.32101076199213 2022-05-17 22:07 UTC +coo.31924051399685 2023-08-01 22:52 UTC +njp.32101076472800 2023-04-21 11:37 UTC +njp.32101076472859 2022-05-18 00:07 UTC +nyp.33433074380688 2023-07-08 15:15 UTC +hvd.32044019842491 2023-03-22 10:56 UTC +uc1.32106001559381 2022-07-11 01:01 UTC +mdp.39015024071642 2023-06-27 17:47 UTC +hvd.hx28d7 2023-03-01 03:52 UTC +uva.x002111617 2022-11-11 10:19 UTC +uc1.$b161790 2022-10-17 00:45 UTC +uc1.$b683534 2023-05-04 12:10 UTC +nyp.33433076066723 2022-10-02 13:14 UTC +miun.ajd7522.0001.001 2012-07-26 23:46 UTC +hvd.hxv9b7 2023-01-01 14:29 UTC +coo.31924065856167 2024-02-18 07:26 UTC +uc1.$b275098 2022-09-12 12:04 UTC +nyp.33433081676979 2024-03-01 09:07 UTC +coo.31924066177589 2023-10-03 14:18 UTC +hvd.32044011432754 2023-01-14 16:05 UTC +njp.32101076384609 2023-04-21 10:48 UTC +pst.000020068974 2023-08-10 06:15 UTC +mdp.39015087700681 2021-02-17 20:42 UTC +njp.32101076403078 2022-05-17 23:09 UTC +njp.32101075673655 2023-06-02 07:36 UTC +udel.31741113248746 2016-07-08 20:19 UTC +hvd.hn34f5 2022-12-05 08:54 UTC +inu.30000066028642 2019-05-02 19:27 UTC +uc1.b3794203 2023-05-31 10:25 UTC +uc1.b3794204 2022-06-03 23:44 UTC +inu.30000099671491 2022-12-16 15:44 UTC +hvd.hnqbsu 2023-03-19 17:15 UTC +mdp.39015020696541 2023-07-05 07:31 UTC +njp.32101075672541 2023-04-21 04:15 UTC +mdp.39015060441675 2023-04-15 03:09 UTC +hvd.32044031554363 2022-12-17 13:26 UTC +hvd.32044048963185 2023-10-08 22:38 UTC +njp.32101076403300 2022-10-25 02:18 UTC +coo.31924057525382 2024-02-10 13:52 UTC +coo.31924065580551 2023-09-11 05:00 UTC +mdp.39015008833884 2023-07-25 00:32 UTC +njp.32101010945275 2023-04-20 07:48 UTC +njp.32101076201159 2024-03-19 13:08 UTC +inu.30000099671632 2023-05-30 09:46 UTC +yale.39002004065844 2011-06-01 03:25 UTC +hvd.32044098627870 2022-12-29 10:25 UTC +inu.30000092253941 2022-12-20 16:29 UTC +mdp.39015060429308 2023-04-15 03:56 UTC +mdp.39015060429464 2023-04-06 06:50 UTC +uc1.b3385165 
2023-12-18 07:01 UTC +uiug.30112042710548 2024-02-17 18:52 UTC +uc1.b3548551 2022-10-09 05:09 UTC +uc1.b3850894 2022-09-29 04:50 UTC +hvd.hnqbts 2023-03-19 17:16 UTC +mdp.39015024071824 2023-03-12 01:33 UTC +hvd.32044050827351 2023-03-22 13:37 UTC +coo.31924065585840 2023-10-03 14:21 UTC +mdp.39015048893195 2023-06-27 08:17 UTC +mdp.39015053252139 2022-12-16 11:29 UTC +mdp.39015059489032 2023-07-08 05:46 UTC +uc1.b3385486 2023-06-12 02:37 UTC +inu.30000099671665 2022-12-16 15:45 UTC +mdp.39015060425942 2022-10-30 02:47 UTC +coo.31924066146733 2023-08-11 17:47 UTC +uc1.b2905408 2022-10-17 15:18 UTC +hvd.32044010335081 2022-12-28 23:29 UTC +coo.31924057525861 2024-02-10 13:52 UTC +njp.32101076201167 2024-03-19 13:08 UTC +hvd.32044014419220 2022-12-09 18:23 UTC +mdp.39015053262393 2022-12-15 12:43 UTC +mdp.39015060429746 2023-04-18 12:44 UTC +uc1.b2972967 2023-08-31 14:52 UTC +umn.31951000742933f 2023-09-29 21:37 UTC +hvd.hwqu51 2023-02-28 12:31 UTC +mdp.39015060425751 2023-07-06 13:33 UTC +uiuo.ark:/13960/t4qk01n82 2014-06-04 10:01 UTC +njp.32101063578718 2022-07-16 03:13 UTC +mdp.39015009286215 2023-07-29 07:08 UTC +nyp.33433082488895 2024-03-03 11:56 UTC +hvd.32044098628217 2022-12-29 10:25 UTC +hvd.32044043851013 2023-07-09 10:01 UTC +njp.32101076199171 2024-03-19 13:11 UTC +mdp.39015060426742 2023-07-06 13:32 UTC +nyp.33433082488911 2023-09-22 15:27 UTC +njp.32101075672806 2023-04-21 04:17 UTC +mdp.39015033845549 2023-11-26 06:21 UTC +nyp.33433076055809 2022-10-05 13:13 UTC +njp.32101076425980 2023-04-21 11:02 UTC +coo.31924057522082 2023-08-05 16:39 UTC +mdp.39015004858224 2023-11-08 13:59 UTC +coo.31924065856167 2024-02-18 07:26 UTC +uiug.30112042290434 2024-02-20 04:47 UTC +uc1.b3011277 2023-05-26 14:44 UTC +mdp.39015003346247 2023-07-22 14:29 UTC +mdp.39015049192902 2023-07-22 21:13 UTC +njp.32101076379989 2022-05-17 22:44 UTC +njp.32101076533932 2022-05-18 00:35 UTC +chi.78323978 2023-08-07 17:05 UTC +chi.55229744 2024-02-26 06:56 UTC +njp.32101073758805 2023-03-06 23:56 UTC +uc2.ark:/13960/t4bp05b0f 2018-12-08 13:43 UTC +mdp.39015059488877 2023-07-08 05:46 UTC +chi.78323841 2023-11-30 13:22 UTC +mdp.39015005484020 2022-10-02 20:16 UTC +hvd.32044010332070 2023-02-25 03:53 UTC +njp.32101047468010 2023-09-22 21:04 UTC +uc1.b3627386 2022-09-23 11:58 UTC +hvd.32044092658095 2023-01-16 13:14 UTC +hvd.32044014692362 2023-03-21 10:43 UTC +nyp.33433000183008 2023-05-18 23:09 UTC +njp.32101080222720 2023-10-28 19:10 UTC +njp.32101074834787 2023-06-02 09:40 UTC +uc1.31158010000023 2023-08-24 03:53 UTC +nyp.33433067366678 2024-03-04 02:01 UTC +hvd.hwa2b7 2023-03-20 06:00 UTC +hvd.32044103001111 2023-01-17 20:33 UTC +umn.31951t00020309x 2023-10-01 17:01 UTC +mdp.39015008880067 2022-10-12 19:31 UTC +chi.78023993 2023-08-04 04:30 UTC +hvd.32044092738798 2023-01-16 14:43 UTC +nyp.33433081658886 2023-09-21 08:38 UTC +mdp.39015043572422 2024-01-12 15:17 UTC +njp.32101007684655 2024-03-04 21:45 UTC +uc1.32106020079791 2022-10-16 18:19 UTC +hvd.32044092797216 2023-01-16 15:02 UTC +mdp.39015059846678 2022-11-22 09:32 UTC +mdp.39015036664038 2022-10-06 15:36 UTC +chi.78013704 2023-08-04 04:31 UTC +mdp.39015059395619 2023-07-07 08:28 UTC +uc1.b3627386 2022-09-23 11:58 UTC +hvd.32044048963011 2023-10-08 22:39 UTC +nyp.33433081646642 2022-05-01 03:24 UTC +nyp.33433076055809 2022-10-05 13:13 UTC +mdp.39015060429415 2023-04-06 06:50 UTC +hvd.hxkepr 2023-03-03 05:40 UTC +uiug.30112001676896 2024-02-28 02:20 UTC +mdp.39015060429423 2023-04-18 12:44 UTC +chi.12153205 2024-02-12 15:40 UTC +mdp.39015026482151 
2023-07-22 18:55 UTC +mdp.39015063933546 2023-12-31 21:55 UTC +inu.30000099860342 2022-12-20 16:30 UTC +njp.32101076425485 2023-07-06 01:56 UTC +umn.31951p00293997r 2023-01-14 04:12 UTC +wu.89001946482 2023-07-21 14:01 UTC +uc1.b3311895 2023-05-24 06:50 UTC +hvd.32044092624287 2023-01-16 15:26 UTC +uc1.b3627386 2022-09-23 11:58 UTC +uc1.$b31654 2022-05-29 17:18 UTC +inu.30000099671764 2023-01-31 17:30 UTC +hvd.32044094024825 2023-01-16 21:26 UTC +njp.32101020794176 2023-11-01 10:30 UTC +hvd.32044012913166 2022-12-09 14:08 UTC +njp.32101076451556 2023-10-26 04:46 UTC +njp.32101076472966 2022-11-10 16:08 UTC +mdp.39015060429597 2023-04-18 12:44 UTC +coo.31924066328299 2023-12-02 03:03 UTC +coo.31924066328299 2023-12-02 03:03 UTC +njp.32101076180460 2023-07-14 12:36 UTC +mdp.39015012991363 2023-04-05 13:23 UTC +njp.32101075673754 2023-04-21 04:19 UTC +njp.32101075672665 2022-11-05 21:32 UTC +msu.31293018462196 2022-05-28 02:13 UTC +njp.32101077262895 2023-06-30 10:52 UTC +hvd.32044092797182 2023-01-16 15:02 UTC +mdp.39015043572588 2023-08-15 11:16 UTC +nyp.33433081659017 2024-03-05 00:04 UTC +uc1.b2900825 2022-10-17 13:59 UTC +mdp.39015060424127 2023-04-15 03:55 UTC +njp.32101075672905 2022-11-05 23:42 UTC +hvd.32044010396893 2022-12-09 08:13 UTC +mdp.39015010329087 2023-03-31 15:46 UTC +njp.32101076890142 2022-05-18 01:07 UTC +mdp.39015010328121 2023-09-04 14:39 UTC +uc1.b2972398 2023-08-31 14:51 UTC +hvd.ah3rmv 2023-03-18 08:21 UTC +mdp.39015048909868 2022-07-27 21:17 UTC +njp.32101074443415 2023-07-06 19:28 UTC +mdp.39015078153817 2022-12-15 18:56 UTC +hvd.32044040731473 2023-10-08 19:59 UTC +pst.000068744151 2023-12-10 02:20 UTC +njp.32101065266668 2022-09-04 00:16 UTC +njp.32101065266668 2022-09-04 00:16 UTC +hvd.32044086720679 2022-12-10 15:06 UTC +coo.31924057531109 2024-02-18 05:18 UTC +mdp.39015060429332 2023-04-18 12:44 UTC +chi.19606141 2024-02-27 20:53 UTC +hvd.32044010495000 2022-12-31 00:36 UTC +hvd.32044024285587 2022-12-31 02:07 UTC +njp.32101076201175 2024-03-19 13:08 UTC +uva.x030453236 2022-08-26 14:39 UTC +mdp.39015043572661 2022-09-30 17:45 UTC +uc1.b2905409 2022-10-05 17:01 UTC +mdp.39015060429555 2023-04-18 12:44 UTC +mdp.39015060429498 2023-04-15 03:56 UTC +mdp.39015008601109 2022-11-23 18:27 UTC +nyp.33433074853965 2022-12-24 03:37 UTC +njp.32101068158847 2023-05-20 00:32 UTC +hvd.hnlia3 2023-03-19 14:38 UTC +loc.ark:/13960/t3gx4tr12 2011-03-17 01:33 UTC +hvd.tz1l4c 2023-03-21 11:20 UTC +mdp.39015060430108 2023-04-15 03:57 UTC +hvd.tz1l4w 2023-05-31 12:29 UTC +njp.32101010945226 2023-07-05 23:53 UTC +mdp.39015031048211 2023-04-10 09:02 UTC +njp.32101077276523 2023-04-21 17:28 UTC +njp.32101076782703 2023-06-19 12:20 UTC +hvd.32044050831999 2023-03-22 13:37 UTC +mdp.39015012330885 2023-07-05 05:13 UTC +uc1.b3546679 2023-06-11 10:35 UTC +njp.32101075673481 2023-04-21 04:18 UTC +mdp.39015020441104 2022-12-21 00:42 UTC +coo.31924106553286 2023-12-06 22:25 UTC +njp.32101076472792 2023-04-21 11:37 UTC +mdp.39015043572372 2022-09-30 17:45 UTC +hvd.32044050827351 2023-03-22 13:37 UTC +mdp.39015063944592 2022-11-19 07:56 UTC +njp.32101063578650 2023-04-20 09:35 UTC +njp.32101076433125 2023-09-26 15:14 UTC +mdp.39015048893252 2022-12-15 12:05 UTC +coo.31924065856167 2024-02-18 07:26 UTC +njp.32101075716934 2024-03-19 12:06 UTC +coo.31924057525606 2024-02-10 15:13 UTC +coo.31924062186204 2023-12-01 18:53 UTC +uc1.b3385173 2023-06-12 02:44 UTC +mdp.39015059402357 2022-07-27 23:27 UTC +njp.32101075672624 2023-04-21 04:16 UTC +hvd.ah3kfk 2023-03-18 07:39 UTC +njp.32101076795333 
2024-03-19 15:01 UTC +njp.32101023869397 2023-04-20 08:03 UTC +uc1.$b312189 2022-09-12 05:57 UTC +njp.32101007684614 2024-03-04 21:45 UTC +hvd.32044092624352 2023-01-16 13:00 UTC +uva.x000240890 2022-09-18 04:24 UTC +nyp.33433067294433 2023-05-14 08:02 UTC +nyp.33433067294433 2023-05-14 08:02 UTC +coo.31924007186517 2023-11-30 00:54 UTC +njp.32101047468002 2023-09-22 21:04 UTC +hvd.32044009907841 2023-10-07 23:22 UTC +hvd.32044021008149 2023-10-08 11:00 UTC +njp.32101076201183 2023-07-03 04:24 UTC +njp.32101076457744 2023-04-21 11:15 UTC +mdp.39015060430082 2023-07-06 13:29 UTC +uc1.b3385513 2023-12-18 06:59 UTC +mdp.39015053252139 2022-12-16 11:29 UTC +inu.30000092253925 2023-04-10 00:21 UTC +pst.000008820648 2023-08-07 19:42 UTC +mdp.39015008570205 2023-06-05 00:18 UTC +coo.31924075116701 2023-09-27 19:07 UTC +nyp.33433089908747 2023-08-08 14:35 UTC +hvd.32044086791217 2022-12-09 17:53 UTC +njp.32101072577347 2024-03-17 02:19 UTC +njp.32101071985772 2022-12-25 02:44 UTC +mdp.39015056480562 2022-11-24 12:09 UTC +umn.31951002792969k 2022-09-14 23:40 UTC +nyp.33433087345637 2023-05-26 11:58 UTC +mdp.39015022469087 2023-08-06 16:32 UTC +inu.30000084048762 2023-04-21 01:25 UTC +njp.32101023869397 2023-04-20 08:03 UTC +mdp.39015008095047 2022-12-15 21:50 UTC +njp.32101077260600 2022-05-18 01:32 UTC +njp.32101077260618 2022-05-18 01:32 UTC +nyp.33433074380704 2023-07-05 06:03 UTC +inu.30000104007657 2023-03-30 18:06 UTC +njp.32101076457702 2023-04-21 11:15 UTC +mdp.39015043800013 2023-04-13 10:29 UTC +nyp.33433004518415 2024-03-08 20:02 UTC +hvd.32044038399135 2023-01-14 23:49 UTC +njp.32101077288247 2023-04-21 15:12 UTC +njp.32101076199130 2023-07-11 11:08 UTC +njp.32101076530979 2023-05-20 05:04 UTC +uc1.c2608792 2023-09-27 23:40 UTC +njp.32101076530979 2023-05-20 05:04 UTC +nyp.33433074380662 2023-05-31 03:09 UTC +hvd.32044011856838 2022-12-31 00:39 UTC +uc1.b3919785 2022-09-30 00:16 UTC +wu.89001946482 2023-07-21 14:01 UTC +uc1.b3311895 2023-05-24 06:50 UTC +uc2.ark:/13960/t8w95458t 2010-05-03 22:15 UTC +mdp.39015013094217 2023-07-22 15:04 UTC +mdp.39015008305289 2023-06-15 15:36 UTC +njp.32101076530979 2023-05-20 05:04 UTC +hvd.hwp8ba 2023-04-07 15:28 UTC +njp.32101076457066 2023-04-05 18:58 UTC +coo1.ark:/13960/t4bp0n867 2022-11-07 18:14 UTC +njp.32101076530979 2023-05-20 05:04 UTC +uc1.b2972949 2023-05-26 05:19 UTC +njp.32101045352828 2023-09-22 21:20 UTC +njp.32101047467988 2023-09-22 21:04 UTC +hvd.32044086759800 2022-12-10 14:16 UTC +uc1.b3885866 2022-09-29 21:15 UTC +mdp.39015060429357 2023-04-18 12:44 UTC +njp.32101077288569 2022-05-18 02:20 UTC +aeu.ark:/13960/t1pg22p71 2014-09-15 07:08 UTC +nyp.33433082488887 2023-01-24 13:51 UTC +njp.32101064475831 2024-03-16 21:24 UTC +uc1.31175035197097 2022-05-24 06:26 UTC +njp.32101076880150 2023-05-13 23:22 UTC +coo.31924008821047 2022-11-25 23:34 UTC +nyp.33433074380720 2023-06-01 19:48 UTC +hvd.32044038400958 2023-10-08 18:51 UTC +mdp.39015030932753 2022-12-15 07:29 UTC +nyp.33433074380696 2023-07-11 10:18 UTC +njp.32101076889508 2023-10-26 04:28 UTC +njp.32101077288239 2023-04-21 15:12 UTC +hvd.32044098627268 2022-12-29 10:23 UTC +hvd.32044092634013 2023-01-15 12:33 UTC +hvd.32044014683114 2023-10-08 04:35 UTC +hvd.32044058190059 2023-01-15 05:02 UTC +umn.31951002804000l 2024-02-02 16:44 UTC +uc1.$b661479 2023-05-03 21:36 UTC +hvd.32044092645134 2023-01-15 11:07 UTC +mdp.39015060429357 2023-04-18 12:44 UTC +mdp.39015074687149 2023-07-20 18:19 UTC +mdp.39015035805772 2023-01-29 00:15 UTC +pst.000068744458 2023-11-01 15:38 UTC 
+mdp.39015016898432 2023-01-13 01:34 UTC +uc1.b3924130 2022-06-10 04:29 UTC +uc1.b3924129 2022-06-16 11:52 UTC +njp.32101075672509 2023-04-21 04:16 UTC +uc1.b3293449 2022-08-31 05:38 UTC +loc.ark:/13960/t1fj37n7j 2011-03-12 18:03 UTC +loc.ark:/13960/t9280zf6z 2011-03-19 03:14 UTC +njp.32101076200664 2023-04-04 23:21 UTC +njp.32101076403425 2022-05-17 23:10 UTC +mdp.39015060429340 2023-04-18 12:44 UTC +mdp.39015003348201 2023-11-24 05:38 UTC +mdp.39015067091739 2023-04-10 13:54 UTC +mdp.39015059896285 2023-08-06 17:42 UTC +mdp.39015060430397 2023-04-18 12:45 UTC +inu.30000099860565 2022-12-16 15:46 UTC +njp.32101073025528 2023-10-25 16:37 UTC +hvd.32044090276395 2023-06-22 04:37 UTC +hvd.hwilnp 2023-03-20 08:08 UTC +dul1.ark:/13960/t6d23816n 2014-01-10 17:15 UTC +njp.32101075716934 2024-03-19 12:06 UTC +njp.32101075716934 2024-03-19 12:06 UTC +njp.32101076199239 2023-07-04 12:30 UTC +hvd.hnqbsv 2023-03-19 17:15 UTC +nyp.33433081647616 2024-03-01 04:11 UTC +njp.32101037023239 2023-10-28 04:07 UTC +njp.32101037601646 2023-11-01 09:45 UTC +njp.32101063578791 2024-03-06 13:16 UTC +hvd.32044012418034 2023-10-08 02:17 UTC +ucm.5326809190 2022-05-22 09:08 UTC +mdp.39015054289338 2022-11-24 12:41 UTC +njp.32101074443332 2024-03-19 10:54 UTC +njp.32101074443399 2023-07-12 14:30 UTC +njp.32101074443415 2023-07-06 19:28 UTC +njp.32101021580343 2023-04-20 08:03 UTC +inu.30000099860326 2022-12-16 15:47 UTC +njp.32101076041084 2023-08-02 17:35 UTC +njp.32101075672749 2023-04-21 04:17 UTC +njp.32101075729960 2024-03-19 11:48 UTC +hvd.32044098627433 2022-12-29 10:24 UTC +uc1.b3885859 2023-06-02 19:17 UTC +mdp.39015060429530 2023-04-18 12:44 UTC +njp.32101077288213 2023-04-08 12:24 UTC +mdp.39076000323746 2023-08-13 21:22 UTC +inu.32000000683138 2024-01-19 09:06 UTC +mdp.39015027588287 2024-01-25 09:54 UTC +mdp.39015073107768 2024-01-05 21:48 UTC +coo.31924057525671 2024-02-10 13:52 UTC +hvd.32044092711480 2023-01-16 15:29 UTC +uc1.b3924126 2022-06-05 01:58 UTC +hvd.32044038400958 2023-10-08 18:51 UTC +hvd.32044092797232 2023-01-16 15:42 UTC +njp.32101076384435 2023-03-26 09:14 UTC +njp.32101076378189 2023-10-25 19:32 UTC +hvd.32044009957044 2023-10-07 23:28 UTC +njp.32101076378536 2023-10-25 19:32 UTC +hvd.hnqbsx 2023-03-26 02:32 UTC +hvd.32044012418034 2023-10-08 02:17 UTC +nyp.33433081756896 2023-01-18 18:29 UTC +chi.55220547 2024-02-12 15:35 UTC +njp.32101076889979 2023-10-27 10:54 UTC +chi.79213384 2024-02-11 10:28 UTC +uiuo.ark:/13960/t4qk01n82 2014-06-04 10:01 UTC +hvd.32044048962955 2023-02-26 13:35 UTC +hvd.32044092677376 2023-10-10 01:49 UTC +uc1.32106001646766 2023-05-10 19:57 UTC +uc1.32106015528877 2022-06-21 04:43 UTC +nyp.33433081672853 2023-09-21 13:26 UTC +njp.32101076426079 2023-04-21 11:04 UTC +njp.32101076040946 2023-09-26 18:29 UTC +mdp.39015048893823 2023-07-01 02:53 UTC +uc1.b2974316 2023-06-07 20:53 UTC +uc2.ark:/13960/t8ff3wr3q 2010-04-29 11:05 UTC +hvd.hxkepr 2023-03-03 05:40 UTC +njp.32101076457728 2023-04-21 11:15 UTC +coo.31924069259624 2023-10-14 20:07 UTC +hvd.32044092640663 2023-01-17 00:26 UTC +njp.32101075673622 2024-03-19 10:31 UTC +mdp.39015049192910 2023-07-03 08:47 UTC +hvd.32044092796093 2023-01-16 15:01 UTC +mdp.39015030866506 2023-11-08 17:54 UTC +hvd.32044092797190 2023-01-16 15:02 UTC +chi.12755443 2024-02-27 20:53 UTC +hvd.32044048963029 2024-01-03 09:15 UTC +hvd.hnqbsr 2023-03-19 17:15 UTC +mdp.39015048893831 2023-06-27 22:57 UTC +hvd.32044048963136 2024-01-03 09:15 UTC +hvd.32044103001129 2023-01-17 20:00 UTC +hvd.hxe6bx 2023-03-26 04:48 UTC +uiug.30112001676896 
2024-02-28 02:20 UTC +inu.30000099671525 2023-07-04 02:42 UTC +inu.30000099671624 2023-05-30 09:46 UTC +uiug.30112046384886 2022-05-19 10:57 UTC +hvd.hnqbtn 2023-03-19 17:16 UTC +njp.32101065266304 2022-09-03 13:08 UTC +njp.32101076201183 2023-07-03 04:24 UTC +mdp.39015060429381 2023-04-09 19:32 UTC +njp.32101075672871 2023-04-21 04:17 UTC +hvd.32044010396893 2022-12-09 08:13 UTC +uc1.b3385477 2023-06-12 02:37 UTC +nyp.33433082219621 2023-06-01 06:45 UTC +njp.32101064467036 2023-11-02 08:51 UTC +hvd.32044031571342 2023-01-14 20:51 UTC +hvd.32044054989868 2023-10-09 01:31 UTC +inu.30000099671566 2023-05-30 09:46 UTC +mdp.39015060429399 2023-04-18 12:44 UTC +mdp.39015033845689 2023-11-26 10:54 UTC +hvd.32044098628274 2022-12-29 11:19 UTC +mdp.39015059397953 2023-04-13 23:26 UTC +nyp.33433075914071 2023-07-03 02:44 UTC +mdp.39015008095104 2023-07-01 22:59 UTC +njp.32101065270892 2023-07-15 08:12 UTC +hvd.32044092711431 2023-01-16 14:37 UTC +coo.31924066518758 2023-11-19 14:39 UTC +uc1.ax0003129954 2022-07-17 21:13 UTC +njp.32101076472909 2023-07-06 01:55 UTC +mdp.39015060429480 2023-04-15 03:08 UTC +mdp.39015030936325 2023-04-06 02:07 UTC +hvd.32044098641632 2022-12-29 10:48 UTC +njp.32101075672855 2023-07-05 05:32 UTC +chi.79279237 2022-08-29 08:22 UTC +hvd.32044011590692 2022-12-31 01:51 UTC +njp.32101076472917 2022-09-10 13:56 UTC +mdp.39015059402340 2023-02-01 16:00 UTC +umn.31951002792970z 2024-01-25 03:01 UTC +njp.32101076472958 2023-07-11 16:57 UTC +mdp.39015060430116 2023-04-18 12:45 UTC +njp.32101076472933 2022-11-10 16:07 UTC +mdp.39015060430058 2023-04-18 12:44 UTC +mdp.39015060430371 2023-04-18 12:45 UTC +mdp.39015014523602 2022-12-15 04:38 UTC +hvd.32044098641343 2022-12-29 11:26 UTC +njp.32101007893256 2023-08-21 10:56 UTC +njp.32101063551608 2023-03-06 12:40 UTC +hvd.32044092754100 2023-01-16 14:48 UTC +mdp.39015041879613 2008-06-05 03:42 UTC +nyp.33433074829270 2022-05-01 00:20 UTC +njp.32101075672608 2023-04-21 05:02 UTC +hvd.hnqbst 2023-03-19 17:15 UTC +hvd.hxe6bz 2023-03-26 05:00 UTC +hvd.32044010396893 2022-12-09 08:13 UTC +nyp.33433082219902 2023-09-21 16:01 UTC +coo1.ark:/13960/t3st84m4q 2015-07-18 06:04 UTC +mdp.39015036664079 2023-01-27 20:38 UTC +loc.ark:/13960/t0xp7hp6s 2011-03-12 12:24 UTC +uc1.c2641998 2023-12-22 22:52 UTC +hvd.32044092796085 2023-10-10 02:10 UTC +hvd.32044092797208 2023-01-16 15:02 UTC +mdp.39015008095153 2023-01-31 19:47 UTC +mdp.39015060429506 2023-04-09 19:32 UTC +uc1.$b272656 2022-09-14 05:41 UTC +hvd.hnle8h 2023-03-19 15:06 UTC +nyp.33433000182992 2023-05-18 20:49 UTC +inu.30000099671723 2023-05-30 09:46 UTC +mdp.39015060429589 2023-04-18 12:44 UTC +hvd.hnqbtj 2023-03-19 17:15 UTC +mdp.39015060429522 2023-04-09 19:32 UTC +hvd.hxe6c3 2023-03-26 05:00 UTC +nyp.33433074894126 2023-03-06 04:28 UTC +njp.32101075672632 2023-04-21 04:16 UTC +mdp.39015043572539 2022-12-26 14:27 UTC +hvd.hx2hrd 2023-04-04 14:47 UTC +njp.32101047467996 2023-09-22 21:04 UTC +njp.32101047468002 2023-09-22 21:04 UTC +coo1.ark:/13960/t70v9287j 2015-07-18 13:54 UTC +njp.32101077879508 2022-09-07 18:55 UTC +mdp.39015060429449 2023-04-18 12:44 UTC +mdp.39015087701341 2024-03-10 19:04 UTC +hvd.32044074313453 2022-12-09 16:18 UTC +njp.32101076457785 2023-04-21 11:15 UTC +mdp.39015060429548 2023-04-15 03:56 UTC +chi.78013677 2023-08-04 04:31 UTC +mdp.39015015383279 2023-08-04 09:23 UTC +nyp.33433076071004 2022-10-05 14:13 UTC +hvd.hnqbtr 2023-03-26 02:32 UTC +inu.30000099671541 2022-12-16 15:45 UTC +coo1.ark:/13960/t9n30f16x 2015-07-18 20:16 UTC +mdp.39015073107529 2024-01-05 
20:27 UTC +njp.32101063578627 2024-03-06 13:16 UTC +coo.31924062189661 2023-07-08 12:30 UTC +njp.32101076471414 2023-12-24 13:04 UTC +coo.31924066146733 2023-08-11 17:47 UTC +mdp.39015060429431 2023-04-09 19:32 UTC +mdp.39015049192894 2023-08-07 09:02 UTC +uc1.b2972410 2022-10-01 23:20 UTC +mdp.39015062280055 2022-11-26 21:01 UTC +inu.30000099671558 2023-07-10 04:42 UTC +inu.30000104005750 2023-03-30 18:06 UTC +njp.32101077262788 2023-06-02 10:52 UTC +mdp.39015049192928 2023-04-18 07:10 UTC +njp.32101075672616 2023-04-21 04:16 UTC diff --git a/scripts/version-labels/version-labels-2024-03-21.tsv b/scripts/version-labels/version-labels-2024-03-21.tsv new file mode 100644 index 00000000..4c937b6d --- /dev/null +++ b/scripts/version-labels/version-labels-2024-03-21.tsv @@ -0,0 +1,518 @@ +htid version_label +hvd.32044090278565 2022-12-04 12:12 UTC +nyp.33433081683744 2022-11-10 15:09 UTC +uc1.b3924132 2022-06-16 11:33 UTC +mdp.39015026482151 2023-07-22 18:55 UTC +uiug.30112106245936 2024-02-28 15:57 UTC +hvd.32044009576562 2023-10-07 22:52 UTC +nyp.33433067294433 2023-05-14 08:02 UTC +coo.31924065856167 2024-02-18 07:26 UTC +uc1.ax0002627784 2023-10-27 18:39 UTC +wu.89001946482 2023-07-21 14:01 UTC +uc1.b3311895 2023-05-24 06:50 UTC +hvd.32044048963128 2023-10-08 22:39 UTC +njp.32101076199213 2024-03-20 20:42 UTC +coo.31924051399685 2023-08-01 22:52 UTC +njp.32101076472800 2023-04-21 11:37 UTC +njp.32101076472859 2024-03-20 21:48 UTC +nyp.33433074380688 2023-07-08 15:15 UTC +hvd.32044019842491 2023-03-22 10:56 UTC +uc1.32106001559381 2022-07-11 01:01 UTC +mdp.39015024071642 2023-06-27 17:47 UTC +hvd.hx28d7 2023-03-01 03:52 UTC +uva.x002111617 2022-11-11 10:19 UTC +uc1.$b161790 2022-10-17 00:45 UTC +uc1.$b683534 2023-05-04 12:10 UTC +nyp.33433076066723 2022-10-02 13:14 UTC +miun.ajd7522.0001.001 2012-07-26 23:46 UTC +hvd.hxv9b7 2023-01-01 14:29 UTC +coo.31924065856167 2024-02-18 07:26 UTC +uc1.$b275098 2022-09-12 12:04 UTC +nyp.33433081676979 2024-03-01 09:07 UTC +coo.31924066177589 2023-10-03 14:18 UTC +hvd.32044011432754 2023-01-14 16:05 UTC +njp.32101076384609 2023-04-21 10:48 UTC +pst.000020068974 2023-08-10 06:15 UTC +mdp.39015087700681 2021-02-17 20:42 UTC +njp.32101076403078 2024-03-20 22:55 UTC +njp.32101075673655 2023-06-02 07:36 UTC +udel.31741113248746 2016-07-08 20:19 UTC +hvd.hn34f5 2022-12-05 08:54 UTC +inu.30000066028642 2019-05-02 19:27 UTC +uc1.b3794203 2023-05-31 10:25 UTC +uc1.b3794204 2022-06-03 23:44 UTC +inu.30000099671491 2022-12-16 15:44 UTC +hvd.hnqbsu 2023-03-19 17:15 UTC +mdp.39015020696541 2023-07-05 07:31 UTC +njp.32101075672541 2023-04-21 04:15 UTC +mdp.39015060441675 2023-04-15 03:09 UTC +hvd.32044031554363 2022-12-17 13:26 UTC +hvd.32044048963185 2023-10-08 22:38 UTC +njp.32101076403300 2022-10-25 02:18 UTC +coo.31924057525382 2024-02-10 13:52 UTC +coo.31924065580551 2023-09-11 05:00 UTC +mdp.39015008833884 2023-07-25 00:32 UTC +njp.32101010945275 2023-04-20 07:48 UTC +njp.32101076201159 2024-03-19 13:08 UTC +inu.30000099671632 2023-05-30 09:46 UTC +yale.39002004065844 2011-06-01 03:25 UTC +hvd.32044098627870 2022-12-29 10:25 UTC +inu.30000092253941 2022-12-20 16:29 UTC +mdp.39015060429308 2023-04-15 03:56 UTC +mdp.39015060429464 2023-04-06 06:50 UTC +uc1.b3385165 2023-12-18 07:01 UTC +uiug.30112042710548 2024-02-17 18:52 UTC +uc1.b3548551 2022-10-09 05:09 UTC +uc1.b3850894 2022-09-29 04:50 UTC +hvd.hnqbts 2023-03-19 17:16 UTC +mdp.39015024071824 2023-03-12 01:33 UTC +hvd.32044050827351 2023-03-22 13:37 UTC +coo.31924065585840 2023-10-03 14:21 UTC 
+mdp.39015048893195 2023-06-27 08:17 UTC +mdp.39015053252139 2022-12-16 11:29 UTC +mdp.39015059489032 2023-07-08 05:46 UTC +uc1.b3385486 2023-06-12 02:37 UTC +inu.30000099671665 2022-12-16 15:45 UTC +mdp.39015060425942 2022-10-30 02:47 UTC +coo.31924066146733 2023-08-11 17:47 UTC +uc1.b2905408 2022-10-17 15:18 UTC +hvd.32044010335081 2022-12-28 23:29 UTC +coo.31924057525861 2024-02-10 13:52 UTC +njp.32101076201167 2024-03-19 13:08 UTC +hvd.32044014419220 2022-12-09 18:23 UTC +mdp.39015053262393 2022-12-15 12:43 UTC +mdp.39015060429746 2023-04-18 12:44 UTC +uc1.b2972967 2023-08-31 14:52 UTC +umn.31951000742933f 2023-09-29 21:37 UTC +hvd.hwqu51 2023-02-28 12:31 UTC +mdp.39015060425751 2023-07-06 13:33 UTC +uiuo.ark:/13960/t4qk01n82 2014-06-04 10:01 UTC +njp.32101063578718 2022-07-16 03:13 UTC +mdp.39015009286215 2023-07-29 07:08 UTC +nyp.33433082488895 2024-03-03 11:56 UTC +hvd.32044098628217 2022-12-29 10:25 UTC +hvd.32044043851013 2023-07-09 10:01 UTC +njp.32101076199171 2024-03-19 13:11 UTC +mdp.39015060426742 2023-07-06 13:32 UTC +nyp.33433082488911 2023-09-22 15:27 UTC +njp.32101075672806 2023-04-21 04:17 UTC +mdp.39015033845549 2023-11-26 06:21 UTC +nyp.33433076055809 2022-10-05 13:13 UTC +njp.32101076425980 2023-04-21 11:02 UTC +coo.31924057522082 2023-08-05 16:39 UTC +mdp.39015004858224 2023-11-08 13:59 UTC +coo.31924065856167 2024-02-18 07:26 UTC +uiug.30112042290434 2024-02-20 04:47 UTC +uc1.b3011277 2023-05-26 14:44 UTC +mdp.39015003346247 2023-07-22 14:29 UTC +mdp.39015049192902 2023-07-22 21:13 UTC +njp.32101076379989 2024-03-20 19:59 UTC +njp.32101076533932 2024-03-21 00:58 UTC +chi.78323978 2023-08-07 17:05 UTC +chi.55229744 2024-02-26 06:56 UTC +njp.32101073758805 2023-03-06 23:56 UTC +uc2.ark:/13960/t4bp05b0f 2018-12-08 13:43 UTC +mdp.39015059488877 2023-07-08 05:46 UTC +chi.78323841 2023-11-30 13:22 UTC +mdp.39015005484020 2022-10-02 20:16 UTC +hvd.32044010332070 2023-02-25 03:53 UTC +njp.32101047468010 2023-09-22 21:04 UTC +uc1.b3627386 2022-09-23 11:58 UTC +hvd.32044092658095 2023-01-16 13:14 UTC +hvd.32044014692362 2023-03-21 10:43 UTC +nyp.33433000183008 2023-05-18 23:09 UTC +njp.32101080222720 2023-10-28 19:10 UTC +njp.32101074834787 2023-06-02 09:40 UTC +uc1.31158010000023 2023-08-24 03:53 UTC +nyp.33433067366678 2024-03-04 02:01 UTC +hvd.hwa2b7 2023-03-20 06:00 UTC +hvd.32044103001111 2023-01-17 20:33 UTC +umn.31951t00020309x 2023-10-01 17:01 UTC +mdp.39015008880067 2022-10-12 19:31 UTC +chi.78023993 2023-08-04 04:30 UTC +hvd.32044092738798 2023-01-16 14:43 UTC +nyp.33433081658886 2023-09-21 08:38 UTC +mdp.39015043572422 2024-01-12 15:17 UTC +njp.32101007684655 2024-03-04 21:45 UTC +uc1.32106020079791 2022-10-16 18:19 UTC +hvd.32044092797216 2023-01-16 15:02 UTC +mdp.39015059846678 2022-11-22 09:32 UTC +mdp.39015036664038 2022-10-06 15:36 UTC +chi.78013704 2023-08-04 04:31 UTC +mdp.39015059395619 2023-07-07 08:28 UTC +uc1.b3627386 2022-09-23 11:58 UTC +hvd.32044048963011 2023-10-08 22:39 UTC +nyp.33433081646642 2022-05-01 03:24 UTC +nyp.33433076055809 2022-10-05 13:13 UTC +mdp.39015060429415 2023-04-06 06:50 UTC +hvd.hxkepr 2023-03-03 05:40 UTC +uiug.30112001676896 2024-02-28 02:20 UTC +mdp.39015060429423 2023-04-18 12:44 UTC +chi.12153205 2024-02-12 15:40 UTC +mdp.39015026482151 2023-07-22 18:55 UTC +mdp.39015063933546 2023-12-31 21:55 UTC +inu.30000099860342 2022-12-20 16:30 UTC +njp.32101076425485 2023-07-06 01:56 UTC +umn.31951p00293997r 2023-01-14 04:12 UTC +wu.89001946482 2023-07-21 14:01 UTC +uc1.b3311895 2023-05-24 06:50 UTC +hvd.32044092624287 2023-01-16 
15:26 UTC +uc1.b3627386 2022-09-23 11:58 UTC +uc1.$b31654 2022-05-29 17:18 UTC +inu.30000099671764 2023-01-31 17:30 UTC +hvd.32044094024825 2023-01-16 21:26 UTC +njp.32101020794176 2023-11-01 10:30 UTC +hvd.32044012913166 2022-12-09 14:08 UTC +njp.32101076451556 2023-10-26 04:46 UTC +njp.32101076472966 2022-11-10 16:08 UTC +mdp.39015060429597 2023-04-18 12:44 UTC +coo.31924066328299 2023-12-02 03:03 UTC +coo.31924066328299 2023-12-02 03:03 UTC +njp.32101076180460 2023-07-14 12:36 UTC +mdp.39015012991363 2023-04-05 13:23 UTC +njp.32101075673754 2023-04-21 04:19 UTC +njp.32101075672665 2022-11-05 21:32 UTC +msu.31293018462196 2022-05-28 02:13 UTC +njp.32101077262895 2023-06-30 10:52 UTC +hvd.32044092797182 2023-01-16 15:02 UTC +mdp.39015043572588 2023-08-15 11:16 UTC +nyp.33433081659017 2024-03-05 00:04 UTC +uc1.b2900825 2022-10-17 13:59 UTC +mdp.39015060424127 2023-04-15 03:55 UTC +njp.32101075672905 2022-11-05 23:42 UTC +hvd.32044010396893 2022-12-09 08:13 UTC +mdp.39015010329087 2023-03-31 15:46 UTC +njp.32101076890142 2024-03-21 00:18 UTC +mdp.39015010328121 2023-09-04 14:39 UTC +uc1.b2972398 2023-08-31 14:51 UTC +hvd.ah3rmv 2023-03-18 08:21 UTC +mdp.39015048909868 2022-07-27 21:17 UTC +njp.32101074443415 2023-07-06 19:28 UTC +mdp.39015078153817 2022-12-15 18:56 UTC +hvd.32044040731473 2023-10-08 19:59 UTC +pst.000068744151 2023-12-10 02:20 UTC +njp.32101065266668 2022-09-04 00:16 UTC +njp.32101065266668 2022-09-04 00:16 UTC +hvd.32044086720679 2022-12-10 15:06 UTC +coo.31924057531109 2024-02-18 05:18 UTC +mdp.39015060429332 2023-04-18 12:44 UTC +chi.19606141 2024-02-27 20:53 UTC +hvd.32044010495000 2022-12-31 00:36 UTC +hvd.32044024285587 2022-12-31 02:07 UTC +njp.32101076201175 2024-03-19 13:08 UTC +uva.x030453236 2022-08-26 14:39 UTC +mdp.39015043572661 2022-09-30 17:45 UTC +uc1.b2905409 2022-10-05 17:01 UTC +mdp.39015060429555 2023-04-18 12:44 UTC +mdp.39015060429498 2023-04-15 03:56 UTC +mdp.39015008601109 2022-11-23 18:27 UTC +nyp.33433074853965 2022-12-24 03:37 UTC +njp.32101068158847 2023-05-20 00:32 UTC +hvd.hnlia3 2023-03-19 14:38 UTC +loc.ark:/13960/t3gx4tr12 2011-03-17 01:33 UTC +hvd.tz1l4c 2023-03-21 11:20 UTC +mdp.39015060430108 2023-04-15 03:57 UTC +hvd.tz1l4w 2023-05-31 12:29 UTC +njp.32101010945226 2023-07-05 23:53 UTC +mdp.39015031048211 2023-04-10 09:02 UTC +njp.32101077276523 2023-04-21 17:28 UTC +njp.32101076782703 2023-06-19 12:20 UTC +hvd.32044050831999 2023-03-22 13:37 UTC +mdp.39015012330885 2023-07-05 05:13 UTC +uc1.b3546679 2023-06-11 10:35 UTC +njp.32101075673481 2023-04-21 04:18 UTC +mdp.39015020441104 2022-12-21 00:42 UTC +coo.31924106553286 2023-12-06 22:25 UTC +njp.32101076472792 2023-04-21 11:37 UTC +mdp.39015043572372 2022-09-30 17:45 UTC +hvd.32044050827351 2023-03-22 13:37 UTC +mdp.39015063944592 2022-11-19 07:56 UTC +njp.32101063578650 2023-04-20 09:35 UTC +njp.32101076433125 2023-09-26 15:14 UTC +mdp.39015048893252 2022-12-15 12:05 UTC +coo.31924065856167 2024-02-18 07:26 UTC +njp.32101075716934 2024-03-19 12:06 UTC +coo.31924057525606 2024-02-10 15:13 UTC +coo.31924062186204 2023-12-01 18:53 UTC +uc1.b3385173 2023-06-12 02:44 UTC +mdp.39015059402357 2022-07-27 23:27 UTC +njp.32101075672624 2023-04-21 04:16 UTC +hvd.ah3kfk 2023-03-18 07:39 UTC +njp.32101076795333 2024-03-19 15:01 UTC +njp.32101023869397 2023-04-20 08:03 UTC +uc1.$b312189 2022-09-12 05:57 UTC +njp.32101007684614 2024-03-04 21:45 UTC +hvd.32044092624352 2023-01-16 13:00 UTC +uva.x000240890 2022-09-18 04:24 UTC +nyp.33433067294433 2023-05-14 08:02 UTC +nyp.33433067294433 2023-05-14 
08:02 UTC +coo.31924007186517 2023-11-30 00:54 UTC +njp.32101047468002 2023-09-22 21:04 UTC +hvd.32044009907841 2023-10-07 23:22 UTC +hvd.32044021008149 2023-10-08 11:00 UTC +njp.32101076201183 2023-07-03 04:24 UTC +njp.32101076457744 2023-04-21 11:15 UTC +mdp.39015060430082 2023-07-06 13:29 UTC +uc1.b3385513 2023-12-18 06:59 UTC +mdp.39015053252139 2022-12-16 11:29 UTC +inu.30000092253925 2023-04-10 00:21 UTC +pst.000008820648 2023-08-07 19:42 UTC +mdp.39015008570205 2023-06-05 00:18 UTC +coo.31924075116701 2023-09-27 19:07 UTC +nyp.33433089908747 2023-08-08 14:35 UTC +hvd.32044086791217 2022-12-09 17:53 UTC +njp.32101072577347 2024-03-17 02:19 UTC +njp.32101071985772 2022-12-25 02:44 UTC +mdp.39015056480562 2022-11-24 12:09 UTC +umn.31951002792969k 2022-09-14 23:40 UTC +nyp.33433087345637 2023-05-26 11:58 UTC +mdp.39015022469087 2023-08-06 16:32 UTC +inu.30000084048762 2023-04-21 01:25 UTC +njp.32101023869397 2023-04-20 08:03 UTC +mdp.39015008095047 2022-12-15 21:50 UTC +njp.32101077260600 2024-03-20 23:52 UTC +njp.32101077260618 2024-03-20 23:52 UTC +nyp.33433074380704 2023-07-05 06:03 UTC +inu.30000104007657 2023-03-30 18:06 UTC +njp.32101076457702 2023-04-21 11:15 UTC +mdp.39015043800013 2023-04-13 10:29 UTC +nyp.33433004518415 2024-03-08 20:02 UTC +hvd.32044038399135 2023-01-14 23:49 UTC +njp.32101077288247 2023-04-21 15:12 UTC +njp.32101076199130 2023-07-11 11:08 UTC +njp.32101076530979 2023-05-20 05:04 UTC +uc1.c2608792 2023-09-27 23:40 UTC +njp.32101076530979 2023-05-20 05:04 UTC +nyp.33433074380662 2023-05-31 03:09 UTC +hvd.32044011856838 2022-12-31 00:39 UTC +uc1.b3919785 2022-09-30 00:16 UTC +wu.89001946482 2023-07-21 14:01 UTC +uc1.b3311895 2023-05-24 06:50 UTC +uc2.ark:/13960/t8w95458t 2010-05-03 22:15 UTC +mdp.39015013094217 2023-07-22 15:04 UTC +mdp.39015008305289 2023-06-15 15:36 UTC +njp.32101076530979 2023-05-20 05:04 UTC +hvd.hwp8ba 2023-04-07 15:28 UTC +njp.32101076457066 2023-04-05 18:58 UTC +coo1.ark:/13960/t4bp0n867 2022-11-07 18:14 UTC +njp.32101076530979 2023-05-20 05:04 UTC +uc1.b2972949 2023-05-26 05:19 UTC +njp.32101045352828 2023-09-22 21:20 UTC +njp.32101047467988 2023-09-22 21:04 UTC +hvd.32044086759800 2022-12-10 14:16 UTC +uc1.b3885866 2022-09-29 21:15 UTC +mdp.39015060429357 2023-04-18 12:44 UTC +njp.32101077288569 2024-03-20 23:10 UTC +aeu.ark:/13960/t1pg22p71 2014-09-15 07:08 UTC +nyp.33433082488887 2023-01-24 13:51 UTC +njp.32101064475831 2024-03-16 21:24 UTC +uc1.31175035197097 2022-05-24 06:26 UTC +njp.32101076880150 2023-05-13 23:22 UTC +coo.31924008821047 2022-11-25 23:34 UTC +nyp.33433074380720 2023-06-01 19:48 UTC +hvd.32044038400958 2023-10-08 18:51 UTC +mdp.39015030932753 2022-12-15 07:29 UTC +nyp.33433074380696 2023-07-11 10:18 UTC +njp.32101076889508 2023-10-26 04:28 UTC +njp.32101077288239 2023-04-21 15:12 UTC +hvd.32044098627268 2022-12-29 10:23 UTC +hvd.32044092634013 2023-01-15 12:33 UTC +hvd.32044014683114 2023-10-08 04:35 UTC +hvd.32044058190059 2023-01-15 05:02 UTC +umn.31951002804000l 2024-02-02 16:44 UTC +uc1.$b661479 2023-05-03 21:36 UTC +hvd.32044092645134 2023-01-15 11:07 UTC +mdp.39015060429357 2023-04-18 12:44 UTC +mdp.39015074687149 2023-07-20 18:19 UTC +mdp.39015035805772 2023-01-29 00:15 UTC +pst.000068744458 2023-11-01 15:38 UTC +mdp.39015016898432 2023-01-13 01:34 UTC +uc1.b3924130 2022-06-10 04:29 UTC +uc1.b3924129 2022-06-16 11:52 UTC +njp.32101075672509 2023-04-21 04:16 UTC +uc1.b3293449 2022-08-31 05:38 UTC +loc.ark:/13960/t1fj37n7j 2011-03-12 18:03 UTC +loc.ark:/13960/t9280zf6z 2011-03-19 03:14 UTC +njp.32101076200664 
2023-04-04 23:21 UTC +njp.32101076403425 2024-03-20 22:53 UTC +mdp.39015060429340 2023-04-18 12:44 UTC +mdp.39015003348201 2023-11-24 05:38 UTC +mdp.39015067091739 2023-04-10 13:54 UTC +mdp.39015059896285 2023-08-06 17:42 UTC +mdp.39015060430397 2023-04-18 12:45 UTC +inu.30000099860565 2022-12-16 15:46 UTC +njp.32101073025528 2023-10-25 16:37 UTC +hvd.32044090276395 2023-06-22 04:37 UTC +hvd.hwilnp 2023-03-20 08:08 UTC +dul1.ark:/13960/t6d23816n 2014-01-10 17:15 UTC +njp.32101075716934 2024-03-19 12:06 UTC +njp.32101075716934 2024-03-19 12:06 UTC +njp.32101076199239 2023-07-04 12:30 UTC +hvd.hnqbsv 2023-03-19 17:15 UTC +nyp.33433081647616 2024-03-01 04:11 UTC +njp.32101037023239 2023-10-28 04:07 UTC +njp.32101037601646 2023-11-01 09:45 UTC +njp.32101063578791 2024-03-06 13:16 UTC +hvd.32044012418034 2023-10-08 02:17 UTC +ucm.5326809190 2022-05-22 09:08 UTC +mdp.39015054289338 2022-11-24 12:41 UTC +njp.32101074443332 2024-03-19 10:54 UTC +njp.32101074443399 2023-07-12 14:30 UTC +njp.32101074443415 2023-07-06 19:28 UTC +njp.32101021580343 2023-04-20 08:03 UTC +inu.30000099860326 2022-12-16 15:47 UTC +njp.32101076041084 2023-08-02 17:35 UTC +njp.32101075672749 2023-04-21 04:17 UTC +njp.32101075729960 2024-03-19 11:48 UTC +hvd.32044098627433 2022-12-29 10:24 UTC +uc1.b3885859 2023-06-02 19:17 UTC +mdp.39015060429530 2023-04-18 12:44 UTC +njp.32101077288213 2023-04-08 12:24 UTC +mdp.39076000323746 2023-08-13 21:22 UTC +inu.32000000683138 2024-01-19 09:06 UTC +mdp.39015027588287 2024-01-25 09:54 UTC +mdp.39015073107768 2024-01-05 21:48 UTC +coo.31924057525671 2024-02-10 13:52 UTC +hvd.32044092711480 2023-01-16 15:29 UTC +uc1.b3924126 2022-06-05 01:58 UTC +hvd.32044038400958 2023-10-08 18:51 UTC +hvd.32044092797232 2023-01-16 15:42 UTC +njp.32101076384435 2023-03-26 09:14 UTC +njp.32101076378189 2023-10-25 19:32 UTC +hvd.32044009957044 2023-10-07 23:28 UTC +njp.32101076378536 2023-10-25 19:32 UTC +hvd.hnqbsx 2023-03-26 02:32 UTC +hvd.32044012418034 2023-10-08 02:17 UTC +nyp.33433081756896 2023-01-18 18:29 UTC +chi.55220547 2024-02-12 15:35 UTC +njp.32101076889979 2023-10-27 10:54 UTC +chi.79213384 2024-02-11 10:28 UTC +uiuo.ark:/13960/t4qk01n82 2014-06-04 10:01 UTC +hvd.32044048962955 2023-02-26 13:35 UTC +hvd.32044092677376 2023-10-10 01:49 UTC +uc1.32106001646766 2023-05-10 19:57 UTC +uc1.32106015528877 2022-06-21 04:43 UTC +nyp.33433081672853 2023-09-21 13:26 UTC +njp.32101076426079 2023-04-21 11:04 UTC +njp.32101076040946 2023-09-26 18:29 UTC +mdp.39015048893823 2023-07-01 02:53 UTC +uc1.b2974316 2023-06-07 20:53 UTC +uc2.ark:/13960/t8ff3wr3q 2010-04-29 11:05 UTC +hvd.hxkepr 2023-03-03 05:40 UTC +njp.32101076457728 2023-04-21 11:15 UTC +coo.31924069259624 2023-10-14 20:07 UTC +hvd.32044092640663 2023-01-17 00:26 UTC +njp.32101075673622 2024-03-19 10:31 UTC +mdp.39015049192910 2023-07-03 08:47 UTC +hvd.32044092796093 2023-01-16 15:01 UTC +mdp.39015030866506 2023-11-08 17:54 UTC +hvd.32044092797190 2023-01-16 15:02 UTC +chi.12755443 2024-02-27 20:53 UTC +hvd.32044048963029 2024-01-03 09:15 UTC +hvd.hnqbsr 2023-03-19 17:15 UTC +mdp.39015048893831 2023-06-27 22:57 UTC +hvd.32044048963136 2024-01-03 09:15 UTC +hvd.32044103001129 2023-01-17 20:00 UTC +hvd.hxe6bx 2023-03-26 04:48 UTC +uiug.30112001676896 2024-02-28 02:20 UTC +inu.30000099671525 2023-07-04 02:42 UTC +inu.30000099671624 2023-05-30 09:46 UTC +uiug.30112046384886 2022-05-19 10:57 UTC +hvd.hnqbtn 2023-03-19 17:16 UTC +njp.32101065266304 2022-09-03 13:08 UTC +njp.32101076201183 2023-07-03 04:24 UTC +mdp.39015060429381 2023-04-09 19:32 UTC 
+njp.32101075672871 2023-04-21 04:17 UTC +hvd.32044010396893 2022-12-09 08:13 UTC +uc1.b3385477 2023-06-12 02:37 UTC +nyp.33433082219621 2023-06-01 06:45 UTC +njp.32101064467036 2023-11-02 08:51 UTC +hvd.32044031571342 2023-01-14 20:51 UTC +hvd.32044054989868 2023-10-09 01:31 UTC +inu.30000099671566 2023-05-30 09:46 UTC +mdp.39015060429399 2023-04-18 12:44 UTC +mdp.39015033845689 2023-11-26 10:54 UTC +hvd.32044098628274 2022-12-29 11:19 UTC +mdp.39015059397953 2023-04-13 23:26 UTC +nyp.33433075914071 2023-07-03 02:44 UTC +mdp.39015008095104 2023-07-01 22:59 UTC +njp.32101065270892 2023-07-15 08:12 UTC +hvd.32044092711431 2023-01-16 14:37 UTC +coo.31924066518758 2023-11-19 14:39 UTC +uc1.ax0003129954 2022-07-17 21:13 UTC +njp.32101076472909 2023-07-06 01:55 UTC +mdp.39015060429480 2023-04-15 03:08 UTC +mdp.39015030936325 2023-04-06 02:07 UTC +hvd.32044098641632 2022-12-29 10:48 UTC +njp.32101075672855 2023-07-05 05:32 UTC +chi.79279237 2022-08-29 08:22 UTC +hvd.32044011590692 2022-12-31 01:51 UTC +njp.32101076472917 2022-09-10 13:56 UTC +mdp.39015059402340 2023-02-01 16:00 UTC +umn.31951002792970z 2024-01-25 03:01 UTC +njp.32101076472958 2023-07-11 16:57 UTC +mdp.39015060430116 2023-04-18 12:45 UTC +njp.32101076472933 2022-11-10 16:07 UTC +mdp.39015060430058 2023-04-18 12:44 UTC +mdp.39015060430371 2023-04-18 12:45 UTC +mdp.39015014523602 2022-12-15 04:38 UTC +hvd.32044098641343 2022-12-29 11:26 UTC +njp.32101007893256 2023-08-21 10:56 UTC +njp.32101063551608 2023-03-06 12:40 UTC +hvd.32044092754100 2023-01-16 14:48 UTC +mdp.39015041879613 2008-06-05 03:42 UTC +nyp.33433074829270 2022-05-01 00:20 UTC +njp.32101075672608 2023-04-21 05:02 UTC +hvd.hnqbst 2023-03-19 17:15 UTC +hvd.hxe6bz 2023-03-26 05:00 UTC +hvd.32044010396893 2022-12-09 08:13 UTC +nyp.33433082219902 2023-09-21 16:01 UTC +coo1.ark:/13960/t3st84m4q 2015-07-18 06:04 UTC +mdp.39015036664079 2023-01-27 20:38 UTC +loc.ark:/13960/t0xp7hp6s 2011-03-12 12:24 UTC +uc1.c2641998 2023-12-22 22:52 UTC +hvd.32044092796085 2023-10-10 02:10 UTC +hvd.32044092797208 2023-01-16 15:02 UTC +mdp.39015008095153 2023-01-31 19:47 UTC +mdp.39015060429506 2023-04-09 19:32 UTC +uc1.$b272656 2022-09-14 05:41 UTC +hvd.hnle8h 2023-03-19 15:06 UTC +nyp.33433000182992 2023-05-18 20:49 UTC +inu.30000099671723 2023-05-30 09:46 UTC +mdp.39015060429589 2023-04-18 12:44 UTC +hvd.hnqbtj 2023-03-19 17:15 UTC +mdp.39015060429522 2023-04-09 19:32 UTC +hvd.hxe6c3 2023-03-26 05:00 UTC +nyp.33433074894126 2023-03-06 04:28 UTC +njp.32101075672632 2023-04-21 04:16 UTC +mdp.39015043572539 2022-12-26 14:27 UTC +hvd.hx2hrd 2023-04-04 14:47 UTC +njp.32101047467996 2023-09-22 21:04 UTC +njp.32101047468002 2023-09-22 21:04 UTC +coo1.ark:/13960/t70v9287j 2015-07-18 13:54 UTC +njp.32101077879508 2022-09-07 18:55 UTC +mdp.39015060429449 2023-04-18 12:44 UTC +mdp.39015087701341 2024-03-10 19:04 UTC +hvd.32044074313453 2022-12-09 16:18 UTC +njp.32101076457785 2023-04-21 11:15 UTC +mdp.39015060429548 2023-04-15 03:56 UTC +chi.78013677 2023-08-04 04:31 UTC +mdp.39015015383279 2023-08-04 09:23 UTC +nyp.33433076071004 2022-10-05 14:13 UTC +hvd.hnqbtr 2023-03-26 02:32 UTC +inu.30000099671541 2022-12-16 15:45 UTC +coo1.ark:/13960/t9n30f16x 2015-07-18 20:16 UTC +mdp.39015073107529 2024-01-05 20:27 UTC +njp.32101063578627 2024-03-06 13:16 UTC +coo.31924062189661 2023-07-08 12:30 UTC +njp.32101076471414 2023-12-24 13:04 UTC +coo.31924066146733 2023-08-11 17:47 UTC +mdp.39015060429431 2023-04-09 19:32 UTC +mdp.39015049192894 2023-08-07 09:02 UTC +uc1.b2972410 2022-10-01 23:20 UTC 
+mdp.39015062280055 2022-11-26 21:01 UTC +inu.30000099671558 2023-07-10 04:42 UTC +inu.30000104005750 2023-03-30 18:06 UTC +njp.32101077262788 2023-06-02 10:52 UTC +mdp.39015049192928 2023-04-18 07:10 UTC +njp.32101075672616 2023-04-21 04:16 UTC From bd7a6b1c6f1c54debc54daf979d5a60780a9d017 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Thu, 21 Mar 2024 18:19:24 -0400 Subject: [PATCH 26/71] Customize fields when saving as new #591 --- ppa/archive/admin.py | 45 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/ppa/archive/admin.py b/ppa/archive/admin.py index 6849fbe9..e7931162 100644 --- a/ppa/archive/admin.py +++ b/ppa/archive/admin.py @@ -177,9 +177,54 @@ def source_link(self, obj): source_link.short_description = "Source id" source_link.admin_order_field = "source_id" + def change_view(self, request, object_id, form_url="", extra_context=None): + # customize behavior when copying a record and saving as new + if request.POST.get("_saveasnew"): + # if source is unset, this means we are loading the "save as new" + # form for a hathitrust record + if not request.POST.get("source"): + # customize save as new field contents + instance = DigitizedWork.objects.get(pk=object_id) + # make a copy of the querydict so we can update it + post_params = request.POST.copy() + # read-only fields should be preserved + post_params["source"] = instance.source + post_params["source_id"] = instance.source_id + post_params["source_url"] = instance.source_url + post_params["record_id"] = instance.record_id + # clear out fields that should be changed when excerpting + clear_fields = [ + "title", + "sort_title", + "author", + "pages_orig", + "pages_digital", + # "page_count", # read-only, does not automatically propagate + "notes", + "public_notes", + "collections", + "cluster", + ] + for field in clear_fields: + try: + del post_params[field] + except KeyError: + pass + + # update request with our modified post parameters + request.POST = post_params + + return super().change_view( + request, + object_id, + form_url, + extra_context=extra_context, + ) + def save_model(self, request, obj, form, change): """Note any fields in the protected list that have been changed in the admin and preserve in database.""" + # If new object, created from scratch, nothing to track and preserve # or if item is not a HathiTrust item, save and return if not change or obj.source != DigitizedWork.HATHI: From c34c92db5a85de3de377c2aafca9856286493f5e Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Thu, 21 Mar 2024 18:20:06 -0400 Subject: [PATCH 27/71] Handle excerpt page count & indexing when saving new records #591 --- ppa/archive/models.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/ppa/archive/models.py b/ppa/archive/models.py index 365ff996..f8c87939 100644 --- a/ppa/archive/models.py +++ b/ppa/archive/models.py @@ -598,18 +598,25 @@ def save(self, *args, **kwargs): self.source_id = new_source_id self.pages_digital = new_pages_digital - if self.has_changed("pages_digital"): + # if excerpt page range has changed + # OR this is a new record with a page range + if self.has_changed("pages_digital") or ( + self.pk is None and self.pages_digital + ): # update the page count if possible (i.e., not a Gale record) self.page_count = self.count_pages() - # if there is a page range set, update page count and index - if self.pages_digital: + # if page range changed on existing record, clear out old index + if self.pages_digital and self.pk is not None: # update index 
to remove all pages that are no longer in range self.solr.update.delete_by_query( 'source_id:"%s" AND item_type:page NOT order:(%s)' % (self.source_id, " OR ".join(str(p) for p in self.page_span)) ) # any page range change requires reindexing (potentially slow) - logger.debug("Reindexing pages for %s after change to page range", self) + if self.pk is None: + logger.debug("Indexing pages for new excerpt %s", self) + else: + logger.debug("Reindexing pages for %s after change to page range", self) self.index_items(Page.page_index_data(self)) # NOTE: removing a page range may not work as expected # (does not recalculate page count; cannot recalculate for Gale items) From 191e84b9535823c9d1bd57afaaa7d2c5d24b2db5 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Fri, 22 Mar 2024 11:18:07 -0400 Subject: [PATCH 28/71] Update unit tests and remove redundant excerpt logic --- .../management/commands/hathi_excerpt.py | 19 +++++++------------ ppa/archive/tests/test_views.py | 4 +++- 2 files changed, 10 insertions(+), 13 deletions(-) diff --git a/ppa/archive/management/commands/hathi_excerpt.py b/ppa/archive/management/commands/hathi_excerpt.py index ea1fb458..38aa9585 100644 --- a/ppa/archive/management/commands/hathi_excerpt.py +++ b/ppa/archive/management/commands/hathi_excerpt.py @@ -41,7 +41,7 @@ from django.core.management.base import BaseCommand, CommandError from parasolr.django.signals import IndexableSignalHandler -from ppa.archive.models import Collection, DigitizedWork, Page +from ppa.archive.models import Collection, DigitizedWork logger = logging.getLogger(__name__) @@ -83,7 +83,8 @@ def handle(self, *args, **kwargs): self.excerpt(row) self.stdout.write( - "\nExcerpted {excerpted:,d} existing records; created {created:,d} new excerpts. {error:,d} errors.".format_map( + "\nExcerpted {excerpted:,d} existing records; " + + "created {created:,d} new excerpts. {error:,d} errors.".format_map( self.stats ) ) @@ -146,12 +147,9 @@ def excerpt(self, row): digwork.public_notes = row.get("Public Notes", "") try: - # Calculate & save number of pages based on page range. - # (automatically calculated on save for excerpt but not - # for newly created items) - # Could trigger parse error if page span is invalid. - digwork.page_count = digwork.count_pages() # save to create or update in the database + # page count is automatically calculated on save for excerpts + # Could trigger parse error if page span is invalid. digwork.save() except intspan.ParseError as err: self.stderr.write( @@ -173,14 +171,11 @@ def excerpt(self, row): if created: self.stats["created"] += 1 - # any page range change requires reindexing (potentially slow) - logger.debug("Indexing pages for new excerpt %s", digwork) - DigitizedWork.index_items(Page.page_index_data(digwork)) - + # pages are automatically indexed when saving a new excerpt else: + self.stats["excerpted"] += 1 # Indexed pages are automatically updated for existing records on save # when page range has changed. 
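A minimal sketch of the save-time behaviour these comments rely on, introduced by the DigitizedWork.save() change in the previous patch (the page range shown is illustrative, and the sketch assumes the volume's page data is available locally so count_pages() can succeed):

    # sketch only: with the updated save(), adjusting the digital page range
    # is enough; page_count is recalculated and the excerpt's pages are
    # reindexed without any explicit indexing call from this command
    digwork.pages_digital = "12-22"
    digwork.save()
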
- self.stats["excerpted"] += 1 DigitizedWork.index_items([digwork]) diff --git a/ppa/archive/tests/test_views.py b/ppa/archive/tests/test_views.py index 8cfef2b0..18683a59 100644 --- a/ppa/archive/tests/test_views.py +++ b/ppa/archive/tests/test_views.py @@ -170,8 +170,10 @@ def test_anonymous_display_excerpt_hathi(self): response, hathi_page_url(excerpt.source_id, excerpt.first_page()) ) - def test_anonymous_display_excerpt_gale(self): + @patch("ppa.archive.models.DigitizedWork.index_items") + def test_anonymous_display_excerpt_gale(self, mock_index_items): # create a gale excerpt to test link logic + # patch index_items to skip attempting to index pages excerpt = DigitizedWork.objects.create( source_id="abc.1234", source_url="https://hdl.example.co/9823/abc.1234", From c82fd2f95a5e2ec34fb57cd8580a789c497fce8d Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Fri, 22 Mar 2024 11:51:02 -0400 Subject: [PATCH 29/71] Update tests so creating test excerpts works without hathi data --- ppa/archive/management/commands/hathi_excerpt.py | 7 +++---- ppa/archive/tests/test_models.py | 3 ++- ppa/archive/tests/test_views.py | 9 ++++++--- 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/ppa/archive/management/commands/hathi_excerpt.py b/ppa/archive/management/commands/hathi_excerpt.py index 38aa9585..8b7bec24 100644 --- a/ppa/archive/management/commands/hathi_excerpt.py +++ b/ppa/archive/management/commands/hathi_excerpt.py @@ -83,10 +83,9 @@ def handle(self, *args, **kwargs): self.excerpt(row) self.stdout.write( - "\nExcerpted {excerpted:,d} existing records; " - + "created {created:,d} new excerpts. {error:,d} errors.".format_map( - self.stats - ) + f"\nExcerpted {self.stats['excerpted']:,d} existing records; " + + f"created {self.stats['created']:,d} new excerpts. " + + f"{self.stats['error']:,d} errors." ) def load_collections(self): diff --git a/ppa/archive/tests/test_models.py b/ppa/archive/tests/test_models.py index cdc57fde..42abb1b0 100644 --- a/ppa/archive/tests/test_models.py +++ b/ppa/archive/tests/test_models.py @@ -689,7 +689,8 @@ def test_save_suppress(self): work.save() mock_hathiobj.delete_pairtree_data.assert_not_called() - def test_save_suppress_excerpt(self): + @patch("ppa.archive.models.DigitizedWork.index_items") + def test_save_suppress_excerpt(self, mock_index_items): work = DigitizedWork(source_id="chi.79279237", item_type=DigitizedWork.EXCERPT) with patch.object(work, "hathi") as mock_hathiobj: # no change in status - nothing should happen diff --git a/ppa/archive/tests/test_views.py b/ppa/archive/tests/test_views.py index 18683a59..afc4d41c 100644 --- a/ppa/archive/tests/test_views.py +++ b/ppa/archive/tests/test_views.py @@ -150,7 +150,8 @@ def test_anonymous_display_no_volume(self): msg_prefix="Volume metadata should not display if no enumcron", ) - def test_anonymous_display_excerpt_hathi(self): + @patch("ppa.archive.models.DigitizedWork.index_items") + def test_anonymous_display_excerpt_hathi(self, mock_index_items): # create an excerpt excerpt = DigitizedWork.objects.create( source_id="abc.1234", @@ -192,7 +193,8 @@ def test_anonymous_display_excerpt_gale(self, mock_index_items): ), ) - def test_anonymous_display_article_hathi(self): + @patch("ppa.archive.models.DigitizedWork.index_items") + def test_anonymous_display_article_hathi(self, mock_index_items): # create an article article = DigitizedWork.objects.create( source_id="abc.1234", @@ -426,7 +428,8 @@ def test_search_within_ajax(self): # should have pagination self.assertContains(response, '
Date: Mon, 25 Mar 2024 11:11:39 -0400 Subject: [PATCH 30/71] Include protected_fields in read-only fields copied when saving as new ref #591 --- ppa/archive/admin.py | 1 + 1 file changed, 1 insertion(+) diff --git a/ppa/archive/admin.py b/ppa/archive/admin.py index e7931162..c8a34104 100644 --- a/ppa/archive/admin.py +++ b/ppa/archive/admin.py @@ -192,6 +192,7 @@ def change_view(self, request, object_id, form_url="", extra_context=None): post_params["source_id"] = instance.source_id post_params["source_url"] = instance.source_url post_params["record_id"] = instance.record_id + post_params["protected_fields"] = instance.protected_fields # clear out fields that should be changed when excerpting clear_fields = [ "title", From f586df1c2309fc4ca8c637ce7d3289b140c21789 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Mon, 25 Mar 2024 13:15:05 -0400 Subject: [PATCH 31/71] Make protected_fields editable so we can copy when using save as new ref #591 --- ppa/archive/admin.py | 13 ++++++++++++- ppa/archive/tests/test_admin.py | 8 +++++++- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/ppa/archive/admin.py b/ppa/archive/admin.py index c8a34104..a8670cec 100644 --- a/ppa/archive/admin.py +++ b/ppa/archive/admin.py @@ -158,6 +158,13 @@ def get_readonly_fields(self, request, obj=None): """ if obj and obj.source == DigitizedWork.HATHI: return self.hathi_readonly_fields + self.readonly_fields + + print(request.POST) + if request.POST.get("_saveasnew"): + # protected fields must not be read-only in order + # to preserve/copy when saving as new + return ("added", "updated") + return self.readonly_fields def list_collections(self, obj): @@ -192,7 +199,11 @@ def change_view(self, request, object_id, form_url="", extra_context=None): post_params["source_id"] = instance.source_id post_params["source_url"] = instance.source_url post_params["record_id"] = instance.record_id - post_params["protected_fields"] = instance.protected_fields + # copy protected wield flags in simple string format + post_params[ + "protected_fields" + ] = instance.protected_fields.to_simple_str() + # clear out fields that should be changed when excerpting clear_fields = [ "title", diff --git a/ppa/archive/tests/test_admin.py b/ppa/archive/tests/test_admin.py index 860a4a80..d3d6c067 100644 --- a/ppa/archive/tests/test_admin.py +++ b/ppa/archive/tests/test_admin.py @@ -56,7 +56,13 @@ def test_readonly_fields(self): site = AdminSite() digadmin = DigitizedWorkAdmin(DigitizedWork, site) - assert digadmin.get_readonly_fields(Mock()) == digadmin.readonly_fields + assert digadmin.get_readonly_fields(Mock(POST={})) == digadmin.readonly_fields + + # when using 'save as new', protected fields should not be read only + assert digadmin.get_readonly_fields(Mock(POST={"_saveasnew": 1})) == ( + "added", + "updated", + ) # hathi record hathi_work = DigitizedWork.objects.first() From 07fe596b86c8adadd58555b0d456565807e8a652 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Mon, 25 Mar 2024 15:00:26 -0400 Subject: [PATCH 32/71] Make protected_fields not required when editing in admin --- ...21_alter_digitizedwork_protected_fields.py | 22 +++++++++++++++++++ ppa/archive/models.py | 7 ++++-- 2 files changed, 27 insertions(+), 2 deletions(-) create mode 100644 ppa/archive/migrations/0021_alter_digitizedwork_protected_fields.py diff --git a/ppa/archive/migrations/0021_alter_digitizedwork_protected_fields.py b/ppa/archive/migrations/0021_alter_digitizedwork_protected_fields.py new file mode 100644 index 00000000..887f3aed --- /dev/null +++ 
b/ppa/archive/migrations/0021_alter_digitizedwork_protected_fields.py @@ -0,0 +1,22 @@ +# Generated by Django 5.0.2 on 2024-03-25 18:52 + +import ppa.archive.models +from django.db import migrations + + +class Migration(migrations.Migration): + dependencies = [ + ("archive", "0020_digitizedwork_page_count_help_text"), + ] + + operations = [ + migrations.AlterField( + model_name="digitizedwork", + name="protected_fields", + field=ppa.archive.models.ProtectedWorkField( + blank=True, + default=ppa.archive.models.ProtectedWorkFieldFlags, + help_text="Fields protected from HathiTrust bulk update because they have been manually edited in the Django admin.", + ), + ), + ] diff --git a/ppa/archive/models.py b/ppa/archive/models.py index f8c87939..4bcbc0bd 100644 --- a/ppa/archive/models.py +++ b/ppa/archive/models.py @@ -184,8 +184,10 @@ class ProtectedWorkField(models.Field): ) def __init__(self, verbose_name=None, name=None, **kwargs): - """Make the field unnullable and not allowed to be blank.""" - super().__init__(verbose_name, name, blank=False, null=False, **kwargs) + """Make the field unnullable; by default, not allowed to be blank.""" + if "blank" not in kwargs: + kwargs["blank"] = False + super().__init__(verbose_name, name, null=False, **kwargs) def from_db_value(self, value, expression, connection): """Always return an instance of :class:`ProtectedWorkFieldFlags`""" @@ -403,6 +405,7 @@ class DigitizedWork(ModelIndexable, TrackChangesModel): #: modified in Django admin. protected_fields = ProtectedWorkField( default=ProtectedWorkFieldFlags, + blank=True, # required for save as new, where we make editable to copy help_text="Fields protected from HathiTrust bulk " "update because they have been manually edited in the " "Django admin.", From 36f6065bf12cf6b586cd2030a04fcac68e4405e8 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Mon, 25 Mar 2024 16:36:40 -0400 Subject: [PATCH 33/71] Handle empty string when converting value to protected field flag --- ppa/archive/models.py | 2 ++ ppa/archive/tests/test_models.py | 8 ++++++++ 2 files changed, 10 insertions(+) diff --git a/ppa/archive/models.py b/ppa/archive/models.py index 4bcbc0bd..1a9cead6 100644 --- a/ppa/archive/models.py +++ b/ppa/archive/models.py @@ -198,6 +198,8 @@ def get_internal_type(self): return "PositiveSmallIntegerField" def get_prep_value(self, value): + if value == "": + return 0 return int(value) def to_python(self, value): diff --git a/ppa/archive/tests/test_models.py b/ppa/archive/tests/test_models.py index 42abb1b0..e21e688a 100644 --- a/ppa/archive/tests/test_models.py +++ b/ppa/archive/tests/test_models.py @@ -28,6 +28,7 @@ DigitizedWork, Page, ProtectedWorkFieldFlags, + ProtectedWorkField, ) FIXTURES_PATH = os.path.join(settings.BASE_DIR, "ppa", "archive", "fixtures") @@ -49,6 +50,13 @@ def test_str(self): assert str(fields) == "enumcron, sort_title, title" +class TestProtectedWorkField(TestCase): + def test_get_prep_value(self): + assert ProtectedWorkField().get_prep_value("1") == 1 + # handle empty string + assert ProtectedWorkField().get_prep_value("") == 0 + + @pytest.mark.django_db class TestSignalHandlers: @patch.object(ModelIndexable, "index_items") From 9954fcc9f5b3c7e330325e10f0ec0816bfc3bdf2 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Mon, 25 Mar 2024 16:36:40 -0400 Subject: [PATCH 34/71] Remove debug print statement in admin view --- ppa/archive/admin.py | 1 - 1 file changed, 1 deletion(-) diff --git a/ppa/archive/admin.py b/ppa/archive/admin.py index a8670cec..e9266364 100644 --- a/ppa/archive/admin.py 
+++ b/ppa/archive/admin.py @@ -159,7 +159,6 @@ def get_readonly_fields(self, request, obj=None): if obj and obj.source == DigitizedWork.HATHI: return self.hathi_readonly_fields + self.readonly_fields - print(request.POST) if request.POST.get("_saveasnew"): # protected fields must not be read-only in order # to preserve/copy when saving as new From c07effcd57315db4ce6072239f69b83e7dbd5da7 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Thu, 28 Mar 2024 12:32:04 -0400 Subject: [PATCH 35/71] Warn on missing page count; improve output for negative work/page diff resolves #596 --- .../management/commands/index_pages.py | 37 +++++++++++++++++-- 1 file changed, 34 insertions(+), 3 deletions(-) diff --git a/ppa/archive/management/commands/index_pages.py b/ppa/archive/management/commands/index_pages.py index a5c49a77..7682c6d2 100644 --- a/ppa/archive/management/commands/index_pages.py +++ b/ppa/archive/management/commands/index_pages.py @@ -117,9 +117,27 @@ def handle(self, *args, **kwargs): if self.verbosity >= self.v_normal: if work_diff: - self.stdout.write(f"{work_diff:,} works not indexed in Solr") + # negative = more works in solr than database + if work_diff < 0: + self.stdout.write( + self.style.WARNING( + f"{abs(work_diff):,} extra works indexed in Solr; " + + " may need to clear old data" + ) + ) + else: + self.stdout.write(f"{work_diff:,} works not indexed in Solr") if page_diff: - self.stdout.write(f"{page_diff:,} pages not indexed in Solr") + # negative = more pages in solr than expected + if work_diff < 0: + self.stdout.write( + self.style.WARNING( + f"{abs(page_diff):,} more pages indexed in Solr than expected" + ) + ) + + else: + self.stdout.write(f"{page_diff:,} pages not indexed in Solr") if kwargs.get("expedite"): # find works with missing pages @@ -133,7 +151,20 @@ def handle(self, *args, **kwargs): pages_per_work = facets.facet_fields["group_id"] for digwork in DigitizedWork.items_to_index(): solr_page_count = pages_per_work.get(digwork.index_id(), 0) - if digwork.page_count != solr_page_count: + # it indicates an error, but page count could be null; + # if so, assume page count mismatch + if digwork.page_count is None: + # add to list of works to index + mismatches.append(digwork) + # warn about the missing page count + if self.verbosity >= self.v_normal: + self.stdout.write( + self.style.WARNING( + f"Warning: {digwork} page count is not set in database" + ) + ) + + elif digwork.page_count != solr_page_count: # add to list of works to index mismatches.append(digwork) From 203c682812300a0d5a3b9f9dc648be6fa5b25e5a Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Thu, 28 Mar 2024 14:15:14 -0400 Subject: [PATCH 36/71] New manage command to update excerpt digital page range adapted from hathi_excerpt manage command resolves #625 --- .../management/commands/adjust_excerpts.py | 136 ++++++++++++++++++ 1 file changed, 136 insertions(+) create mode 100644 ppa/archive/management/commands/adjust_excerpts.py diff --git a/ppa/archive/management/commands/adjust_excerpts.py b/ppa/archive/management/commands/adjust_excerpts.py new file mode 100644 index 00000000..5f99b9de --- /dev/null +++ b/ppa/archive/management/commands/adjust_excerpts.py @@ -0,0 +1,136 @@ +""" +**adjust_excerpts** is a custom manage command to update +the digital page range for excerpts or articles. It requires a CSV file +with source id and original page range (to identify the correct record), +and the new digital page range. 
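For illustration, a minimal input file might look like the following (the file name and values are examples only, mirroring the unit tests added later in this patch series):

    source_id,pages_orig,new_pages_digital
    abc.13245089,10-20,15-25

and would be applied with:

    python manage.py adjust_excerpts excerpt_updates.csv
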
+ +The CSV must include: + * source_id + * pages_orig + * new_pages_digital + +Updated records are automatically indexed in Solr. +""" + +import csv +import logging + +import intspan +from django.conf import settings +from django.contrib.admin.models import CHANGE, LogEntry +from django.contrib.auth.models import User +from django.contrib.contenttypes.models import ContentType +from django.core.management.base import BaseCommand, CommandError + +from ppa.archive.models import DigitizedWork + +logger = logging.getLogger(__name__) + + +class Command(BaseCommand): + """Update digital page range for excerpted works.""" + + help = __doc__ + + #: normal verbosity level + v_normal = 1 + verbosity = v_normal + + def add_arguments(self, parser): + parser.add_argument("csv", help="CSV file with updated page ranges") + + def handle(self, *args, **kwargs): + self.verbosity = kwargs.get("verbosity", self.verbosity) + + # load csv file and check required fields + excerpt_info = self.load_csv(kwargs["csv"]) + + self.stats = {"error": 0, "notfound": 0, "updated": 0} + + # get script user and digwork content type for creating log entries + self.script_user = User.objects.get(username=settings.SCRIPT_USERNAME) + self.digwork_contentype = ContentType.objects.get_for_model(DigitizedWork) + + for row in excerpt_info: + self.update_excerpt(row) + + self.stdout.write( + f"\nUpdated {self.stats['updated']:,d} records. " + + f"{self.stats['notfound']:,d} not found, " + + f"{self.stats['error']:,d} error{'s' if self.stats['error'] != 1 else ''}." + ) + + def update_excerpt(self, row): + """Process a row of the spreadsheet, find an existing excerpt + by source id and original page range, and update the digital + pages.""" + + # lookup by source id and original page range + digwork = DigitizedWork.objects.filter( + source_id=row["source_id"], pages_orig=row["pages_orig"] + ).first() + if not digwork: + self.stdout.write( + self.style.WARNING( + "No record found for source id %(source_id)s and pages_orig %(pages_orig)s" + % row + ) + ) + self.stats["notfound"] += 1 + return + + # update digital page range + digwork.pages_digital = row["new_pages_digital"] + # if this is not a change, do nothing + if not digwork.has_changed("pages_digital"): + return + + try: + # save in the database; + # should automatically recalculate page range and index page content + digwork.save() + self.stats["updated"] += 1 + except intspan.ParseError as err: + self.stderr.write( + self.style.WARNING("Error saving %s: %s" % (digwork, err)) + ) + self.stats["error"] += 1 + return + + # if changed and save succeeded, log the update + self.log_update(digwork) + + def log_update(self, digwork): + """Create a log entry to document digital page range change.""" + + # create log entry to record what was done + LogEntry.objects.log_action( + user_id=self.script_user.pk, + content_type_id=self.digwork_contentype.pk, + object_id=digwork.pk, + object_repr=str(digwork), + change_message="Updated pages_digital", + action_flag=CHANGE, + ) + + csv_required_fields = ["source_id", "pages_orig", "new_pages_digital"] + + def load_csv(self, path): + """Load a CSV file with information about excerpts to be updated.""" + try: + with open(path, encoding="utf-8-sig") as csvfile: + csvreader = csv.DictReader(csvfile) + data = [ + row for row in csvreader if any(row.values()) + ] # skip blank rows + except FileNotFoundError: + raise CommandError("Error loading the specified CSV file: %s" % path) + + csv_keys = set(data[0].keys()) + csv_key_diff = 
set(self.csv_required_fields).difference(csv_keys) + # if any required fields are not present, error and quit + if csv_key_diff: + raise CommandError( + "Missing required fields in CSV file: %s" % ", ".join(csv_key_diff) + ) + return data From 0379a08108f0f07faaa60d2a72286302fb76c7ec Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Thu, 28 Mar 2024 14:57:26 -0400 Subject: [PATCH 37/71] Update adjust_excerpts to avoid duplication, add unit tests - extend hathi_excerpt management command to avoid duplicating load_csv --- .../management/commands/adjust_excerpts.py | 55 +++----- ppa/archive/tests/test_adjust_excerpts.py | 124 ++++++++++++++++++ 2 files changed, 145 insertions(+), 34 deletions(-) create mode 100644 ppa/archive/tests/test_adjust_excerpts.py diff --git a/ppa/archive/management/commands/adjust_excerpts.py b/ppa/archive/management/commands/adjust_excerpts.py index 5f99b9de..20373c8d 100644 --- a/ppa/archive/management/commands/adjust_excerpts.py +++ b/ppa/archive/management/commands/adjust_excerpts.py @@ -12,7 +12,6 @@ Updated records are automatically indexed in Solr. """ -import csv import logging import intspan @@ -20,44 +19,53 @@ from django.contrib.admin.models import CHANGE, LogEntry from django.contrib.auth.models import User from django.contrib.contenttypes.models import ContentType -from django.core.management.base import BaseCommand, CommandError +from django.template.defaultfilters import pluralize from ppa.archive.models import DigitizedWork +from ppa.archive.management.commands import hathi_excerpt logger = logging.getLogger(__name__) -class Command(BaseCommand): +class Command(hathi_excerpt.Command): """Update digital page range for excerpted works.""" help = __doc__ + # inherits csv loading & validation from hathi_excerpt command #: normal verbosity level v_normal = 1 verbosity = v_normal + #: override required fields + csv_required_fields = ["source_id", "pages_orig", "new_pages_digital"] def add_arguments(self, parser): parser.add_argument("csv", help="CSV file with updated page ranges") + def setup(self): + "common setup steps for running the script or testing" + + self.stats = {"error": 0, "notfound": 0, "updated": 0, "unchanged": 0} + self.script_user = User.objects.get(username=settings.SCRIPT_USERNAME) + self.digwork_contentype = ContentType.objects.get_for_model(DigitizedWork) + def handle(self, *args, **kwargs): self.verbosity = kwargs.get("verbosity", self.verbosity) # load csv file and check required fields excerpt_info = self.load_csv(kwargs["csv"]) - - self.stats = {"error": 0, "notfound": 0, "updated": 0} - - # get script user and digwork content type for creating log entries - self.script_user = User.objects.get(username=settings.SCRIPT_USERNAME) - self.digwork_contentype = ContentType.objects.get_for_model(DigitizedWork) + self.setup() for row in excerpt_info: self.update_excerpt(row) + # summarize what was done self.stdout.write( - f"\nUpdated {self.stats['updated']:,d} records. " + f"\nUpdated {self.stats['updated']:,d} " + + f"record{pluralize(self.stats['updated'])}. " + + f"{self.stats['unchanged']:,d} unchanged, " + f"{self.stats['notfound']:,d} not found, " - + f"{self.stats['error']:,d} error{'s' if self.stats['error'] != 1 else ''}." + + f"{self.stats['error']:,d} error{pluralize(self.stats['error'])}." 
) def update_excerpt(self, row): @@ -83,6 +91,7 @@ def update_excerpt(self, row): digwork.pages_digital = row["new_pages_digital"] # if this is not a change, do nothing if not digwork.has_changed("pages_digital"): + self.stats["unchanged"] += 1 return try: @@ -108,29 +117,7 @@ def log_update(self, digwork): user_id=self.script_user.pk, content_type_id=self.digwork_contentype.pk, object_id=digwork.pk, - object_repr=str(digwork), + object_repr=repr(digwork), change_message="Updated pages_digital", action_flag=CHANGE, ) - - csv_required_fields = ["source_id", "pages_orig", "new_pages_digital"] - - def load_csv(self, path): - """Load a CSV file with information about excerpts to be updated.""" - try: - with open(path, encoding="utf-8-sig") as csvfile: - csvreader = csv.DictReader(csvfile) - data = [ - row for row in csvreader if any(row.values()) - ] # skip blank rows - except FileNotFoundError: - raise CommandError("Error loading the specified CSV file: %s" % path) - - csv_keys = set(data[0].keys()) - csv_key_diff = set(self.csv_required_fields).difference(csv_keys) - # if any required fields are not present, error and quit - if csv_key_diff: - raise CommandError( - "Missing required fields in CSV file: %s" % ", ".join(csv_key_diff) - ) - return data diff --git a/ppa/archive/tests/test_adjust_excerpts.py b/ppa/archive/tests/test_adjust_excerpts.py new file mode 100644 index 00000000..6e11145a --- /dev/null +++ b/ppa/archive/tests/test_adjust_excerpts.py @@ -0,0 +1,124 @@ +from io import StringIO +from unittest.mock import patch + +import pytest +from django.contrib.admin.models import CHANGE, LogEntry +from django.core.management import call_command + +from ppa.archive.models import DigitizedWork +from ppa.archive.management.commands import adjust_excerpts + + +@pytest.mark.django_db +class TestAdjustExcerptsCommand: + @patch("ppa.archive.models.DigitizedWork.index_items") + def test_update_success(self, mock_index_items): + source_id = "abc.13245089" + pages_orig = "10-20" + pages_digital = "12-22" + work = DigitizedWork.objects.create( + source_id=source_id, pages_orig=pages_orig, pages_digital=pages_digital + ) + + cmd = adjust_excerpts.Command() + cmd.setup() # initialize stats dict + + # test with sample info coming from csv + update_info = { + "source_id": source_id, + "pages_orig": pages_orig, + "new_pages_digital": "15-25", + } + cmd.update_excerpt(update_info) + assert cmd.stats["updated"] == 1 + # inspect the newly-excerpted work; get a fresh copy from the db + excerpt = DigitizedWork.objects.get(pk=work.pk) + assert excerpt.pages_digital == update_info["new_pages_digital"] + + # check that log entry was created to document the change + log = LogEntry.objects.get(object_id=excerpt.pk) + assert log.action_flag == CHANGE + assert log.change_message == "Updated pages_digital" + assert log.user.username == "script" + + def test_not_found(self, capsys): + cmd = adjust_excerpts.Command() + cmd.setup() # initialize stats dict + + # test with sample info, no corresponding db record + update_info = { + "source_id": "abcs.123", + "pages_orig": "i-iii", + "new_pages_digital": "15-25", + } + cmd.update_excerpt(update_info) + assert cmd.stats["notfound"] == 1 + captured = capsys.readouterr() + assert "No record found" in captured.out + + def test_error(self, capsys): + source_id = "abc.13245089" + pages_orig = "10-20" + pages_digital = "12-22" + DigitizedWork.objects.create( + source_id=source_id, pages_orig=pages_orig, pages_digital=pages_digital + ) + + cmd = adjust_excerpts.Command() + 
cmd.setup() + # test with sample info coming from csv + update_info = { + "source_id": source_id, + "pages_orig": pages_orig, + "new_pages_digital": "BOGUS", + } + cmd.update_excerpt(update_info) + assert cmd.stats["error"] == 1 + # check captured output + captured = capsys.readouterr() + assert f"Error saving {source_id}" in captured.err + + def test_unchanged(self): + source_id = "abc.13245089" + pages_orig = "10-20" + pages_digital = "12-22" + DigitizedWork.objects.create( + source_id=source_id, pages_orig=pages_orig, pages_digital=pages_digital + ) + cmd = adjust_excerpts.Command() + cmd.setup() + + # test with sample info coming from csv + update_info = { + "source_id": source_id, + "pages_orig": pages_orig, + "new_pages_digital": pages_digital, + } + cmd.update_excerpt(update_info) + assert cmd.stats["unchanged"] == 1 + + @patch("ppa.archive.models.DigitizedWork.index_items") + def test_call_commmand(self, mock_index_items, tmp_path): + source_id = "abc.13245089" + pages_orig = "10-20" + pages_digital = "12-22" + DigitizedWork.objects.create( + source_id=source_id, pages_orig=pages_orig, pages_digital=pages_digital + ) + stdout = StringIO() + # create minimal valid CSV with all required fields + csvfile = tmp_path / "excerpt_updates.csv" + csvfile.write_text( + "\n".join( + [ + "source_id,pages_orig,new_pages_digital", + f"{source_id},{pages_orig},25-30", + ] + ) + ) + call_command("adjust_excerpts", csvfile, stdout=stdout) + output = stdout.getvalue() + assert "Updated 1 record." in output + assert "0 errors" in output + assert "0 not found" in output + assert "0 unchanged" in output From d296e0a1d9eeec71585e27c4235dc773fe0f5c46 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Thu, 28 Mar 2024 15:05:24 -0400 Subject: [PATCH 38/71] Update tests so they do not error without hathi data setting --- ppa/archive/tests/test_adjust_excerpts.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/ppa/archive/tests/test_adjust_excerpts.py b/ppa/archive/tests/test_adjust_excerpts.py index 6e11145a..d7963e04 100644 --- a/ppa/archive/tests/test_adjust_excerpts.py +++ b/ppa/archive/tests/test_adjust_excerpts.py @@ -17,7 +17,10 @@ def test_update_success(self, mock_index_items): pages_orig = "10-20" pages_digital = "12-22" work = DigitizedWork.objects.create( - source_id=source_id, pages_orig=pages_orig, pages_digital=pages_digital + source_id=source_id, + pages_orig=pages_orig, + pages_digital=pages_digital, + source=DigitizedWork.OTHER, ) cmd = adjust_excerpts.Command() @@ -61,7 +64,10 @@ def test_error(self, capsys): pages_orig = "10-20" pages_digital = "12-22" DigitizedWork.objects.create( - source_id=source_id, pages_orig=pages_orig, pages_digital=pages_digital + source_id=source_id, + pages_orig=pages_orig, + pages_digital=pages_digital, + source=DigitizedWork.OTHER, ) cmd = adjust_excerpts.Command() @@ -83,7 +89,10 @@ def test_unchanged(self): pages_orig = "10-20" pages_digital = "12-22" DigitizedWork.objects.create( - source_id=source_id, pages_orig=pages_orig, pages_digital=pages_digital + source_id=source_id, + pages_orig=pages_orig, + pages_digital=pages_digital, + source=DigitizedWork.OTHER, ) cmd = adjust_excerpts.Command() cmd.setup() From 401f44c6798bfa922c4d984d5a4729d67a8711f8 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Thu, 28 Mar 2024 15:41:16 -0400 Subject: [PATCH 39/71] Check the correct variable for negative page count difference --- ppa/archive/management/commands/index_pages.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/ppa/archive/management/commands/index_pages.py b/ppa/archive/management/commands/index_pages.py index 7682c6d2..eff9a1a7 100644 --- a/ppa/archive/management/commands/index_pages.py +++ b/ppa/archive/management/commands/index_pages.py @@ -129,7 +129,7 @@ def handle(self, *args, **kwargs): self.stdout.write(f"{work_diff:,} works not indexed in Solr") if page_diff: # negative = more pages in solr than expected - if work_diff < 0: + if page_diff < 0: self.stdout.write( self.style.WARNING( f"{abs(page_diff):,} more pages indexed in Solr than expected" From ae124a01f6b6b1ce0b26ed70371f9907832e8d71 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Thu, 4 Apr 2024 09:20:24 -0400 Subject: [PATCH 40/71] Add old work id field to DigitizedWork --- .../0022_digitizedwork_old_workid.py | 22 +++++++++++++++++++ ppa/archive/models.py | 7 ++++++ 2 files changed, 29 insertions(+) create mode 100644 ppa/archive/migrations/0022_digitizedwork_old_workid.py diff --git a/ppa/archive/migrations/0022_digitizedwork_old_workid.py b/ppa/archive/migrations/0022_digitizedwork_old_workid.py new file mode 100644 index 00000000..5673147e --- /dev/null +++ b/ppa/archive/migrations/0022_digitizedwork_old_workid.py @@ -0,0 +1,22 @@ +# Generated by Django 5.0.2 on 2024-04-04 13:20 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("archive", "0021_alter_digitizedwork_protected_fields"), + ] + + operations = [ + migrations.AddField( + model_name="digitizedwork", + name="old_workid", + field=models.CharField( + blank=True, + help_text="past work id; used for excerpts previously identified by start of digital page range", + max_length=255, + verbose_name="Old Work ID", + ), + ), + ] diff --git a/ppa/archive/models.py b/ppa/archive/models.py index 1a9cead6..6c526641 100644 --- a/ppa/archive/models.py +++ b/ppa/archive/models.py @@ -480,6 +480,13 @@ class DigitizedWork(ModelIndexable, TrackChangesModel): blank=True, validators=[validate_page_range], ) + old_workid = models.CharField( + "Old Work ID", + max_length=255, + help_text="past work id; used for excerpts previously " + + "identified by start of digital page range", + blank=True, + ) class Meta: ordering = ("sort_title",) From 94d22e0c9809885594f0591303e043fe240d0708 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Thu, 4 Apr 2024 10:01:34 -0400 Subject: [PATCH 41/71] Data migration to populate old work id for excerpt + first digital page --- .../0023_save_excerpt_old_workid.py | 31 +++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 ppa/archive/migrations/0023_save_excerpt_old_workid.py diff --git a/ppa/archive/migrations/0023_save_excerpt_old_workid.py b/ppa/archive/migrations/0023_save_excerpt_old_workid.py new file mode 100644 index 00000000..4a568884 --- /dev/null +++ b/ppa/archive/migrations/0023_save_excerpt_old_workid.py @@ -0,0 +1,31 @@ +# Generated by Django 5.0.2 on 2024-04-04 13:26 + +from django.db import migrations + +from intspan import intspan + + +def populate_excerpt_old_workid(apps, schema_editor): + DigitizedWork = apps.get_model("archive", "DigitizedWork") + # find all works with a digital page range + for digwork in DigitizedWork.objects.exclude(pages_digital=""): + # use logic similar to model code to parse the page range + # and get the number of the first page + first_digital_page = list(intspan(digwork.pages_digital))[0] + # should not be possible to save a record with a page range + # that can't be parsed by intspan + # previously, excerpt id was source_id-pN where N is 
first digital page + digwork.old_workid = f"{digwork.source_id}-p{first_digital_page}" + digwork.save() + + +class Migration(migrations.Migration): + dependencies = [ + ("archive", "0022_digitizedwork_old_workid"), + ] + + operations = [ + migrations.RunPython( + code=populate_excerpt_old_workid, reverse_code=migrations.RunPython.noop + ) + ] From df468e720d830b287244f2e13ad8208d2aae14b8 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Thu, 4 Apr 2024 10:47:43 -0400 Subject: [PATCH 42/71] Update digitized work url and view to use original start page previously used first page in digital pages, but that is not stable --- ppa/archive/models.py | 11 +++++------ ppa/archive/tests/test_models.py | 24 ++++++++++++++++++++++-- ppa/archive/tests/test_views.py | 16 +++++++++++++--- ppa/archive/urls.py | 3 ++- ppa/archive/views.py | 4 ++-- 5 files changed, 44 insertions(+), 14 deletions(-) diff --git a/ppa/archive/models.py b/ppa/archive/models.py index 6c526641..e36c7912 100644 --- a/ppa/archive/models.py +++ b/ppa/archive/models.py @@ -505,7 +505,7 @@ def get_absolute_url(self): """ url_opts = {"source_id": self.source_id} # start page must be specified if set but must not be included if empty - if self.pages_digital: + if self.pages_orig: url_opts["start_page"] = self.first_page() return reverse("archive:detail", kwargs=url_opts) @@ -846,10 +846,9 @@ def populate_from_bibdata(self, bibdata): } def first_page(self): - """Number of the first page in range, if this is an excerpt""" - # return digital page for now; may be switching to original - # or this method may be going away - return self.first_page_digital() + """Number of the first page in range, if this is an excerpt + (first of original page range, not digital)""" + return self.first_page_original() def first_page_digital(self): """Number of the first page in range (digital pages / page index), @@ -877,7 +876,7 @@ def index_id(self): """use source id + first page in range (if any) as solr identifier""" first_page = self.first_page() if first_page: - return "%s-p%d" % (self.source_id, first_page) + return "%s-p%s" % (self.source_id, first_page) return self.source_id @classmethod diff --git a/ppa/archive/tests/test_models.py b/ppa/archive/tests/test_models.py index e21e688a..d8bc764b 100644 --- a/ppa/archive/tests/test_models.py +++ b/ppa/archive/tests/test_models.py @@ -552,16 +552,25 @@ def test_index_data(self): assert index_data["id"] == digwork.source_id def test_get_absolute_url(self): - work = DigitizedWork.objects.first() + work = DigitizedWork.objects.filter(pages_orig="").first() + print(work) assert work.get_absolute_url() == reverse( "archive:detail", kwargs={"source_id": work.source_id} ) - work.pages_digital = "11-13" + work.pages_orig = "11-13" + print(work) + print(work.first_page()) + print(work.first_page_original()) assert work.get_absolute_url() == reverse( "archive:detail", kwargs={"source_id": work.source_id, "start_page": 11} ) + work.pages_orig = "iii-xi" + assert work.get_absolute_url() == reverse( + "archive:detail", kwargs={"source_id": work.source_id, "start_page": "iii"} + ) + @patch("ppa.archive.models.HathiBibliographicAPI") def test_get_metadata_hathi(self, mock_hathibib): work = DigitizedWork(source_id="ht:1234") @@ -672,6 +681,17 @@ def test_index_id(self): work = DigitizedWork(source_id="chi.79279237") assert work.index_id() == work.source_id + # for excerpts, index id includes first page from original page range + excerpt = DigitizedWork( + source_id="chi.89279238", pages_orig="3-5", pages_digital="5-7" + ) + 
assert excerpt.index_id() == f"{excerpt.source_id}-p3" + + excerpt = DigitizedWork( + source_id="abc.123459238", pages_orig="ii-iv", pages_digital="3-4" + ) + assert excerpt.index_id() == f"{excerpt.source_id}-pii" + def test_save_suppress(self): work = DigitizedWork(source_id="chi.79279237") with patch.object(work, "hathi") as mock_hathiobj: diff --git a/ppa/archive/tests/test_views.py b/ppa/archive/tests/test_views.py index afc4d41c..8704dd81 100644 --- a/ppa/archive/tests/test_views.py +++ b/ppa/archive/tests/test_views.py @@ -438,7 +438,7 @@ def test_get_queryset(self, mock_index_items): assert self.client.get(bogus_dial_excerpt_url).status_code == 404 # create and retrieve an excerpt; should return 200 ok with correct object dial_excerpt = DigitizedWork.objects.create( - source_id=self.dial.source_id, pages_digital="200-250" + source_id=self.dial.source_id, pages_orig="200-250", pages_digital="202-251" ) response = self.client.get(dial_excerpt.get_absolute_url()) assert response.status_code == 200 @@ -449,9 +449,17 @@ def test_get_queryset(self, mock_index_items): assert response.status_code == 200 assert response.context["object"] == self.dial + # confirm first page regex filter works propertly + dial_excerpt2 = DigitizedWork.objects.create( + source_id=self.dial.source_id, pages_orig="20-25", pages_digital="22-27" + ) + response = self.client.get(dial_excerpt2.get_absolute_url()) + # start page 20 should match 20 only and not 200 + assert response.context["object"] == dial_excerpt2 + # create excerpt where there is no existing work excerpt = DigitizedWork.objects.create( - source_id="abc.123456", pages_digital="10-20" + source_id="abc.123456", pages_orig="10-20", pages_digital="12-22" ) response = self.client.get(excerpt.get_absolute_url()) # retrieve url for source id with no start apge @@ -464,7 +472,9 @@ def test_get_queryset(self, mock_index_items): assert response["Location"] == excerpt.get_absolute_url() # if there are *TWO* excerpts for the same source, should 404 instead of redirecting - DigitizedWork.objects.create(source_id="abc.123456", pages_digital="30-45") + DigitizedWork.objects.create( + source_id="abc.123456", pages_orig="30-45", pages_digital="32-47" + ) assert self.client.get(nonexistent_source_url).status_code == 404 diff --git a/ppa/archive/urls.py b/ppa/archive/urls.py index 9b722be8..e3e6239c 100644 --- a/ppa/archive/urls.py +++ b/ppa/archive/urls.py @@ -21,8 +21,9 @@ views.DigitizedWorkByRecordId.as_view(), name="record-id", ), + # excerpt original page may be numeric or alpha (e.g., roman numerals) re_path( - r"^(?P[^-]+)-p(?P\d+)/", + r"^(?P[^-]+)-p(?P[\da-zA-Z]+)/", views.DigitizedWorkDetailView.as_view(), name="detail", ), diff --git a/ppa/archive/views.py b/ppa/archive/views.py index 4bbf965a..3cfff097 100644 --- a/ppa/archive/views.py +++ b/ppa/archive/views.py @@ -299,10 +299,10 @@ def get_queryset(self): start_page = self.kwargs.get("start_page") # if start page is specified, filter to get the correct excerpt if start_page: - qs = source_qs.filter(pages_digital__startswith=start_page) + qs = source_qs.filter(pages_orig__regex=f"^{start_page}([,-]|\b)") # if start page is NOT specified, ensure we do not retrieve an excerpt else: - qs = source_qs.filter(pages_digital__exact="") + qs = source_qs.filter(pages_orig__exact="") # if qs is empty and start page is not set, check if there is _one_ excerpt # for the source id; if there is, we want to return a permanent redirect From f8a493092bb5bb0017b49c7ceb6d97100e1e1fcd Mon Sep 17 00:00:00 2001 From: 
rlskoeser Date: Thu, 4 Apr 2024 10:48:58 -0400 Subject: [PATCH 43/71] Add unique constraint on source id + original page range --- ...digitizedwork_unique_sourceid_pages_orig.py | 18 ++++++++++++++++++ ppa/archive/models.py | 7 ++++++- 2 files changed, 24 insertions(+), 1 deletion(-) create mode 100644 ppa/archive/migrations/0024_digitizedwork_unique_sourceid_pages_orig.py diff --git a/ppa/archive/migrations/0024_digitizedwork_unique_sourceid_pages_orig.py b/ppa/archive/migrations/0024_digitizedwork_unique_sourceid_pages_orig.py new file mode 100644 index 00000000..328116aa --- /dev/null +++ b/ppa/archive/migrations/0024_digitizedwork_unique_sourceid_pages_orig.py @@ -0,0 +1,18 @@ +# Generated by Django 5.0.2 on 2024-04-04 14:48 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("archive", "0023_save_excerpt_old_workid"), + ] + + operations = [ + migrations.AddConstraint( + model_name="digitizedwork", + constraint=models.UniqueConstraint( + fields=("source_id", "pages_orig"), name="unique_sourceid_pages_orig" + ), + ), + ] diff --git a/ppa/archive/models.py b/ppa/archive/models.py index e36c7912..204ca515 100644 --- a/ppa/archive/models.py +++ b/ppa/archive/models.py @@ -495,7 +495,12 @@ class Meta: constraints = [ models.UniqueConstraint( fields=["source_id", "pages_digital"], name="unique_sourceid_pagerange" - ) + ), + # we are now using original page range for unique id, + # so require source id + pages_orig to be unique + models.UniqueConstraint( + fields=["source_id", "pages_orig"], name="unique_sourceid_pages_orig" + ), ] def get_absolute_url(self): From 0d169c1225f28f9c0acffb0ae748031b623c82d1 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Thu, 4 Apr 2024 11:07:47 -0400 Subject: [PATCH 44/71] Add redirect based on old work id (first digital page) --- ppa/archive/tests/test_views.py | 22 ++++++++++++++++++++-- ppa/archive/views.py | 17 +++++++++++++---- 2 files changed, 33 insertions(+), 6 deletions(-) diff --git a/ppa/archive/tests/test_views.py b/ppa/archive/tests/test_views.py index 8704dd81..126f9fbf 100644 --- a/ppa/archive/tests/test_views.py +++ b/ppa/archive/tests/test_views.py @@ -457,9 +457,13 @@ def test_get_queryset(self, mock_index_items): # start page 20 should match 20 only and not 200 assert response.context["object"] == dial_excerpt2 - # create excerpt where there is no existing work + # create excerpt where there is no existing work; + # set old_workid based on first digital page excerpt = DigitizedWork.objects.create( - source_id="abc.123456", pages_orig="10-20", pages_digital="12-22" + source_id="abc.123456", + pages_orig="10-20", + pages_digital="12-22", + old_workid="abc.123456-p12", ) response = self.client.get(excerpt.get_absolute_url()) # retrieve url for source id with no start apge @@ -477,6 +481,20 @@ def test_get_queryset(self, mock_index_items): ) assert self.client.get(nonexistent_source_url).status_code == 404 + # if we try to find a work by the old id (first digital page), + # should redirect + response = self.client.get( + reverse( + "archive:detail", + kwargs={ + "source_id": excerpt.source_id, + "start_page": excerpt.first_page_digital(), + }, + ) + ) + assert response.status_code == 301 + assert response["Location"] == excerpt.get_absolute_url() + class TestDigitizedWorkListRequest(TestCase): fixtures = ["sample_digitized_works"] diff --git a/ppa/archive/views.py b/ppa/archive/views.py index 3cfff097..b4f845c2 100644 --- a/ppa/archive/views.py +++ b/ppa/archive/views.py @@ -304,11 +304,20 
@@ def get_queryset(self): else: qs = source_qs.filter(pages_orig__exact="") - # if qs is empty and start page is not set, check if there is _one_ excerpt - # for the source id; if there is, we want to return a permanent redirect - if not qs.exists() and not start_page: - if source_qs.count() == 1: + if not qs.exists(): + # if qs is empty and start page is not set, check if there is _one_ excerpt + # for the source id; if there is, we want to return a permanent redirect + if not start_page and source_qs.count() == 1: self.redirect_url = source_qs.first().get_absolute_url() + if start_page: + # if qs empty and start page _is_ set, check for an old id + # (previously excerpt ids were based on digital page range) + digwork_oldid = source_qs.filter( + old_workid="%(source_id)s-p%(start_page)s" % self.kwargs + ).first() + if digwork_oldid: + self.redirect_url = digwork_oldid.get_absolute_url() + # otherwise, return a 404 return qs From 1f1dba745b22de9804ba16b55ca82f796ee67c18 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Thu, 4 Apr 2024 11:24:54 -0400 Subject: [PATCH 45/71] Configure wagtail admin base url --- ppa/settings.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ppa/settings.py b/ppa/settings.py index c6d14ebd..07b11eff 100644 --- a/ppa/settings.py +++ b/ppa/settings.py @@ -204,6 +204,8 @@ SITE_ID = 1 WAGTAIL_SITE_NAME = "Princeton Prosody Archive" +# needed by wagtail to generate URLs for notification emails +WAGTAILADMIN_BASE_URL = "https://prosody.princeton.edu/" WAGTAILEMBEDS_FINDERS = [ {"class": "wagtail.embeds.finders.oembed"}, From 93b42d8c6c8cf5eef20358254d261e708f5aea46 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Thu, 4 Apr 2024 11:31:19 -0400 Subject: [PATCH 46/71] Remove unsupported draftail feature 'document' --- ppa/pages/models.py | 1 - 1 file changed, 1 deletion(-) diff --git a/ppa/pages/models.py b/ppa/pages/models.py index 266373c0..83733a7e 100644 --- a/ppa/pages/models.py +++ b/ppa/pages/models.py @@ -248,7 +248,6 @@ class BodyContentBlock(blocks.StreamBlock): "ul", "hr", "blockquote", - "document", "superscript", "subscript", "strikethrough", From 1e286ebac5e5db58b1ee8fc2bfbdeb8296b9208d Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Thu, 4 Apr 2024 12:55:12 -0400 Subject: [PATCH 47/71] Refactor django settings to use split-settings approach adapted from geniza project settings --- .github/workflows/unit-tests.yml | 7 +- README.rst | 2 +- ppa/settings/__init__.py | 14 +++ .../components/base.py} | 110 +++++------------- ppa/settings/components/debug.py | 12 ++ ppa/settings/environments/development.py | 16 +++ .../settings/environments/test.py | 39 +++---- ppa/{ => settings}/local_settings.py.sample | 0 requirements.txt | 1 + 9 files changed, 97 insertions(+), 104 deletions(-) create mode 100644 ppa/settings/__init__.py rename ppa/{settings.py => settings/components/base.py} (74%) create mode 100644 ppa/settings/components/debug.py create mode 100644 ppa/settings/environments/development.py rename ci/testsettings.py => ppa/settings/environments/test.py (55%) rename ppa/{ => settings}/local_settings.py.sample (100%) diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index 40922993..a9af212b 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -10,6 +10,7 @@ env: DB_NAME: ppa DB_USER: ppa DB_PASSWORD: ppa + DJANGO_ENV: test jobs: js-unit: @@ -89,12 +90,10 @@ jobs: pip install -r dev-requirements.txt - name: Setup local_settings.py - run: | - cp ci/testsettings.py ppa/local_settings.py -
python -c "import uuid; print('SECRET_KEY = \'%s\'' % uuid.uuid4())" >> ppa/local_settings.py + run: python -c "import uuid; print('SECRET_KEY = \'%s\'' % uuid.uuid4())" >> ppa/settings/local_settings.py - name: Run pytest - run: py.test --cov=./ --cov-report=xml + run: pytest --cov=./ --cov-report=xml - name: Upload test coverage to Codecov uses: codecov/codecov-action@v4 diff --git a/README.rst b/README.rst index a0698f60..0efa1dbb 100644 --- a/README.rst +++ b/README.rst @@ -55,7 +55,7 @@ Initial setup and installation: - Copy sample local settings and configure for your environment:: - cp ppa/local_settings.py.sample ppa/local_settings.py + cp ppa/settings/local_settings.py.sample ppa/settings/local_settings.py - Create a database, configure in local settings in the `DATABASES` dictionary, change `SECRET_KEY`, and run migrations:: diff --git a/ppa/settings/__init__.py b/ppa/settings/__init__.py new file mode 100644 index 00000000..6ed415b9 --- /dev/null +++ b/ppa/settings/__init__.py @@ -0,0 +1,14 @@ +from os import environ + +from split_settings.tools import include, optional + +ENV = environ.get("DJANGO_ENV") or "development" + +include( + "components/base.py", + "components/debug.py", + # optionally load environment-specific configuration + optional("environments/{0}.py".format(ENV)), + # for now, local settings is required + "local_settings.py", +) diff --git a/ppa/settings.py b/ppa/settings/components/base.py similarity index 74% rename from ppa/settings.py rename to ppa/settings/components/base.py index 07b11eff..06cdd50f 100644 --- a/ppa/settings.py +++ b/ppa/settings/components/base.py @@ -1,34 +1,22 @@ """ Django settings for ppa project. - -Generated by 'django-admin startproject' using Django 1.11.7. - -For more information on this file, see -https://docs.djangoproject.com/en/1.11/topics/settings/ - -For the full list of settings and their values, see -https://docs.djangoproject.com/en/1.11/ref/settings/ """ -import os +from pathlib import Path -# Quick-start development settings - unsuitable for production -# See https://docs.djangoproject.com/en/1.11/howto/deployment/checklist/ +# Build paths inside the project like this: BASE_DIR / 'subdir'. +# called from ppa-django/ppa/settings/__init__.py +# do NOT import this module directly, the path will be different +PROJECT_APP_PATH = Path(__file__).resolve().parent.parent +PROJECT_APP = PROJECT_APP_PATH.name +# base dir is one level up from that (ppa-django) +BASE_DIR = PROJECT_APP_PATH.parent # SECURITY WARNING: don't run with debug turned on in production! DEBUG = False ALLOWED_HOSTS = [] -######### -# PATHS # -######### - -# Full filesystem path to the project. -PROJECT_APP_PATH = os.path.dirname(os.path.abspath(__file__)) -PROJECT_APP = os.path.basename(PROJECT_APP_PATH) -PROJECT_ROOT = BASE_DIR = os.path.dirname(PROJECT_APP_PATH) - # Every cache key will get prefixed with this value - here we set it to # the name of the directory the project is in to try and use something # project specific. @@ -42,15 +30,15 @@ # Don't put anything in this directory yourself; store your static files # in apps' "static/" subdirectories and in STATICFILES_DIRS. # Example: "/home/media/media.lawrence.com/static/" -STATIC_ROOT = os.path.join(PROJECT_ROOT, STATIC_URL.strip("/")) +STATIC_ROOT = BASE_DIR / STATIC_URL.strip("/") # Additional locations of static files STATICFILES_DIRS = [ # Put strings here, like "/home/html/static" or "C:/www/django/static". # Always use forward slashes, even on Windows. 
# Don't forget to use absolute paths, not relative paths. - os.path.join(BASE_DIR, "sitemedia"), - os.path.join(BASE_DIR, "bundles"), + BASE_DIR / "sitemedia", + BASE_DIR / "bundles", ] # URL that handles the media served from MEDIA_ROOT. Make sure to use a @@ -60,7 +48,7 @@ # Absolute filesystem path to the directory that will hold user-uploaded files. # Example: "/home/media/media.lawrence.com/media/" -MEDIA_ROOT = os.path.join(PROJECT_ROOT, *MEDIA_URL.strip("/").split("/")) +MEDIA_ROOT = BASE_DIR / MEDIA_URL.strip("/") STATICFILES_FINDERS = ( "django.contrib.staticfiles.finders.FileSystemFinder", @@ -129,11 +117,10 @@ ROOT_URLCONF = "ppa.urls" - TEMPLATES = [ { "BACKEND": "django.template.backends.django.DjangoTemplates", - "DIRS": [os.path.join(BASE_DIR, "templates")], + "DIRS": [BASE_DIR / "templates"], "OPTIONS": { "context_processors": [ "django.template.context_processors.debug", @@ -154,18 +141,29 @@ WSGI_APPLICATION = "ppa.wsgi.application" - -# Database -# https://docs.djangoproject.com/en/1.11/ref/settings/#databases - DATABASES = { "default": { - "ENGINE": "django.db.backends.sqlite3", - "NAME": os.path.join(BASE_DIR, "db.sqlite3"), + "ENGINE": "django.db.backends.postgresql", + "NAME": "ppa", + "USER": "ppa", + "PASSWORD": "", + "HOST": "", # empty string for localhost + "PORT": "", # empty string for default + } +} + +SOLR_CONNECTIONS = { + "default": { + "URL": "http://localhost:8983/solr/", + "COLLECTION": "ppa", + "CONFIGSET": "ppa", + "TEST": { + # set aggressive commitWithin when testing + "COMMITWITHIN": 750, + }, } } -# preserve django 3.1 behavior DEFAULT_AUTO_FIELD = "django.db.models.AutoField" # Password validation @@ -212,12 +210,9 @@ {"class": "ppa.pages.embed_finders.GlitchEmbedFinder"}, ] -GRAPPELLI_ADMIN_TITLE = "Princeton Prosody Archive Admin" - # username for logging activity by local scripts SCRIPT_USERNAME = "script" - # PUCAS configuration for CAS/LDAP login and user provisioning. # Only includes non-sensitive configurations that do not change PUCAS_LDAP = { @@ -235,7 +230,7 @@ "DEFAULT": { "CACHE": True, "BUNDLE_DIR_NAME": "bundles/", # must end with slash - "STATS_FILE": os.path.join(BASE_DIR, "webpack-stats.json"), + "STATS_FILE": BASE_DIR / "webpack-stats.json", "POLL_INTERVAL": 0.1, "TIMEOUT": None, "IGNORE": [r".+\.hot-update.js", r".+\.map"], @@ -291,44 +286,3 @@ # load a manifest file CSP_MANIFEST_SRC = "'self'" - -################## -# LOCAL SETTINGS # -################## - -# (local settings import logic adapted from mezzanine) - -# Allow any settings to be defined in local_settings.py which should be -# ignored in your version control system allowing for settings to be -# defined per machine. - -# Instead of doing "from .local_settings import *", we use exec so that -# local_settings has full access to everything defined in this module. -# Also force into sys.modules so it's visible to Django's autoreload. 
- -f = os.path.join(BASE_DIR, "ppa", "local_settings.py") -if os.path.exists(f): - import imp - import sys - - module_name = "ppa.local_settings" - module = imp.new_module(module_name) - module.__file__ = f - sys.modules[module_name] = module - exec(open(f, "rb").read()) - -# if in debug mode and django-debug-toolbar is available, add to installed apps -if DEBUG: - try: - INSTALLED_APPS.append("debug_toolbar") - MIDDLEWARE.append("debug_toolbar.middleware.DebugToolbarMiddleware") - except ImportError: - pass - - # allow webpack dev server through CSP when in DEBUG - CSP_SCRIPT_SRC += ("http://localhost:3000", "'unsafe-eval'", "'unsafe-inline'") - CSP_STYLE_SRC += ("http://localhost:3000", "'unsafe-inline'") - CSP_CONNECT_SRC += ( - "http://localhost:3000", - "ws://localhost:3000", - ) diff --git a/ppa/settings/components/debug.py b/ppa/settings/components/debug.py new file mode 100644 index 00000000..cd2fde28 --- /dev/null +++ b/ppa/settings/components/debug.py @@ -0,0 +1,12 @@ +# if django-debug-toolbar is installed, enable it + +from ppa.settings.components.base import INSTALLED_APPS, MIDDLEWARE + +# Configure internal IPs for access to view debug toolbar +INTERNAL_IPS = ["127.0.0.1", "localhost"] + +try: + INSTALLED_APPS.append("debug_toolbar") + MIDDLEWARE += ("debug_toolbar.middleware.DebugToolbarMiddleware",) +except ImportError: + pass diff --git a/ppa/settings/environments/development.py b/ppa/settings/environments/development.py new file mode 100644 index 00000000..5d6f06f1 --- /dev/null +++ b/ppa/settings/environments/development.py @@ -0,0 +1,16 @@ +from ppa.settings import CSP_SCRIPT_SRC, CSP_STYLE_SRC, CSP_CONNECT_SRC + + +DEBUG = True + +# ALLOWED_HOSTS = ["*"] +CSP_REPORT_ONLY = True + +if DEBUG: + # allow webpack dev server through CSP when in DEBUG + CSP_SCRIPT_SRC += ("http://localhost:3000", "'unsafe-eval'", "'unsafe-inline'") + CSP_STYLE_SRC += ("http://localhost:3000", "'unsafe-inline'") + CSP_CONNECT_SRC += ( + "http://localhost:3000", + "ws://localhost:3000", + ) diff --git a/ci/testsettings.py b/ppa/settings/environments/test.py similarity index 55% rename from ci/testsettings.py rename to ppa/settings/environments/test.py index 8d22934f..fa8d90c4 100644 --- a/ci/testsettings.py +++ b/ppa/settings/environments/test.py @@ -1,14 +1,9 @@ -# This file is exec'd from settings.py, so it has access to and can -# modify all the variables in settings.py. +from ppa.settings import DATABASES, SOLR_CONNECTIONS -# If this file is changed in development, the development server will -# have to be manually restarted because changes will not be noticed -# immediately. - -DEBUG = False - -DATABASES = { - "default": { +# These settings correspond to the service container settings in the +# .github/workflow .yml files. 
+DATABASES["default"].update( + { "ENGINE": "django.db.backends.postgresql", "NAME": "ppa", "PASSWORD": "ppa", @@ -18,25 +13,27 @@ "TEST": { "CHARSET": "utf8", }, - }, -} - -# required by mezzanine for unit tests -ALLOWED_HOSTS = ["*"] + } +) -# required for integration tests that query Solr -SOLR_CONNECTIONS = { - "default": { +SOLR_CONNECTIONS["default"].update( + { "URL": "http://localhost:8983/solr/", "COLLECTION": "ppa", "CONFIGSET": "ppa", + # set aggressive commitWithin for test + "COMMITWITHIN": 750, "TEST": {"COMMITWITHIN": 100}, } -} +) + +# turn off debug so we see 404s when testing +DEBUG = False + +# required for tests when DEBUG = False +ALLOWED_HOSTS = ["*"] # use a fake webpack loader to ignore missing assets for unit tests WEBPACK_LOADER = { "DEFAULT": {"LOADER_CLASS": "webpack_loader.loaders.FakeWebpackLoader"} } - -# secret key added as a travis build step diff --git a/ppa/local_settings.py.sample b/ppa/settings/local_settings.py.sample similarity index 100% rename from ppa/local_settings.py.sample rename to ppa/settings/local_settings.py.sample diff --git a/requirements.txt b/requirements.txt index 92e81316..0984208a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -29,5 +29,6 @@ django-adminlogentries django-import-export psycopg2-binary multiprocess +django-split-settings # only needed for the 'generate_textcorpus' manage command orjsonl \ No newline at end of file From f907a8fc92e9bb37e8f1ec997ff982b502987407 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Thu, 4 Apr 2024 12:56:50 -0400 Subject: [PATCH 48/71] Simplify unit test setup; only testing on postgresql, not mysql --- .github/workflows/unit-tests.yml | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index a9af212b..ae05b290 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -7,9 +7,6 @@ on: - cron: "0 16 * * 2" env: - DB_NAME: ppa - DB_USER: ppa - DB_PASSWORD: ppa DJANGO_ENV: test jobs: @@ -50,13 +47,6 @@ jobs: ports: - 8983:8983 steps: - # Set the value of DJANGO_DB_BACKEND which is used in ci/testsettings.py to - # configure django's ORM based on whether we're testing postgres or mysql - - name: Set django database backend adapter - env: - BACKEND: postgresql - run: echo "DJANGO_DB_BACKEND=$(echo "$BACKEND")" >> $GITHUB_ENV - - name: Checkout repository uses: actions/checkout@v4 From d918cb3ea9d492d5ae504b89e43b30ef5607a1b4 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Thu, 4 Apr 2024 13:02:08 -0400 Subject: [PATCH 49/71] Update sphinx workflow for local settings path change --- .github/workflows/sphinx_docs.yml | 2 +- sphinx-docs/conf.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/sphinx_docs.yml b/.github/workflows/sphinx_docs.yml index 825ea71e..74b29d24 100644 --- a/.github/workflows/sphinx_docs.yml +++ b/.github/workflows/sphinx_docs.yml @@ -36,7 +36,7 @@ jobs: run: pip install -r dev-requirements.txt - name: Setup local_settings.py - run: python -c "import uuid; print('SECRET_KEY = \'%s\'' % uuid.uuid4())" >> ppa/local_settings.py + run: python -c "import uuid; print('SECRET_KEY = \'%s\'' % uuid.uuid4())" >> ppa/settings/local_settings.py - name: Build Sphinx docs run: cd sphinx-docs && make -b coverage html diff --git a/sphinx-docs/conf.py b/sphinx-docs/conf.py index 87692380..c64fdee6 100644 --- a/sphinx-docs/conf.py +++ b/sphinx-docs/conf.py @@ -63,7 +63,7 @@ # General information about the project. 
project = "Princeton Prosody Archive" -copyright = "2018, CDH @ Princeton University" +copyright = "2024, CDH @ Princeton University" author = "CDH @ Princeton University" # The version info for the project you're documenting, acts as replacement for From 05d2f3ec59cb5a407d2b429234bd85bb0d0421cc Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Thu, 4 Apr 2024 14:11:44 -0400 Subject: [PATCH 50/71] Remove unused test requirements file (out of date, overlaps with dev) --- test-requirements.txt | 4 ---- 1 file changed, 4 deletions(-) delete mode 100644 test-requirements.txt diff --git a/test-requirements.txt b/test-requirements.txt deleted file mode 100644 index b461b066..00000000 --- a/test-requirements.txt +++ /dev/null @@ -1,4 +0,0 @@ -pytest>=3.6,<5.4 -pytest-django -pytest-cov -django-webpack-loader \ No newline at end of file From ebe3607495de016a02732315876951c9a790ef75 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Thu, 4 Apr 2024 14:17:13 -0400 Subject: [PATCH 51/71] Suppress deprecation warnings when running pytest - configure pythonpath in pytest.ini (requires pytest 7 or greater) - simplify pytest instructions in readme; add note about suppressed warnings --- DEPLOYNOTES.rst | 6 ++++++ README.rst | 19 ++++++------------- dev-requirements.txt | 2 +- pytest.ini | 6 ++++++ 4 files changed, 19 insertions(+), 14 deletions(-) diff --git a/DEPLOYNOTES.rst b/DEPLOYNOTES.rst index 8d59b1af..19773102 100644 --- a/DEPLOYNOTES.rst +++ b/DEPLOYNOTES.rst @@ -3,6 +3,12 @@ Deploy and Upgrade notes ======================== +3.12 +---- + +* Settings are now configured with django-split-settings as a module; + the path to local_settings.py is now ppa/settings/local_settings.py + 3.11.2 ------ diff --git a/README.rst b/README.rst index 0efa1dbb..76482cba 100644 --- a/README.rst +++ b/README.rst @@ -109,29 +109,22 @@ either set of assets frequently. These two processes are separate as well:: Tests ~~~~~ -Python unit tests are written with `py.test `_ but use +Python unit tests are written with `pytest `_ but use Django fixture loading and convenience testing methods when that makes things easier. To run them, first install development requirements:: pip install -r dev-requirements.txt -Run tests using py.test. Note that this currently requires the -top level project directory be included in your python path. You can -accomplish this either by calling pytest via python:: +To run all python unit tests, use: `pytest` - python -m pytest - -Or, if you wish to use the ``pytest`` command directly, simply add the -top-level project directory to your python path environment variable:: - - setenv PYTHONPATH . # csh - export PYTHONPATH=. # bash +Some deprecation warnings for dependencies have been suppressed in +pytest.ini; to see warnings, run with `pytest -Wd`. Make sure you configure a test solr connection and set up an empty Solr core using the same instructions as for the development core. -Note that python unit tests access a test server over HTTP, and therefore -expect static files to be compiled – see "Frontend development setup" above +Some python unit tests access rendered views, and therefore +expect static files to be compiled; see "Frontend development setup" above for how to do this. In a CI context, we use a fake webpack loader backend that ignores missing assets. 
diff --git a/dev-requirements.txt b/dev-requirements.txt index d47cc703..dffb9f5e 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -1,5 +1,5 @@ -r requirements.txt -pytest>=5.0 +pytest>=7.0 pytest-django>=4.5.2 pytest-cov django-debug-toolbar diff --git a/pytest.ini b/pytest.ini index c6bbe1ea..06c46753 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,4 +1,5 @@ [pytest] +pythonpath = . DJANGO_SETTINGS_MODULE=ppa.settings # look for tests in standard django test locations python_files = "ppa/**/tests.py" "ppa/**/tests/*.py" "ppa/tests.py" @@ -6,3 +7,8 @@ python_files = "ppa/**/tests.py" "ppa/**/tests/*.py" "ppa/tests.py" addopts = -p parasolr.django.disconnect_indexing # limit testpath to speed up collecting step testpaths = ppa +# suppress warnings (several coming up for dependencies as of 2024-04) +filterwarnings = + ignore::django.utils.deprecation.RemovedInDjango51Warning + ignore::django.utils.deprecation.RemovedInDjango60Warning + ignore::DeprecationWarning \ No newline at end of file From e3b482d329a0e86900f34f1d7691f709c3f305c2 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Thu, 4 Apr 2024 14:33:51 -0400 Subject: [PATCH 52/71] Update and clean up sample local settings --- ppa/settings/local_settings.py.sample | 81 ++++++--------------------- 1 file changed, 18 insertions(+), 63 deletions(-) diff --git a/ppa/settings/local_settings.py.sample b/ppa/settings/local_settings.py.sample index fcb2cce8..078f78b2 100644 --- a/ppa/settings/local_settings.py.sample +++ b/ppa/settings/local_settings.py.sample @@ -1,63 +1,36 @@ -# Sample local settings -# Copy to derrida/local_settings.py and configure -# includes sensitive configurations, should *not* be -# checked into version control +# configurations that should not be checked into version control +# Copy to ppa/settings/local_settings.py and configure import os -# Build paths inside the project like this: os.path.join(BASE_DIR, ...) -BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) - - -# SECURITY WARNING: don't run with debug turned on in production! -DEBUG = True - -# Configure internal IPs for access to view debug toolbar -# INTERNAL_IPS = ['127.0.0.1'] - -ALLOWED_HOSTS = [] # SECURITY WARNING: keep the secret key used in production secret! # http://www.miniwebtool.com/django-secret-key-generator/ SECRET_KEY = '' - # Email address for a technical contact. 
# If set, will be used in From header for HathiTrust API requests # TECHNICAL_CONTACT = '' - # Turn this on in test/QA site to show test banner -#SHOW_TEST_WARNING = True +# SHOW_TEST_WARNING = True # Database -# https://docs.djangoproject.com/en/1.10/ref/settings/#databases -DATABASES = { - # sqlite for development - 'default': { - 'ENGINE': 'django.db.backends.sqlite3', - 'NAME': os.path.join(BASE_DIR, 'db.sqlite3'), - } - # postgresql for qa/prod - # "default": { - # "ENGINE": "django.db.backends.postgresql", - # "NAME": "ppa", - # "USER": "ppa", - # "PASSWORD": "ppa", - # "HOST": "", # empty string for localhost - # "PORT": "", # empty string for default - # }, -} - -SOLR_CONNECTIONS = { - 'default': { - 'URL': 'http://localhost:8983/solr/', - 'COLLECTION': 'ppa', - 'CONFIGSET': 'ppa', - 'TEST': { - 'COMMITWITHIN': 100 - } +# override default database settings as needed +# default name and user are both "ppa" +# DATABASES["default"]["NAME"] = "" +# DATABASES["default"]["USER"] = "" +DATABASES["default"]["PASSWORD"] = "pass!@#$" + +# override default Solr configuration as needed +# default collection and configset are both "ppa" +SOLR_CONNECTIONS["default"].update( + { + "URL": "http://localhost:8983/solr/", +# "COLLECTION": "ppa", +# "CONFIGSET": "ppa", + "TEST": {"COMMITWITHIN": 100}, } -} +) # local path to hathi pairtree data provided via rsync HATHI_DATA = '/path/to/hathi_pairtree_root' @@ -81,13 +54,6 @@ PUCAS_LDAP.update({ 'SEARCH_FILTER': "(uid=%(user)s)", }) - -# Absolute path to the directory static files should be collected to. -# Don't put anything in this directory yourself; store your static files -# in apps' "static/" subdirectories and in STATICFILES_DIRS. -# Example: "/home/media/media.lawrence.com/static/" -STATIC_ROOT = os.path.join(BASE_DIR, STATIC_URL.strip("/")) - # Admin email configuration for error messages # ADMINS = [('name', 'email')] # SERVER_EMAIL = ' @@ -132,14 +98,3 @@ LOGGING = { }, } } - -# https://github.com/mozilla/django-csp -# Content security policy controls - see `settings.py` for policy settings. -# In development, leave both lines commented out to block & not report. -# In QA, set REPORT_ONLY to True and specify a "report-only" endpoint. -# In production, set REPORT_ONLY to False and specify an "enforced" endpoint. 
-# CSP_REPORT_ONLY = False -# CSP_REPORT_URI = '' - -# Turn off caching for static assets -WEBPACK_LOADER['DEFAULT']['CACHE'] = False From c611cbe48ac4919d0d64ae9b7f20849e499fa7a3 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Thu, 4 Apr 2024 11:17:38 -0400 Subject: [PATCH 53/71] Cleanup debug print statements in unit tests --- ppa/archive/tests/test_models.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/ppa/archive/tests/test_models.py b/ppa/archive/tests/test_models.py index d8bc764b..eaa2b7f1 100644 --- a/ppa/archive/tests/test_models.py +++ b/ppa/archive/tests/test_models.py @@ -553,15 +553,11 @@ def test_index_data(self): def test_get_absolute_url(self): work = DigitizedWork.objects.filter(pages_orig="").first() - print(work) assert work.get_absolute_url() == reverse( "archive:detail", kwargs={"source_id": work.source_id} ) work.pages_orig = "11-13" - print(work) - print(work.first_page()) - print(work.first_page_original()) assert work.get_absolute_url() == reverse( "archive:detail", kwargs={"source_id": work.source_id, "start_page": 11} ) From 37ec6e0333b449776f07e15d95a64d78a865b919 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Thu, 4 Apr 2024 14:55:42 -0400 Subject: [PATCH 54/71] Document order of steps (rsync, reindex, correct excerpts) for deploy --- DEPLOYNOTES.rst | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/DEPLOYNOTES.rst b/DEPLOYNOTES.rst index 19773102..dac7c318 100644 --- a/DEPLOYNOTES.rst +++ b/DEPLOYNOTES.rst @@ -8,6 +8,26 @@ Deploy and Upgrade notes * Settings are now configured with django-split-settings as a module; the path to local_settings.py is now ppa/settings/local_settings.py +* Index ids for excerpts have changed; this requires reindexing works + and pages for excerpts and articles; pages should be indexed + after running rsync. To reindex works:: + + python manage.py index -i work + +* Local pairtree data should be updated for all HathiTrust works:: + + python manage.py hathi_rsync + +* After pairtree content has been updated, pages should be updated + in Solr:: + + python manage.py index_pages + +* Digital page ranges for HathiTrust excerpts should be corrected + using a CSV file provided by the project team:: + + python manage.py adjust_excerpts HT_excerpt_corrections.csv + 3.11.2 ------ From 3dd8e8b2b2e31de4a26f35bceadc638f5889c4f8 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Thu, 4 Apr 2024 15:04:48 -0400 Subject: [PATCH 55/71] Fix formatting in deploy notes; clarify local settings change [skip ci] --- DEPLOYNOTES.rst | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/DEPLOYNOTES.rst b/DEPLOYNOTES.rst index dac7c318..309ef291 100644 --- a/DEPLOYNOTES.rst +++ b/DEPLOYNOTES.rst @@ -7,26 +7,26 @@ Deploy and Upgrade notes ---- * Settings are now configured with django-split-settings as a module; - the path to local_settings.py is now ppa/settings/local_settings.py + local_settings.py must be moved to ppa/settings/local_settings.py * Index ids for excerpts have changed; this requires reindexing works and pages for excerpts and articles; pages should be indexed after running rsync. 
To reindex works:: - python manage.py index -i work + python manage.py index -i work * Local pairtree data should be updated for all HathiTrust works:: - python manage.py hathi_rsync + python manage.py hathi_rsync * After pairtree content has been updated, pages should be updated in Solr:: - python manage.py index_pages + python manage.py index_pages * Digital page ranges for HathiTrust excerpts should be corrected using a CSV file provided by the project team:: - python manage.py adjust_excerpts HT_excerpt_corrections.csv + python manage.py adjust_excerpts HT_excerpt_corrections.csv 3.11.2 From 12d1da978c7c11cce295cc5c3f7844703a1e4fe0 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Thu, 4 Apr 2024 15:22:01 -0400 Subject: [PATCH 56/71] Fix configuration for optional django-debug-toolbar --- ppa/settings/components/debug.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/ppa/settings/components/debug.py b/ppa/settings/components/debug.py index cd2fde28..622ad4dc 100644 --- a/ppa/settings/components/debug.py +++ b/ppa/settings/components/debug.py @@ -1,10 +1,9 @@ -# if django-debug-toolbar is installed, enable it - -from ppa.settings.components.base import INSTALLED_APPS, MIDDLEWARE +from ppa.settings import INSTALLED_APPS, MIDDLEWARE # Configure internal IPs for access to view debug toolbar INTERNAL_IPS = ["127.0.0.1", "localhost"] +# if django-debug-toolbar is installed, enable it try: INSTALLED_APPS.append("debug_toolbar") MIDDLEWARE += ("debug_toolbar.middleware.DebugToolbarMiddleware",) From 07ac7a538afbcf1ad78a0093c3c03e653b29e4c6 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Thu, 4 Apr 2024 15:22:01 -0400 Subject: [PATCH 57/71] Fix configuration for optional django-debug-toolbar --- ppa/urls.py | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/ppa/urls.py b/ppa/urls.py index 56aaa640..3543732e 100644 --- a/ppa/urls.py +++ b/ppa/urls.py @@ -58,16 +58,17 @@ # serve media content for development if settings.DEBUG: - import debug_toolbar - urlpatterns = [ - # include debug toolbar urls first to avoid getting caught by other urls - re_path(r"^__debug__/", include(debug_toolbar.urls)), - re_path( - r"^media/(?P.*)$", - serve, - { - "document_root": settings.MEDIA_ROOT, - }, - ), + re_path(r"^media/(?P.*)$", serve, {"document_root": settings.MEDIA_ROOT}), ] + urlpatterns + + try: + # include debug toolbar when available + import debug_toolbar + + urlpatterns = [ + # include debug toolbar urls first to avoid getting caught by other urls + re_path(r"^__debug__/", include(debug_toolbar.urls)), + ] + urlpatterns + except ImportError: + pass From ed0f4d8b69cbf416ab84a1a6a3c30cf4b8065eee Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Thu, 4 Apr 2024 15:31:23 -0400 Subject: [PATCH 58/71] Actually commit import of debug_toolbar to check for import error - mark so ruff does not clean up unused import --- ppa/settings/components/debug.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ppa/settings/components/debug.py b/ppa/settings/components/debug.py index 622ad4dc..39f55369 100644 --- a/ppa/settings/components/debug.py +++ b/ppa/settings/components/debug.py @@ -5,6 +5,8 @@ # if django-debug-toolbar is installed, enable it try: + import debug_toolbar # noqa: F401 (do not clean up unused import) + INSTALLED_APPS.append("debug_toolbar") MIDDLEWARE += ("debug_toolbar.middleware.DebugToolbarMiddleware",) except ImportError: From 00b8771d04a8d424d1dea5909dab8d65e76c9b7f Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Thu, 4 Apr 2024 
15:59:26 -0400 Subject: [PATCH 59/71] Index work first page as string instead of integer in solr since it may now include non-numeric labels, e.g. roman numerals --- ppa/archive/models.py | 2 +- ppa/archive/solr.py | 48 +++++++++++++++++++------------------------ 2 files changed, 22 insertions(+), 28 deletions(-) diff --git a/ppa/archive/models.py b/ppa/archive/models.py index 204ca515..7b1af106 100644 --- a/ppa/archive/models.py +++ b/ppa/archive/models.py @@ -924,7 +924,7 @@ def index_data(self): return { "id": index_id, "source_id": self.source_id, - "first_page_i": self.first_page(), + "first_page_s": self.first_page(), "group_id_s": index_id, # for grouping pages by work or excerpt "source_t": self.get_source_display(), "source_url": self.source_url, diff --git a/ppa/archive/solr.py b/ppa/archive/solr.py index 5cb251e1..04e7961e 100644 --- a/ppa/archive/solr.py +++ b/ppa/archive/solr.py @@ -6,7 +6,6 @@ class ArchiveSearchQuerySet(AliasedSolrQuerySet): - # search title query field syntax # (query field configured in solr config; searches title & subtitle with # boosting) @@ -32,7 +31,7 @@ class ArchiveSearchQuerySet(AliasedSolrQuerySet): "collections", "source_t", "image_id_s", - "first_page_i", + "first_page_s", "source_url", "work_type_s", "book_journal_s", @@ -44,7 +43,7 @@ class ArchiveSearchQuerySet(AliasedSolrQuerySet): aliases = { "source_t": "source", "image_id_s": "image_id", - "first_page_i": "first_page", + "first_page_s": "first_page", "work_type_s": "work_type", "book_journal_s": "book_journal", "group_id_s": "group_id", @@ -55,8 +54,9 @@ class ArchiveSearchQuerySet(AliasedSolrQuerySet): within_cluster_id = None def __init__(self, solr=None): - # field aliases: keys return the fields that will be returned from Solr for search page; - # values provide an aliased name if it should be different than solr index field. + # field aliases: keys return the fields that will be returned + # from Solr for search page; values provide an aliased name if + # it should be different than solr index field. # use alias if one is set, otherwise use field name self.field_aliases = { self.aliases.get(key, key): key for key in self.return_fields @@ -120,12 +120,13 @@ def query_opts(self): # when searching within a cluster, collapse on group id collapse_on = "group_id_s" if self.within_cluster_id else "cluster_id_s" - # @NOTE: Role of order here in separating works from pages (works < pages) may need to be revisited eventually. + # NOTE: Role of order here in separating works from pages (works < pages) + # may need to be revisited eventually. collapse_filter = '{!collapse field=%s sort="order asc"}' % collapse_on - - # We can apply collapse here since we need it for both keyword query case and not - # Remember that cluster_id_s is now defined as `str(self.cluster) if self.cluster else index_id` in models.py. - # So collapsing by "cluster" id implicitly includes works with no cluster id set. 
+ + # We can apply collapse here since we need it for default search + # cluster id corresponds to index id for works not in a cluster, + # so collapsing by cluster id still includes works with no cluster id qs_copy = qs_copy.filter(collapse_filter) # if there is no keyword search present, only works should @@ -163,16 +164,10 @@ def query_opts(self): qs_copy = qs_copy.raw_query_parameters(work_query=work_query) content_query = "content:(%s)" % self.keyword_query - qs_copy = ( - qs_copy.search(combined_query) - # .filter(collapse_filter) # This no longer needed since applied above in `qs_copy = qs_copy.filter(collapse_filter)` - .raw_query_parameters( - content_query=content_query, - keyword_query=self.keyword_query, - # expand="true", - work_query=work_query, - # **{"expand.rows": 1}, - ) + qs_copy = qs_copy.search(combined_query).raw_query_parameters( + content_query=content_query, + keyword_query=self.keyword_query, + work_query=work_query, ) return qs_copy._base_query_opts() @@ -182,18 +177,17 @@ def _base_query_opts(self): return super().query_opts() - class PageSearchQuerySet(AliasedSolrQuerySet): # aliases for any fields we want to rename for search and display # includes non-renamed fields to push them into the return field_aliases = { - "id":"id", - "score":"score", - "order":"order", - "title":"title", - "label":"label", + "id": "id", + "score": "score", + "order": "order", + "title": "title", + "label": "label", "source_id": "source_id", "image_id": "image_id_s", "group_id": "group_id_s", "cluster_id": "cluster_id_s", - } \ No newline at end of file + } From 2065c1ea81e8082112381bdc5752431b68033ead Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Fri, 5 Apr 2024 12:08:16 -0400 Subject: [PATCH 60/71] Link to original source must use first digital page, not original ref #555 --- ppa/archive/templates/archive/digitizedwork_detail.html | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ppa/archive/templates/archive/digitizedwork_detail.html b/ppa/archive/templates/archive/digitizedwork_detail.html index 2c11d9f6..cac145aa 100644 --- a/ppa/archive/templates/archive/digitizedwork_detail.html +++ b/ppa/archive/templates/archive/digitizedwork_detail.html @@ -110,11 +110,11 @@

{{ object.title }}

{{ object.get_source_link_label }} - {% if object.pages_digital %} {# if page range is defined (excerpt/article), link to first page in range #} + {% if object.pages_digital %} {# if page range is defined (excerpt/article), link to first *digital* page in range #} {% if object.source == object.HATHI %} - {{ object.source_id }} + {{ object.source_id }} {% elif object.source == object.GALE %} - {{ object.source_id }} + {{ object.source_id }} {% endif %} {% else %} {# when there is no page range, use source url #} {{ object.source_id }} From 5d5ed2af1ec07439526179184bcc6e761aeefda1 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Mon, 8 Apr 2024 16:12:29 -0400 Subject: [PATCH 61/71] Adjust and test view regex for excerpt with single page ref #555 --- ppa/archive/tests/test_views.py | 6 ++++++ ppa/archive/views.py | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/ppa/archive/tests/test_views.py b/ppa/archive/tests/test_views.py index 126f9fbf..c4992150 100644 --- a/ppa/archive/tests/test_views.py +++ b/ppa/archive/tests/test_views.py @@ -457,6 +457,12 @@ def test_get_queryset(self, mock_index_items): # start page 20 should match 20 only and not 200 assert response.context["object"] == dial_excerpt2 + # single page should also work + dial_excerpt2.pages_orig = "20" + dial_excerpt2.save() + response = self.client.get(dial_excerpt2.get_absolute_url()) + assert response.context["object"] == dial_excerpt2 + # create excerpt where there is no existing work; # set old_workid based on first digital page excerpt = DigitizedWork.objects.create( diff --git a/ppa/archive/views.py b/ppa/archive/views.py index b4f845c2..29c64668 100644 --- a/ppa/archive/views.py +++ b/ppa/archive/views.py @@ -299,7 +299,7 @@ def get_queryset(self): start_page = self.kwargs.get("start_page") # if start page is specified, filter to get the correct excerpt if start_page: - qs = source_qs.filter(pages_orig__regex=f"^{start_page}([,-]|\b)") + qs = source_qs.filter(pages_orig__regex=f"^{start_page}([,-]|\b|$)") # if start page is NOT specified, ensure we do not retrieve an excerpt else: qs = source_qs.filter(pages_orig__exact="") From c9e4f8627ec60b44b575e4987cbbfbe8be63aa84 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Mon, 8 Apr 2024 16:13:14 -0400 Subject: [PATCH 62/71] Update DigitizedWork string method to use original pages ref #555 --- ppa/archive/models.py | 6 +++--- ppa/archive/tests/test_models.py | 5 +++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/ppa/archive/models.py b/ppa/archive/models.py index 7b1af106..958f14ec 100644 --- a/ppa/archive/models.py +++ b/ppa/archive/models.py @@ -516,9 +516,9 @@ def get_absolute_url(self): def __str__(self): """Default string display. 
Uses :attr:`source_id` - and :attr:`pages_digital` if any""" - if self.pages_digital: - return "%s (%s)" % (self.source_id, self.pages_digital) + and :attr:`pages_orig` if any""" + if self.pages_orig: + return "%s (%s)" % (self.source_id, self.pages_orig) return self.source_id @property diff --git a/ppa/archive/tests/test_models.py b/ppa/archive/tests/test_models.py index eaa2b7f1..0c1d3d78 100644 --- a/ppa/archive/tests/test_models.py +++ b/ppa/archive/tests/test_models.py @@ -190,8 +190,9 @@ def test_str(self): digwork = DigitizedWork(source_id="njp.32101013082597") assert str(digwork) == digwork.source_id - # with pages - digwork.pages_digital = "20-25" + # with pages - should use *original*, not digital + digwork.pages_orig = "20-25" + digwork.pages_digital = "22-27" assert str(digwork) == "%s (20-25)" % digwork.source_id def test_display_title(self): From e21e49fc0f1918a41ac922824e5cc4fa08d43403 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Mon, 8 Apr 2024 16:36:22 -0400 Subject: [PATCH 63/71] Improve behavior for digwork admin source link #427 --- ppa/archive/admin.py | 15 +++++++++++++-- ppa/archive/tests/test_admin.py | 15 +++++++++++++++ 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/ppa/archive/admin.py b/ppa/archive/admin.py index e9266364..346cdb60 100644 --- a/ppa/archive/admin.py +++ b/ppa/archive/admin.py @@ -16,6 +16,7 @@ ProtectedWorkFieldFlags, ) from ppa.archive.views import ImportView +from ppa.archive.templatetags.ppa_tags import hathi_page_url, gale_page_url # import/export resource @@ -175,9 +176,19 @@ def list_collections(self, obj): list_collections.short_description = "Collections" def source_link(self, obj): - """Link to source record""" + """source id as an html link to source record, when source url is available""" + if not obj.source_url: + return obj.source_id + + source_url = obj.source_url + # hathi/gale excerpt links should include first page + if obj.pages_digital: + if obj.source == DigitizedWork.HATHI: + source_url = hathi_page_url(obj.source_url, obj.first_page_digital()) + if obj.source == DigitizedWork.GALE: + source_url = gale_page_url(obj.source_url, obj.first_page_digital()) return mark_safe( - '%s' % (obj.source_url, obj.source_id) + '%s' % (source_url, obj.source_id) ) source_link.short_description = "Source id" diff --git a/ppa/archive/tests/test_admin.py b/ppa/archive/tests/test_admin.py index d3d6c067..7948a1e6 100644 --- a/ppa/archive/tests/test_admin.py +++ b/ppa/archive/tests/test_admin.py @@ -51,6 +51,21 @@ def test_source_link(self): assert ( snippet == 'njp.32101013082597' % fake_url ) + # excerpt with digital page + digwork.pages_digital = "22-30" + # - HathiTrust + digwork.source = DigitizedWork.HATHI + snippet = digadmin.source_link(digwork) + assert digwork.source_id in snippet + assert "seq=22" in snippet + # Gale + digwork.source = DigitizedWork.GALE + snippet = digadmin.source_link(digwork) + assert "&pg=22" in snippet + + # no url - id only, no link + digwork.source_url = "" + assert digadmin.source_link(digwork) == digwork.source_id def test_readonly_fields(self): site = AdminSite() From 83f6a63e1f91fdbff1ae0034e4e5ff00cd9ee86b Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Mon, 8 Apr 2024 17:10:13 -0400 Subject: [PATCH 64/71] Add custom validation to ensure source id + first page orig is unique ref #555 --- ppa/archive/models.py | 26 ++++++++++++++++++++++++++ ppa/archive/tests/test_models.py | 19 +++++++++++++++++++ ppa/archive/views.py | 2 +- 3 files changed, 46 insertions(+), 1 deletion(-) diff --git 
a/ppa/archive/models.py b/ppa/archive/models.py index 958f14ec..65875433 100644 --- a/ppa/archive/models.py +++ b/ppa/archive/models.py @@ -310,6 +310,12 @@ def validate_page_range(value): ) +class DigitizedWorkQuerySet(models.QuerySet): + def by_first_page_orig(self, start_page): + "find records based on first page in original page range" + return self.filter(pages_orig__regex=f"^{start_page}([,-]|\b|$)") + + class DigitizedWork(ModelIndexable, TrackChangesModel): """ Record to manage digitized works included in PPA and store their basic @@ -488,6 +494,9 @@ class DigitizedWork(ModelIndexable, TrackChangesModel): blank=True, ) + # use custom queryset + objects = DigitizedWorkQuerySet.as_manager() + class Meta: ordering = ("sort_title",) # require unique combination of source id + page range, @@ -654,6 +663,23 @@ def clean(self): "Changing source ID for HathiTrust records is not supported" ) + # if original page range is set, check that first page is unique + if self.pages_orig: + first_page = self.first_page_original() + # check for other excerpts in this work with the same first page + other_excerpts = DigitizedWork.objects.filter( + source_id=self.source_id + ).by_first_page_orig(first_page) + # if this record has already been saved, exclude it when checking + if self.pk: + other_excerpts.exclude(pk=self.pk) + if other_excerpts.exists(): + raise ValidationError( + { + "pages_orig": f"First page {first_page} is not unique for this source", + } + ) + def compare_protected_fields(self, db_obj): """Compare protected fields in a :class:`ppa.archive.models.DigitizedWork` instance and return those diff --git a/ppa/archive/tests/test_models.py b/ppa/archive/tests/test_models.py index 0c1d3d78..b6eef6b3 100644 --- a/ppa/archive/tests/test_models.py +++ b/ppa/archive/tests/test_models.py @@ -803,6 +803,25 @@ def test_clean(self): work.source = DigitizedWork.OTHER work.clean() + def test_clean_unique_first_page(self): + DigitizedWork.objects.create( + source_id="chi.79279237", pages_orig="233-244", pages_digital="200-210" + ) + # first original page matches even though range is distinct; unsaved + work2 = DigitizedWork(source_id="chi.79279237", pages_orig="233-240") + with pytest.raises( + ValidationError, match="First page 233 is not unique for this source" + ): + work2.clean() + + # test updating existing record; same error + work2 = DigitizedWork.objects.create(source_id="chi.79279237", pages_orig="232") + work2.pages_orig = "233-235" + with pytest.raises( + ValidationError, match="First page 233 is not unique for this source" + ): + work2.clean() + def test_clean_fields(self): work = DigitizedWork( source_id="chi.79279237", diff --git a/ppa/archive/views.py b/ppa/archive/views.py index 29c64668..a063070a 100644 --- a/ppa/archive/views.py +++ b/ppa/archive/views.py @@ -299,7 +299,7 @@ def get_queryset(self): start_page = self.kwargs.get("start_page") # if start page is specified, filter to get the correct excerpt if start_page: - qs = source_qs.filter(pages_orig__regex=f"^{start_page}([,-]|\b|$)") + qs = source_qs.by_first_page_orig(start_page) # if start page is NOT specified, ensure we do not retrieve an excerpt else: qs = source_qs.filter(pages_orig__exact="") From 14294af831d0d4f9af6db64831dbabc3475ed138 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Mon, 8 Apr 2024 17:18:01 -0400 Subject: [PATCH 65/71] Use mock to skip trying to index pages when testing clean logic --- ppa/archive/tests/test_models.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git 
a/ppa/archive/tests/test_models.py b/ppa/archive/tests/test_models.py index b6eef6b3..7f8f05b8 100644 --- a/ppa/archive/tests/test_models.py +++ b/ppa/archive/tests/test_models.py @@ -803,7 +803,8 @@ def test_clean(self): work.source = DigitizedWork.OTHER work.clean() - def test_clean_unique_first_page(self): + @patch("ppa.archive.models.DigitizedWork.index_items") + def test_clean_unique_first_page(self, mock_index_items): DigitizedWork.objects.create( source_id="chi.79279237", pages_orig="233-244", pages_digital="200-210" ) From f96c3d81ea8f6d14a905f149b84555ce457e13af Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Tue, 9 Apr 2024 10:09:11 -0400 Subject: [PATCH 66/71] Correct hathi page link generation for excerpts in admin #427 --- ppa/archive/admin.py | 4 +++- ppa/archive/tests/test_admin.py | 2 ++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/ppa/archive/admin.py b/ppa/archive/admin.py index 346cdb60..c9a1dc18 100644 --- a/ppa/archive/admin.py +++ b/ppa/archive/admin.py @@ -184,8 +184,10 @@ def source_link(self, obj): # hathi/gale excerpt links should include first page if obj.pages_digital: if obj.source == DigitizedWork.HATHI: - source_url = hathi_page_url(obj.source_url, obj.first_page_digital()) + # hathi page url method requires source id + source_url = hathi_page_url(obj.source_id, obj.first_page_digital()) if obj.source == DigitizedWork.GALE: + # gale page url method requires source url source_url = gale_page_url(obj.source_url, obj.first_page_digital()) return mark_safe( '%s' % (source_url, obj.source_id) diff --git a/ppa/archive/tests/test_admin.py b/ppa/archive/tests/test_admin.py index 7948a1e6..447621e8 100644 --- a/ppa/archive/tests/test_admin.py +++ b/ppa/archive/tests/test_admin.py @@ -58,6 +58,8 @@ def test_source_link(self): snippet = digadmin.source_link(digwork) assert digwork.source_id in snippet assert "seq=22" in snippet + # hathi url is based on source id, not source url + assert digwork.source_url not in snippet # Gale digwork.source = DigitizedWork.GALE snippet = digadmin.source_link(digwork) From 0c94b90914da2f9fd0726965ef29cf1cb9489d06 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Tue, 9 Apr 2024 10:16:02 -0400 Subject: [PATCH 67/71] Update hathi page url to use normal url params syntax Older format with semicolons now redirects to standard format; correction identified by @mnaydan --- ppa/archive/templatetags/ppa_tags.py | 4 +++- ppa/archive/tests/test_templatetags.py | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/ppa/archive/templatetags/ppa_tags.py b/ppa/archive/templatetags/ppa_tags.py index 26ff8766..b9edc0df 100644 --- a/ppa/archive/templatetags/ppa_tags.py +++ b/ppa/archive/templatetags/ppa_tags.py @@ -71,7 +71,9 @@ def hathi_page_url(item_id, order): {% page_url item_id page.order %} """ - return "{}/pt?id={};view=1up;seq={}".format(HATHI_BASE_URL, item_id, order) + return mark_safe( + "{}/pt?id={}&view=1up&seq={}".format(HATHI_BASE_URL, item_id, order) + ) @register.simple_tag diff --git a/ppa/archive/tests/test_templatetags.py b/ppa/archive/tests/test_templatetags.py index eef33b1d..90073e0f 100644 --- a/ppa/archive/tests/test_templatetags.py +++ b/ppa/archive/tests/test_templatetags.py @@ -73,7 +73,7 @@ def test_hathi_page_url(): order = 50 hathi_url = hathi_page_url(item_id, order) assert hathi_url.startswith("%s/pt" % HATHI_BASE_URL) - assert hathi_url.endswith("?id=%s;view=1up;seq=%s" % (item_id, order)) + assert hathi_url.endswith("?id=%s&view=1up&seq=%s" % (item_id, order)) def test_gale_page_url(): From 
ff23f31e45abb43acd3cc8ebef1d152eca04d286 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Tue, 9 Apr 2024 10:19:47 -0400 Subject: [PATCH 68/71] Exclude current object when validating first page unique for source ref #555 --- ppa/archive/models.py | 2 +- ppa/archive/tests/test_models.py | 7 ++++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/ppa/archive/models.py b/ppa/archive/models.py index 65875433..ba3ba8a6 100644 --- a/ppa/archive/models.py +++ b/ppa/archive/models.py @@ -672,7 +672,7 @@ def clean(self): ).by_first_page_orig(first_page) # if this record has already been saved, exclude it when checking if self.pk: - other_excerpts.exclude(pk=self.pk) + other_excerpts = other_excerpts.exclude(pk=self.pk) if other_excerpts.exists(): raise ValidationError( { diff --git a/ppa/archive/tests/test_models.py b/ppa/archive/tests/test_models.py index 7f8f05b8..3141ab3a 100644 --- a/ppa/archive/tests/test_models.py +++ b/ppa/archive/tests/test_models.py @@ -805,9 +805,14 @@ def test_clean(self): @patch("ppa.archive.models.DigitizedWork.index_items") def test_clean_unique_first_page(self, mock_index_items): - DigitizedWork.objects.create( + digwork = DigitizedWork.objects.create( source_id="chi.79279237", pages_orig="233-244", pages_digital="200-210" ) + # save with unrelated change; should not trigger validation error + digwork.pages_digital = "201-210" + digwork.save() + digwork.clean() + # first original page matches even though range is distinct; unsaved work2 = DigitizedWork(source_id="chi.79279237", pages_orig="233-240") with pytest.raises( From 3264dc7fcf8d006a47ea2cc2dfa7d62f7633e788 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Thu, 11 Apr 2024 16:22:55 -0400 Subject: [PATCH 69/71] Set version to 3.12 and document changes --- CHANGELOG.rst | 15 +++++++++++++++ ppa/__init__.py | 2 +- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index f7ccb237..e25bbcbb 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -3,6 +3,21 @@ CHANGELOG ========= +3.12 +---- +- As an admin, I want the Source ID link in list view to go to the first page of the excerpt for articles and excerpts, so that I can more easily access excerpt content. +- As a developer, I want a script to do a one-time bulk fix of HathiTrust excerpt page ranges from a spreadsheet so that we can pull the correct content from updated HathiTrust materials. +- As a developer, I want a script to update all HathiTrust content so that I can refresh locally cached data with OCR improvements and other changes. +- bugfix: excerpt work ID is now based on sourceID + original page range + rather than digital page range +- bugfix: fix indexing and page count for new excerpts when there are multiple excerpts from a single source +- bugfix: improved index_pages script error handling for missing page count + in database when running in expedited mode +- new manage command to report on possible HathiTrust excerpt page range mismatches based on page labels in METS-ALTO +- utility script to get volume last modification date from public HathiTrust website +- updated settings to use django-split-settings +- address deprecation warnings and suppress warnings for dependencies + 3.11.4 ------ diff --git a/ppa/__init__.py b/ppa/__init__.py index 2078fb24..04ff3c86 100644 --- a/ppa/__init__.py +++ b/ppa/__init__.py @@ -1,4 +1,4 @@ -__version_info__ = (3, 12, 0, "dev") +__version_info__ = (3, 12, 0, None) # Dot-connect all but the last. Last is dash-connected if not None.
From 81f05df7abed563e76ee206b88ac9c689afe4d8a Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Thu, 11 Apr 2024 17:37:04 -0400 Subject: [PATCH 70/71] Update npm packages via npm audit fix --- package-lock.json | 361 ++++++++++++++++++++++++++++++++-------------- 1 file changed, 251 insertions(+), 110 deletions(-) diff --git a/package-lock.json b/package-lock.json index 93e7b60f..ef828af4 100644 --- a/package-lock.json +++ b/package-lock.json @@ -4918,21 +4918,21 @@ } }, "node_modules/body-parser": { - "version": "1.20.0", - "resolved": "https://registry.npmjs.org/body-parser/-/body-parser-1.20.0.tgz", - "integrity": "sha512-DfJ+q6EPcGKZD1QWUjSpqp+Q7bDQTsQIF4zfUAtZ6qk+H/3/QRhg9CEp39ss+/T2vw0+HaidC0ecJj/DRLIaKg==", + "version": "1.20.2", + "resolved": "https://registry.npmjs.org/body-parser/-/body-parser-1.20.2.tgz", + "integrity": "sha512-ml9pReCu3M61kGlqoTm2umSXTlRTuGTx0bfYj+uIUKKYycG5NtSbeetV3faSU6R7ajOPw0g/J1PvK4qNy7s5bA==", "dev": true, "dependencies": { "bytes": "3.1.2", - "content-type": "~1.0.4", + "content-type": "~1.0.5", "debug": "2.6.9", "depd": "2.0.0", "destroy": "1.2.0", "http-errors": "2.0.0", "iconv-lite": "0.4.24", "on-finished": "2.4.1", - "qs": "6.10.3", - "raw-body": "2.5.1", + "qs": "6.11.0", + "raw-body": "2.5.2", "type-is": "~1.6.18", "unpipe": "1.0.0" }, @@ -4963,9 +4963,9 @@ } }, "node_modules/body-parser/node_modules/qs": { - "version": "6.10.3", - "resolved": "https://registry.npmjs.org/qs/-/qs-6.10.3.tgz", - "integrity": "sha512-wr7M2E0OFRfIfJZjKGieI8lBKb7fRCH4Fv5KNPEs7gJ8jadvotdsS08PzOKR7opXhZ/Xkjtt3WF9g38drmyRqQ==", + "version": "6.11.0", + "resolved": "https://registry.npmjs.org/qs/-/qs-6.11.0.tgz", + "integrity": "sha512-MvjoMCJwEarSbUYk5O+nmoSzSutSsTwF85zcHPQ9OrlFoZOYIjaqBAJIqIXjptyD5vThxGq52Xu/MaJzRkIk4Q==", "dev": true, "dependencies": { "side-channel": "^1.0.4" @@ -5158,12 +5158,18 @@ } }, "node_modules/call-bind": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/call-bind/-/call-bind-1.0.2.tgz", - "integrity": "sha512-7O+FbCihrB5WGbFYesctwmTKae6rOiIzmz1icreWJ+0aA7LJfuqhEso2T9ncpcFtzMQtzXf2QGGueWJGTYsqrA==", + "version": "1.0.7", + "resolved": "https://registry.npmjs.org/call-bind/-/call-bind-1.0.7.tgz", + "integrity": "sha512-GHTSNSYICQ7scH7sZ+M2rFopRoLh8t2bLSW6BbgrtLsahOIB5iyAVJf9GjWK3cYTDaMj4XdBpM1cA6pIS0Kv2w==", "dependencies": { - "function-bind": "^1.1.1", - "get-intrinsic": "^1.0.2" + "es-define-property": "^1.0.0", + "es-errors": "^1.3.0", + "function-bind": "^1.1.2", + "get-intrinsic": "^1.2.4", + "set-function-length": "^1.2.1" + }, + "engines": { + "node": ">= 0.4" }, "funding": { "url": "https://github.com/sponsors/ljharb" @@ -5890,9 +5896,9 @@ ] }, "node_modules/content-type": { - "version": "1.0.4", - "resolved": "https://registry.npmjs.org/content-type/-/content-type-1.0.4.tgz", - "integrity": "sha512-hIP3EEPs8tB9AT1L+NUqtwOAps4mk2Zob89MWXMHjHWg9milF/j4osnnQLXBCBFBk/tvIG/tUc9mOUJiPBhPXA==", + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/content-type/-/content-type-1.0.5.tgz", + "integrity": "sha512-nTjqfcBFEipKdXCv4YDQWCfmcLZKm81ldF0pAopTvyrFGVbcR6P/VAAd5G7N+0tTr8QqiU0tFadD6FK4NtJwOA==", "dev": true, "engines": { "node": ">= 0.6" @@ -6756,6 +6762,22 @@ "node": ">=0.8" } }, + "node_modules/define-data-property": { + "version": "1.1.4", + "resolved": "https://registry.npmjs.org/define-data-property/-/define-data-property-1.1.4.tgz", + "integrity": "sha512-rBMvIzlpA8v6E+SJZoo++HAYqsLrkg7MSfIinMPFhmkorw7X+dOXVJQs+QT69zGkzMyfDnIMN2Wid1+NbL3T+A==", + "dependencies": { + "es-define-property": "^1.0.0", + 
"es-errors": "^1.3.0", + "gopd": "^1.0.1" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, "node_modules/define-lazy-prop": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/define-lazy-prop/-/define-lazy-prop-2.0.0.tgz", @@ -7375,6 +7397,25 @@ "is-arrayish": "^0.2.1" } }, + "node_modules/es-define-property": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/es-define-property/-/es-define-property-1.0.0.tgz", + "integrity": "sha512-jxayLKShrEqqzJ0eumQbVhTYQM27CfT1T35+gCgDFoL82JLsXqTJ76zv6A0YLOgEnLUMvLzsDsGIrl8NFpT2gQ==", + "dependencies": { + "get-intrinsic": "^1.2.4" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/es-errors": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/es-errors/-/es-errors-1.3.0.tgz", + "integrity": "sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw==", + "engines": { + "node": ">= 0.4" + } + }, "node_modules/es-module-lexer": { "version": "1.3.0", "resolved": "https://registry.npmjs.org/es-module-lexer/-/es-module-lexer-1.3.0.tgz", @@ -7712,17 +7753,17 @@ } }, "node_modules/express": { - "version": "4.18.1", - "resolved": "https://registry.npmjs.org/express/-/express-4.18.1.tgz", - "integrity": "sha512-zZBcOX9TfehHQhtupq57OF8lFZ3UZi08Y97dwFCkD8p9d/d2Y3M+ykKcwaMDEL+4qyUolgBDX6AblpR3fL212Q==", + "version": "4.19.2", + "resolved": "https://registry.npmjs.org/express/-/express-4.19.2.tgz", + "integrity": "sha512-5T6nhjsT+EOMzuck8JjBHARTHfMht0POzlA60WV2pMD3gyXw2LZnZ+ueGdNxG+0calOJcWKbpFcuzLZ91YWq9Q==", "dev": true, "dependencies": { "accepts": "~1.3.8", "array-flatten": "1.1.1", - "body-parser": "1.20.0", + "body-parser": "1.20.2", "content-disposition": "0.5.4", "content-type": "~1.0.4", - "cookie": "0.5.0", + "cookie": "0.6.0", "cookie-signature": "1.0.6", "debug": "2.6.9", "depd": "2.0.0", @@ -7738,7 +7779,7 @@ "parseurl": "~1.3.3", "path-to-regexp": "0.1.7", "proxy-addr": "~2.0.7", - "qs": "6.10.3", + "qs": "6.11.0", "range-parser": "~1.2.1", "safe-buffer": "5.2.1", "send": "0.18.0", @@ -7760,9 +7801,9 @@ "dev": true }, "node_modules/express/node_modules/cookie": { - "version": "0.5.0", - "resolved": "https://registry.npmjs.org/cookie/-/cookie-0.5.0.tgz", - "integrity": "sha512-YZ3GUyn/o8gfKJlnlX7g7xq4gyO6OSuhGPKaaGssGB2qgDUS0gPgtTvoyZLTt9Ab6dC4hfc9dV5arkvc/OCmrw==", + "version": "0.6.0", + "resolved": "https://registry.npmjs.org/cookie/-/cookie-0.6.0.tgz", + "integrity": "sha512-U71cyTamuh1CRNCfpGY6to28lxvNwPG4Guz/EVjgf3Jmzv0vlDp1atT9eS5dDjMYHucpHbWns6Lwf3BKz6svdw==", "dev": true, "engines": { "node": ">= 0.6" @@ -7796,9 +7837,9 @@ } }, "node_modules/express/node_modules/qs": { - "version": "6.10.3", - "resolved": "https://registry.npmjs.org/qs/-/qs-6.10.3.tgz", - "integrity": "sha512-wr7M2E0OFRfIfJZjKGieI8lBKb7fRCH4Fv5KNPEs7gJ8jadvotdsS08PzOKR7opXhZ/Xkjtt3WF9g38drmyRqQ==", + "version": "6.11.0", + "resolved": "https://registry.npmjs.org/qs/-/qs-6.11.0.tgz", + "integrity": "sha512-MvjoMCJwEarSbUYk5O+nmoSzSutSsTwF85zcHPQ9OrlFoZOYIjaqBAJIqIXjptyD5vThxGq52Xu/MaJzRkIk4Q==", "dev": true, "dependencies": { "side-channel": "^1.0.4" @@ -8657,13 +8698,18 @@ } }, "node_modules/get-intrinsic": { - "version": "1.1.2", - "resolved": "https://registry.npmjs.org/get-intrinsic/-/get-intrinsic-1.1.2.tgz", - "integrity": "sha512-Jfm3OyCxHh9DJyc28qGk+JmfkpO41A4XkneDSujN9MDXrm4oDKdHvndhZ2dN94+ERNfkYJWDclW6k2L/ZGHjXA==", + "version": "1.2.4", + "resolved": 
"https://registry.npmjs.org/get-intrinsic/-/get-intrinsic-1.2.4.tgz", + "integrity": "sha512-5uYhsJH8VJBTv7oslg4BznJYhDoRI6waYCxMmCdnTrcCrHA/fCFKoTFz2JKKE0HdDFUF7/oQuhzumXJK7paBRQ==", "dependencies": { - "function-bind": "^1.1.1", - "has": "^1.0.3", - "has-symbols": "^1.0.3" + "es-errors": "^1.3.0", + "function-bind": "^1.1.2", + "has-proto": "^1.0.1", + "has-symbols": "^1.0.3", + "hasown": "^2.0.0" + }, + "engines": { + "node": ">= 0.4" }, "funding": { "url": "https://github.com/sponsors/ljharb" @@ -8859,6 +8905,17 @@ "node": ">= 0.10" } }, + "node_modules/gopd": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/gopd/-/gopd-1.0.1.tgz", + "integrity": "sha512-d65bNlIadxvpb/A2abVdlqKqV563juRnZ1Wtk6s1sIR8uNsXR70xqIzVqxVf1eTqDunwT2MkczEeaezCKTZhwA==", + "dependencies": { + "get-intrinsic": "^1.1.3" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, "node_modules/graceful-fs": { "version": "4.2.10", "resolved": "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.2.10.tgz", @@ -9878,11 +9935,22 @@ } }, "node_modules/has-property-descriptors": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/has-property-descriptors/-/has-property-descriptors-1.0.0.tgz", - "integrity": "sha512-62DVLZGoiEBDHQyqG4w9xCuZ7eJEwNmJRWw2VY84Oedb7WFcA27fiEVe8oUQx9hAUJ4ekurquucTGwsyO1XGdQ==", + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/has-property-descriptors/-/has-property-descriptors-1.0.2.tgz", + "integrity": "sha512-55JNKuIW+vq4Ke1BjOTjM2YctQIvCT7GFzHwmfZPGo5wnrgkid0YQtnAleFSqumZm4az3n2BS+erby5ipJdgrg==", "dependencies": { - "get-intrinsic": "^1.1.1" + "es-define-property": "^1.0.0" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/has-proto": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/has-proto/-/has-proto-1.0.3.tgz", + "integrity": "sha512-SJ1amZAJUiZS+PhsVLf5tGydlaVB8EdFpaSO4gmiUKUOxk8qzn5AIy4ZeJUmh22znIdk/uMAUT2pl3FxzVUH+Q==", + "engines": { + "node": ">= 0.4" }, "funding": { "url": "https://github.com/sponsors/ljharb" @@ -9939,7 +10007,6 @@ "version": "2.0.0", "resolved": "https://registry.npmjs.org/hasown/-/hasown-2.0.0.tgz", "integrity": "sha512-vUptKVTpIJhcczKBbgnS+RtcuYMB8+oNzPK2/Hp3hanz8JmpATdmmgLgSaadVREkDm+e2giHwY3ZRkyjSIDDFA==", - "dev": true, "dependencies": { "function-bind": "^1.1.2" }, @@ -15647,9 +15714,9 @@ } }, "node_modules/object-inspect": { - "version": "1.12.2", - "resolved": "https://registry.npmjs.org/object-inspect/-/object-inspect-1.12.2.tgz", - "integrity": "sha512-z+cPxW0QGUp0mcqcsgQyLVRDoXFQbXOwBaqyF7VIgI4TWNQsDHrBpUQslRmIfAoYWdYzs6UlKJtB2XJpTaNSpQ==", + "version": "1.13.1", + "resolved": "https://registry.npmjs.org/object-inspect/-/object-inspect-1.13.1.tgz", + "integrity": "sha512-5qoj1RUiKOMsCCNLV1CBiPYE10sziTsnmNxkAI/rZhiD63CF7IqdFGC/XzjWjpSgLf0LxXX3bDFIh0E18f6UhQ==", "dev": true, "funding": { "url": "https://github.com/sponsors/ljharb" @@ -17412,9 +17479,9 @@ } }, "node_modules/raw-body": { - "version": "2.5.1", - "resolved": "https://registry.npmjs.org/raw-body/-/raw-body-2.5.1.tgz", - "integrity": "sha512-qqJBtEyVgS0ZmPGdCFPWJ3FreoqvG4MVQln/kCgF7Olq95IbOp0/BWyMwbdtn4VTvkM8Y7khCQ2Xgk/tcrCXig==", + "version": "2.5.2", + "resolved": "https://registry.npmjs.org/raw-body/-/raw-body-2.5.2.tgz", + "integrity": "sha512-8zGqypfENjCIqGhgXToC8aB2r7YrBX+AQAfIPs/Mlk+BtPTztOvTS01NRW/3Eh60J+a48lt8qsCzirQ6loCVfA==", "dev": true, "dependencies": { "bytes": "3.1.2", @@ -18454,6 +18521,22 @@ "resolved": 
"https://registry.npmjs.org/set-blocking/-/set-blocking-2.0.0.tgz", "integrity": "sha512-KiKBS8AnWGEyLzofFfmvKwpdPzqiy16LvQfK3yv/fVH7Bj13/wl3JSR1J+rfgRE9q7xUJK4qvgS8raSOeLUehw==" }, + "node_modules/set-function-length": { + "version": "1.2.2", + "resolved": "https://registry.npmjs.org/set-function-length/-/set-function-length-1.2.2.tgz", + "integrity": "sha512-pgRc4hJ4/sNjWCSS9AmnS40x3bNMDTknHgL5UaMBTMyJnU90EgWh1Rz+MC9eFu4BuN/UwZjKQuY/1v3rM7HMfg==", + "dependencies": { + "define-data-property": "^1.1.4", + "es-errors": "^1.3.0", + "function-bind": "^1.1.2", + "get-intrinsic": "^1.2.4", + "gopd": "^1.0.1", + "has-property-descriptors": "^1.0.2" + }, + "engines": { + "node": ">= 0.4" + } + }, "node_modules/set-value": { "version": "2.0.1", "resolved": "https://registry.npmjs.org/set-value/-/set-value-2.0.1.tgz", @@ -18536,14 +18619,18 @@ } }, "node_modules/side-channel": { - "version": "1.0.4", - "resolved": "https://registry.npmjs.org/side-channel/-/side-channel-1.0.4.tgz", - "integrity": "sha512-q5XPytqFEIKHkGdiMIrY10mvLRvnQh42/+GoBlFW3b2LXLE2xxJpZFdm94we0BaoV3RwJyGqg5wS7epxTv0Zvw==", + "version": "1.0.6", + "resolved": "https://registry.npmjs.org/side-channel/-/side-channel-1.0.6.tgz", + "integrity": "sha512-fDW/EZ6Q9RiO8eFG8Hj+7u/oW+XrPTIChwCOM2+th2A6OblDtYYIpve9m+KvI9Z4C9qSEXlaGR6bTEYHReuglA==", "dev": true, "dependencies": { - "call-bind": "^1.0.0", - "get-intrinsic": "^1.0.2", - "object-inspect": "^1.9.0" + "call-bind": "^1.0.7", + "es-errors": "^1.3.0", + "get-intrinsic": "^1.2.4", + "object-inspect": "^1.13.1" + }, + "engines": { + "node": ">= 0.4" }, "funding": { "url": "https://github.com/sponsors/ljharb" @@ -20728,9 +20815,9 @@ } }, "node_modules/webpack-dev-middleware": { - "version": "5.3.3", - "resolved": "https://registry.npmjs.org/webpack-dev-middleware/-/webpack-dev-middleware-5.3.3.tgz", - "integrity": "sha512-hj5CYrY0bZLB+eTO+x/j67Pkrquiy7kWepMHmUMoPsmcUaeEnQJqFzHJOyxgWlq746/wUuA64p9ta34Kyb01pA==", + "version": "5.3.4", + "resolved": "https://registry.npmjs.org/webpack-dev-middleware/-/webpack-dev-middleware-5.3.4.tgz", + "integrity": "sha512-BVdTqhhs+0IfoeAf7EoH5WE+exCmqGerHfDM0IL096Px60Tq2Mn9MAbnaGUe6HiMa41KMCYF19gyzZmBcq/o4Q==", "dev": true, "dependencies": { "colorette": "^2.0.10", @@ -25079,21 +25166,21 @@ } }, "body-parser": { - "version": "1.20.0", - "resolved": "https://registry.npmjs.org/body-parser/-/body-parser-1.20.0.tgz", - "integrity": "sha512-DfJ+q6EPcGKZD1QWUjSpqp+Q7bDQTsQIF4zfUAtZ6qk+H/3/QRhg9CEp39ss+/T2vw0+HaidC0ecJj/DRLIaKg==", + "version": "1.20.2", + "resolved": "https://registry.npmjs.org/body-parser/-/body-parser-1.20.2.tgz", + "integrity": "sha512-ml9pReCu3M61kGlqoTm2umSXTlRTuGTx0bfYj+uIUKKYycG5NtSbeetV3faSU6R7ajOPw0g/J1PvK4qNy7s5bA==", "dev": true, "requires": { "bytes": "3.1.2", - "content-type": "~1.0.4", + "content-type": "~1.0.5", "debug": "2.6.9", "depd": "2.0.0", "destroy": "1.2.0", "http-errors": "2.0.0", "iconv-lite": "0.4.24", "on-finished": "2.4.1", - "qs": "6.10.3", - "raw-body": "2.5.1", + "qs": "6.11.0", + "raw-body": "2.5.2", "type-is": "~1.6.18", "unpipe": "1.0.0" }, @@ -25114,9 +25201,9 @@ } }, "qs": { - "version": "6.10.3", - "resolved": "https://registry.npmjs.org/qs/-/qs-6.10.3.tgz", - "integrity": "sha512-wr7M2E0OFRfIfJZjKGieI8lBKb7fRCH4Fv5KNPEs7gJ8jadvotdsS08PzOKR7opXhZ/Xkjtt3WF9g38drmyRqQ==", + "version": "6.11.0", + "resolved": "https://registry.npmjs.org/qs/-/qs-6.11.0.tgz", + "integrity": "sha512-MvjoMCJwEarSbUYk5O+nmoSzSutSsTwF85zcHPQ9OrlFoZOYIjaqBAJIqIXjptyD5vThxGq52Xu/MaJzRkIk4Q==", "dev": true, "requires": { 
"side-channel": "^1.0.4" @@ -25252,12 +25339,15 @@ } }, "call-bind": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/call-bind/-/call-bind-1.0.2.tgz", - "integrity": "sha512-7O+FbCihrB5WGbFYesctwmTKae6rOiIzmz1icreWJ+0aA7LJfuqhEso2T9ncpcFtzMQtzXf2QGGueWJGTYsqrA==", + "version": "1.0.7", + "resolved": "https://registry.npmjs.org/call-bind/-/call-bind-1.0.7.tgz", + "integrity": "sha512-GHTSNSYICQ7scH7sZ+M2rFopRoLh8t2bLSW6BbgrtLsahOIB5iyAVJf9GjWK3cYTDaMj4XdBpM1cA6pIS0Kv2w==", "requires": { - "function-bind": "^1.1.1", - "get-intrinsic": "^1.0.2" + "es-define-property": "^1.0.0", + "es-errors": "^1.3.0", + "function-bind": "^1.1.2", + "get-intrinsic": "^1.2.4", + "set-function-length": "^1.2.1" } }, "callsites": { @@ -25806,9 +25896,9 @@ } }, "content-type": { - "version": "1.0.4", - "resolved": "https://registry.npmjs.org/content-type/-/content-type-1.0.4.tgz", - "integrity": "sha512-hIP3EEPs8tB9AT1L+NUqtwOAps4mk2Zob89MWXMHjHWg9milF/j4osnnQLXBCBFBk/tvIG/tUc9mOUJiPBhPXA==", + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/content-type/-/content-type-1.0.5.tgz", + "integrity": "sha512-nTjqfcBFEipKdXCv4YDQWCfmcLZKm81ldF0pAopTvyrFGVbcR6P/VAAd5G7N+0tTr8QqiU0tFadD6FK4NtJwOA==", "dev": true }, "convert-source-map": { @@ -26425,6 +26515,16 @@ } } }, + "define-data-property": { + "version": "1.1.4", + "resolved": "https://registry.npmjs.org/define-data-property/-/define-data-property-1.1.4.tgz", + "integrity": "sha512-rBMvIzlpA8v6E+SJZoo++HAYqsLrkg7MSfIinMPFhmkorw7X+dOXVJQs+QT69zGkzMyfDnIMN2Wid1+NbL3T+A==", + "requires": { + "es-define-property": "^1.0.0", + "es-errors": "^1.3.0", + "gopd": "^1.0.1" + } + }, "define-lazy-prop": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/define-lazy-prop/-/define-lazy-prop-2.0.0.tgz", @@ -26893,6 +26993,19 @@ "is-arrayish": "^0.2.1" } }, + "es-define-property": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/es-define-property/-/es-define-property-1.0.0.tgz", + "integrity": "sha512-jxayLKShrEqqzJ0eumQbVhTYQM27CfT1T35+gCgDFoL82JLsXqTJ76zv6A0YLOgEnLUMvLzsDsGIrl8NFpT2gQ==", + "requires": { + "get-intrinsic": "^1.2.4" + } + }, + "es-errors": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/es-errors/-/es-errors-1.3.0.tgz", + "integrity": "sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw==" + }, "es-module-lexer": { "version": "1.3.0", "resolved": "https://registry.npmjs.org/es-module-lexer/-/es-module-lexer-1.3.0.tgz", @@ -27160,17 +27273,17 @@ } }, "express": { - "version": "4.18.1", - "resolved": "https://registry.npmjs.org/express/-/express-4.18.1.tgz", - "integrity": "sha512-zZBcOX9TfehHQhtupq57OF8lFZ3UZi08Y97dwFCkD8p9d/d2Y3M+ykKcwaMDEL+4qyUolgBDX6AblpR3fL212Q==", + "version": "4.19.2", + "resolved": "https://registry.npmjs.org/express/-/express-4.19.2.tgz", + "integrity": "sha512-5T6nhjsT+EOMzuck8JjBHARTHfMht0POzlA60WV2pMD3gyXw2LZnZ+ueGdNxG+0calOJcWKbpFcuzLZ91YWq9Q==", "dev": true, "requires": { "accepts": "~1.3.8", "array-flatten": "1.1.1", - "body-parser": "1.20.0", + "body-parser": "1.20.2", "content-disposition": "0.5.4", "content-type": "~1.0.4", - "cookie": "0.5.0", + "cookie": "0.6.0", "cookie-signature": "1.0.6", "debug": "2.6.9", "depd": "2.0.0", @@ -27186,7 +27299,7 @@ "parseurl": "~1.3.3", "path-to-regexp": "0.1.7", "proxy-addr": "~2.0.7", - "qs": "6.10.3", + "qs": "6.11.0", "range-parser": "~1.2.1", "safe-buffer": "5.2.1", "send": "0.18.0", @@ -27205,9 +27318,9 @@ "dev": true }, "cookie": { - "version": "0.5.0", - "resolved": 
"https://registry.npmjs.org/cookie/-/cookie-0.5.0.tgz", - "integrity": "sha512-YZ3GUyn/o8gfKJlnlX7g7xq4gyO6OSuhGPKaaGssGB2qgDUS0gPgtTvoyZLTt9Ab6dC4hfc9dV5arkvc/OCmrw==", + "version": "0.6.0", + "resolved": "https://registry.npmjs.org/cookie/-/cookie-0.6.0.tgz", + "integrity": "sha512-U71cyTamuh1CRNCfpGY6to28lxvNwPG4Guz/EVjgf3Jmzv0vlDp1atT9eS5dDjMYHucpHbWns6Lwf3BKz6svdw==", "dev": true }, "depd": { @@ -27232,9 +27345,9 @@ } }, "qs": { - "version": "6.10.3", - "resolved": "https://registry.npmjs.org/qs/-/qs-6.10.3.tgz", - "integrity": "sha512-wr7M2E0OFRfIfJZjKGieI8lBKb7fRCH4Fv5KNPEs7gJ8jadvotdsS08PzOKR7opXhZ/Xkjtt3WF9g38drmyRqQ==", + "version": "6.11.0", + "resolved": "https://registry.npmjs.org/qs/-/qs-6.11.0.tgz", + "integrity": "sha512-MvjoMCJwEarSbUYk5O+nmoSzSutSsTwF85zcHPQ9OrlFoZOYIjaqBAJIqIXjptyD5vThxGq52Xu/MaJzRkIk4Q==", "dev": true, "requires": { "side-channel": "^1.0.4" @@ -27890,13 +28003,15 @@ } }, "get-intrinsic": { - "version": "1.1.2", - "resolved": "https://registry.npmjs.org/get-intrinsic/-/get-intrinsic-1.1.2.tgz", - "integrity": "sha512-Jfm3OyCxHh9DJyc28qGk+JmfkpO41A4XkneDSujN9MDXrm4oDKdHvndhZ2dN94+ERNfkYJWDclW6k2L/ZGHjXA==", + "version": "1.2.4", + "resolved": "https://registry.npmjs.org/get-intrinsic/-/get-intrinsic-1.2.4.tgz", + "integrity": "sha512-5uYhsJH8VJBTv7oslg4BznJYhDoRI6waYCxMmCdnTrcCrHA/fCFKoTFz2JKKE0HdDFUF7/oQuhzumXJK7paBRQ==", "requires": { - "function-bind": "^1.1.1", - "has": "^1.0.3", - "has-symbols": "^1.0.3" + "es-errors": "^1.3.0", + "function-bind": "^1.1.2", + "has-proto": "^1.0.1", + "has-symbols": "^1.0.3", + "hasown": "^2.0.0" } }, "get-package-type": { @@ -28048,6 +28163,14 @@ "sparkles": "^1.0.0" } }, + "gopd": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/gopd/-/gopd-1.0.1.tgz", + "integrity": "sha512-d65bNlIadxvpb/A2abVdlqKqV563juRnZ1Wtk6s1sIR8uNsXR70xqIzVqxVf1eTqDunwT2MkczEeaezCKTZhwA==", + "requires": { + "get-intrinsic": "^1.1.3" + } + }, "graceful-fs": { "version": "4.2.10", "resolved": "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.2.10.tgz", @@ -28890,13 +29013,18 @@ } }, "has-property-descriptors": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/has-property-descriptors/-/has-property-descriptors-1.0.0.tgz", - "integrity": "sha512-62DVLZGoiEBDHQyqG4w9xCuZ7eJEwNmJRWw2VY84Oedb7WFcA27fiEVe8oUQx9hAUJ4ekurquucTGwsyO1XGdQ==", + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/has-property-descriptors/-/has-property-descriptors-1.0.2.tgz", + "integrity": "sha512-55JNKuIW+vq4Ke1BjOTjM2YctQIvCT7GFzHwmfZPGo5wnrgkid0YQtnAleFSqumZm4az3n2BS+erby5ipJdgrg==", "requires": { - "get-intrinsic": "^1.1.1" + "es-define-property": "^1.0.0" } }, + "has-proto": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/has-proto/-/has-proto-1.0.3.tgz", + "integrity": "sha512-SJ1amZAJUiZS+PhsVLf5tGydlaVB8EdFpaSO4gmiUKUOxk8qzn5AIy4ZeJUmh22znIdk/uMAUT2pl3FxzVUH+Q==" + }, "has-symbols": { "version": "1.0.3", "resolved": "https://registry.npmjs.org/has-symbols/-/has-symbols-1.0.3.tgz", @@ -28935,7 +29063,6 @@ "version": "2.0.0", "resolved": "https://registry.npmjs.org/hasown/-/hasown-2.0.0.tgz", "integrity": "sha512-vUptKVTpIJhcczKBbgnS+RtcuYMB8+oNzPK2/Hp3hanz8JmpATdmmgLgSaadVREkDm+e2giHwY3ZRkyjSIDDFA==", - "dev": true, "requires": { "function-bind": "^1.1.2" } @@ -33200,9 +33327,9 @@ } }, "object-inspect": { - "version": "1.12.2", - "resolved": "https://registry.npmjs.org/object-inspect/-/object-inspect-1.12.2.tgz", - "integrity": 
"sha512-z+cPxW0QGUp0mcqcsgQyLVRDoXFQbXOwBaqyF7VIgI4TWNQsDHrBpUQslRmIfAoYWdYzs6UlKJtB2XJpTaNSpQ==", + "version": "1.13.1", + "resolved": "https://registry.npmjs.org/object-inspect/-/object-inspect-1.13.1.tgz", + "integrity": "sha512-5qoj1RUiKOMsCCNLV1CBiPYE10sziTsnmNxkAI/rZhiD63CF7IqdFGC/XzjWjpSgLf0LxXX3bDFIh0E18f6UhQ==", "dev": true }, "object-keys": { @@ -34438,9 +34565,9 @@ "dev": true }, "raw-body": { - "version": "2.5.1", - "resolved": "https://registry.npmjs.org/raw-body/-/raw-body-2.5.1.tgz", - "integrity": "sha512-qqJBtEyVgS0ZmPGdCFPWJ3FreoqvG4MVQln/kCgF7Olq95IbOp0/BWyMwbdtn4VTvkM8Y7khCQ2Xgk/tcrCXig==", + "version": "2.5.2", + "resolved": "https://registry.npmjs.org/raw-body/-/raw-body-2.5.2.tgz", + "integrity": "sha512-8zGqypfENjCIqGhgXToC8aB2r7YrBX+AQAfIPs/Mlk+BtPTztOvTS01NRW/3Eh60J+a48lt8qsCzirQ6loCVfA==", "dev": true, "requires": { "bytes": "3.1.2", @@ -35223,6 +35350,19 @@ "resolved": "https://registry.npmjs.org/set-blocking/-/set-blocking-2.0.0.tgz", "integrity": "sha512-KiKBS8AnWGEyLzofFfmvKwpdPzqiy16LvQfK3yv/fVH7Bj13/wl3JSR1J+rfgRE9q7xUJK4qvgS8raSOeLUehw==" }, + "set-function-length": { + "version": "1.2.2", + "resolved": "https://registry.npmjs.org/set-function-length/-/set-function-length-1.2.2.tgz", + "integrity": "sha512-pgRc4hJ4/sNjWCSS9AmnS40x3bNMDTknHgL5UaMBTMyJnU90EgWh1Rz+MC9eFu4BuN/UwZjKQuY/1v3rM7HMfg==", + "requires": { + "define-data-property": "^1.1.4", + "es-errors": "^1.3.0", + "function-bind": "^1.1.2", + "get-intrinsic": "^1.2.4", + "gopd": "^1.0.1", + "has-property-descriptors": "^1.0.2" + } + }, "set-value": { "version": "2.0.1", "resolved": "https://registry.npmjs.org/set-value/-/set-value-2.0.1.tgz", @@ -35286,14 +35426,15 @@ "dev": true }, "side-channel": { - "version": "1.0.4", - "resolved": "https://registry.npmjs.org/side-channel/-/side-channel-1.0.4.tgz", - "integrity": "sha512-q5XPytqFEIKHkGdiMIrY10mvLRvnQh42/+GoBlFW3b2LXLE2xxJpZFdm94we0BaoV3RwJyGqg5wS7epxTv0Zvw==", + "version": "1.0.6", + "resolved": "https://registry.npmjs.org/side-channel/-/side-channel-1.0.6.tgz", + "integrity": "sha512-fDW/EZ6Q9RiO8eFG8Hj+7u/oW+XrPTIChwCOM2+th2A6OblDtYYIpve9m+KvI9Z4C9qSEXlaGR6bTEYHReuglA==", "dev": true, "requires": { - "call-bind": "^1.0.0", - "get-intrinsic": "^1.0.2", - "object-inspect": "^1.9.0" + "call-bind": "^1.0.7", + "es-errors": "^1.3.0", + "get-intrinsic": "^1.2.4", + "object-inspect": "^1.13.1" } }, "sigmund": { @@ -36939,9 +37080,9 @@ } }, "webpack-dev-middleware": { - "version": "5.3.3", - "resolved": "https://registry.npmjs.org/webpack-dev-middleware/-/webpack-dev-middleware-5.3.3.tgz", - "integrity": "sha512-hj5CYrY0bZLB+eTO+x/j67Pkrquiy7kWepMHmUMoPsmcUaeEnQJqFzHJOyxgWlq746/wUuA64p9ta34Kyb01pA==", + "version": "5.3.4", + "resolved": "https://registry.npmjs.org/webpack-dev-middleware/-/webpack-dev-middleware-5.3.4.tgz", + "integrity": "sha512-BVdTqhhs+0IfoeAf7EoH5WE+exCmqGerHfDM0IL096Px60Tq2Mn9MAbnaGUe6HiMa41KMCYF19gyzZmBcq/o4Q==", "dev": true, "requires": { "colorette": "^2.0.10", From 3953a6d4e469384c8a2452aa0145baffbcbe0e00 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Thu, 11 Apr 2024 17:37:15 -0400 Subject: [PATCH 71/71] Require parasolr 0.9.2 --- requirements.txt | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/requirements.txt b/requirements.txt index 0984208a..03f543a4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,8 +1,6 @@ django>=5.0,<5.1 pucas>=0.8 -# dev parasolr until next release -git+https://github.com/Princeton-CDH/parasolr@develop#egg=parasolr -#parasolr>=0.9 +parasolr>=0.9.2 pairtree py-flags # 
pymarc 5+ has incompatible changes