Merge pull request #731 from CDLUC3/develop
Merge Develop to main for removing n2t/binder code
jsjiang authored Sep 11, 2024
2 parents 6ea3bd3 + be3a0c1 commit f144cad
Showing 21 changed files with 49 additions and 1,112 deletions.
3 changes: 0 additions & 3 deletions ansible/group_vars/all
@@ -83,9 +83,6 @@ admin_username: "{{ ssm_params['admin_username'] }}"

allocator_cdl_password: "{{ ssm_params['allocator_cdl_password'] }}"
allocator_purdue_password: "{{ ssm_params['allocator_purdue_password'] }}"
binder_url: "{{ ssm_params['binder_url'] }}"
binder_username: "{{ ssm_params['binder_username'] }}"
binder_password: "{{ ssm_params['binder_password'] }}"
cloudwatch_instance_name: "{{ ansible_facts.hostname }}"
crossref_username: "{{ ssm_params['crossref_username'] }}"
crossref_password: "{{ ssm_params['crossref_password'] }}"
2 changes: 0 additions & 2 deletions ansible/test_vars.yaml
@@ -37,8 +37,6 @@
- name: return a single param based on ssm_root_path as variable
##debug: msg="{{ ssm_params['database_host'] }}"
debug: msg="database_host {{ database_host }}"
- name: return binder_url
debug: msg="{{ binder_url }}"
- name: return resolver_ark
debug: msg="{{ resolver_ark }}"

1 change: 0 additions & 1 deletion ezidapp/management/commands/check-ezid.py
@@ -69,7 +69,6 @@
}

queueType = {
'binder': ezidapp.models.async_queue.BinderQueue,
'crossref': ezidapp.models.async_queue.CrossrefQueue,
'datacite': ezidapp.models.async_queue.DataciteQueue,
'search': ezidapp.models.async_queue.SearchIndexerQueue
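With the binder queue gone, the remaining queueType mapping in check-ezid.py covers only the Crossref, DataCite, and search-indexer queues. A minimal sketch of how such a mapping could be used to report per-queue backlogs, assuming each queue class is an ordinary Django model (report_queue_sizes is a hypothetical helper, not something defined in check-ezid.py):

    import ezidapp.models.async_queue

    queueType = {
        'crossref': ezidapp.models.async_queue.CrossrefQueue,
        'datacite': ezidapp.models.async_queue.DataciteQueue,
        'search': ezidapp.models.async_queue.SearchIndexerQueue,
    }

    def report_queue_sizes():
        # Count the rows currently sitting in each remaining async queue table.
        for name, model in queueType.items():
            print(f"{name}: {model.objects.count()} queued task(s)")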
8 changes: 0 additions & 8 deletions ezidapp/management/commands/diag-db-stats.py
@@ -72,14 +72,6 @@ def print_identifier(self, identifier):
# print(id_model)
# pprint.pp(id_model.cm)

print('-' * 100)

impl.enqueue.enqueueBinderIdentifier(
identifier=id_model.identifier,
operation='update',
blob={'x': 'y'},
)

# impl.nog.util.print_table(row_list, log.info)

def print_all(self):
213 changes: 1 addition & 212 deletions ezidapp/management/commands/diag-identifier.py
@@ -9,8 +9,6 @@
This command does not alter any information in the database, and should be safe to run
at any time, including a running production instance.
Note however, that this command MAY alter the information in N2T when the --sync option
is used. Confirmation is requested before any metadata updates are propagated to N2T.
"""

import argparse
@@ -36,7 +34,6 @@
import ezidapp.models.identifier
import ezidapp.models.user
import impl.datacite
import impl.noid_egg

log = logging.getLogger(__name__)

@@ -67,7 +64,7 @@ def add_arguments(self, parser:argparse.ArgumentParser):

_show = subparsers.add_parser(
"show",
help=("Show available metadata for an identifier, and optionally sync the N2T record.\n"
help=("Show available metadata for an identifier.\n"
"Example:\n"
" Default:\n"
" ./manage.py diag-identifier show ark:/62930/d1n739\n")
@@ -84,12 +81,6 @@ def add_arguments(self, parser:argparse.ArgumentParser):
action='store_true',
help='Show Identifier instead of SearchIdentifier table entry',
)
_show.add_argument(
'-y',
'--legacy',
action='store_true',
help='Show legacy form of identifier record',
)
_show.add_argument(
'-m',
'--cm',
@@ -108,17 +99,6 @@
action='store_true',
help='Convert timestamps to textual time representation',
)
_show.add_argument(
'-N',
'--N2T',
action='store_true',
help='Retrieve record from N2T if available',
)
_show.add_argument(
'--sync',
action='store_true',
help="Synchronize the N2T entry with metadata from the database.",
)

_list = subparsers.add_parser(
"list",
@@ -154,11 +134,6 @@ def add_arguments(self, parser:argparse.ArgumentParser):
default=[],
help="Comma separated list of fields in addition to identifier to list."
)
_list.add_argument(
'--compare',
action='store_true',
help='Show difference between EZID and N2T metadata.',
)
_list.add_argument(
'-m',
'--max_rows',
@@ -199,95 +174,6 @@ def add_arguments(self, parser:argparse.ArgumentParser):
help="Ending date for metrics"
)

_syncmeta = subparsers.add_parser(
"syncmeta",
help=("Sends metadata to N2T for each row text file source. Rows starting with space or '#' are ignored.\n"
"Example:\n"
" ./manage.py diag-identifier syncmeta -f pid_list.txt"
)
)
_syncmeta.add_argument(
'-f',
'--from',
type=str,
help="Text file with one identifier per line.",
required=True
)
_syncmeta.add_argument(
'-s',
'--start',
type=str,
help="Identifier in list to start from",
default=None
)


def diff_n2t(self, identifier:ezidapp.models.identifier)->dict:
res = {}
n2t_meta = impl.noid_egg.getElements(identifier.identifier)
if n2t_meta is None:
n2t_meta = {}
_legacy = identifier.toLegacy()
for k, v in _legacy.items():
res[k] = [v, None]
# If properties retrieved from N2T are not present in the supplied
# update metadata, then set the value of the field to an empty string.
# An empty value results in an "rm" (remove) operation for that field
# being sent to N2T.
for k, v in n2t_meta.items():
if k not in res:
res[k] = [None, v]
else:
res[k][1] = v
return res
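    # Illustrative shape of the dict returned by diff_n2t (hypothetical element
    # names and values): each key maps to an [EZID value, N2T value] pair, and
    # None marks the side on which the element is absent.
    example_diff = {
        "_t": ["https://ezid.example/new-target", "https://n2t.example/old-target"],
        "erc.who": ["Example Author", None],   # present only in the EZID record
        "_x": [None, "stale value"],           # present only on N2T
    }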


def prepare_n2t_metadata(self, identifier:ezidapp.models.identifier, n2t_meta:typing.Optional[dict]=None)->dict:
'''Prepare metadata for sending to N2T
Returns a dictionary of metadata for identifier that can be sent to
N2T using impl.noid_egg.setElements(identifier.identifier, m) to
set or update the N2T entry for identifier.
Metadata is sent to N2T for all states except Reserved DOIs, for which N2T is generally null.
'''
_legacy = identifier.toLegacy()
# See proc_binder.update
if n2t_meta is None:
# Retrieve the existing metadata from N2T
n2t_meta = impl.noid_egg.getElements(identifier.identifier)
# if no metadata on N2T then initialize a blank for population.
if n2t_meta is None:
n2t_meta = {}

if identifier.isReserved:
#special case - reserved - do nothing
log.info("Reserved DOIs have null N2T metadata.")
# To delete metadata on N2T, send keys with empty values, but we don't want to
# delete all the keys since that has the effect of deleting the identifier from N2T.
#for k in n2t_meta:
# n2t_meta[k] = ""
#return n2t_meta
return {}

# First, update m with provided metadata
for k, v in list(_legacy.items()):
# If the provided metadata matches existing, then ignore
if n2t_meta.get(k) == v:
del n2t_meta[k]
# Otherwise add property to list for sending back to N2T
else:
n2t_meta[k] = v
# If properties retrieved from N2T are not present in the supplied
# update metadata, then set the value of the field to an empty string.
# An empty value results in an "rm" (remove) operation for that field
# being sent to N2T.
for k in list(n2t_meta.keys()):
if k not in _legacy:
n2t_meta[k] = ""
return n2t_meta
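    # Worked example of the prepare_n2t_metadata merge rules (hypothetical
    # element names and values): keys whose values already match are dropped,
    # changed keys keep the EZID value, and keys present only on N2T are set to
    # the empty string so that N2T removes them.
    #
    #   _legacy  = {"_t": "https://ezid.example/new-target", "_p": "ezid"}
    #   n2t_meta = {"_t": "https://n2t.example/old-target", "_p": "ezid", "_x": "stale"}
    #
    #   returned = {"_t": "https://ezid.example/new-target", "_x": ""}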


def handle_show(self, *args, **opts):
def jsonable_instance(o):
if o is None:
@@ -318,9 +204,6 @@ def tstamp_to_text(t):
# but we want to futz around with the cm section and other fields for each instance.
entry = jsonable_instance(identifier)
entry["isAgentPid"] = identifier.isAgentPid
if opts["legacy"]:
# Get the "legacy" format, which is used for sending to N2T binder
entry["legacy"] = identifier.toLegacy()
if opts["expanded"]:
for field_name in expand_fields:
entry["fields"][field_name] = jsonable_instance(getattr(identifier, field_name))
@@ -340,31 +223,6 @@ def tstamp_to_text(t):
entry["cm_eq_metadata"] = _mequal
except zlib.error:
log.info("No cm section in %s", identifier.identifier)
n2t_meta = None
if opts["N2T"]:
# Retrieve entry from N2T
n2t_meta = impl.noid_egg.getElements(identifier.identifier)
entry["n2t"] = n2t_meta
if opts["sync"]:
_legacy = identifier.toLegacy()
# See proc_binder.update
# Retrieve the existing metadata from N2T
m = self.prepare_n2t_metadata(identifier, n2t_meta)
if len(m) > 0:
log.warning("Updating N2T metadata for %s", identifier.identifier)
log.info("Pending updates for %s:\n%s", identifier.identifier, m)
self.stdout.write(f"About to update {identifier.identifier} !")
response = input("Enter Y to continue, anything else aborts: ")
if response.strip() == 'Y':
impl.noid_egg.setElements(identifier.identifier, m)
##
# Retrieve the updated metadata and add to the entry
entry["n2t_updated"] = impl.noid_egg.getElements(identifier.identifier)
else:
self.stdout.write("Aborted.")
else:
log.info("No pending updates for %s", identifier.identifier)

entries.append(entry)
self.stdout.write(json.dumps(entries, indent=2, sort_keys=True))
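    # Trimmed example of the JSON emitted by handle_show when --legacy and
    # --N2T are given (hypothetical identifier and values; real entries also
    # carry the serialized model fields):
    #
    # [
    #   {
    #     "isAgentPid": false,
    #     "legacy": {"_t": "https://ezid.example/target"},
    #     "n2t": {"_t": "https://n2t.example/target"}
    #   }
    # ]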

@@ -422,14 +280,10 @@ def handle_list(self, *args, **opts):
identifier_class = ezidapp.models.identifier.Identifier
identifiers = identifier_class.objects.filter(**_filter).order_by("-createTime")[:max_rows]
dfields = _fields
if opts.get("compare", False):
dfields.append('n2t')
writer = csv.DictWriter(self.stdout, dfields, dialect='excel')
writer.writeheader()
for identifier in identifiers:
row = django.forms.models.model_to_dict(identifier, fields=_fields)
if opts.get('compare', False):
row['n2t'] = self.diff_n2t(identifier)
writer.writerow(row)


@@ -476,69 +330,6 @@ def handle_metrics(self, *args, **opts):
for row in cursor.fetchall():
writer.writerow(row)


def handle_syncmeta(self, *args, **opts):
'''For each line in `from`:
update metadata
'''
fn_src = opts.get('from')
fn_dst = fn_src + ".json"
start_at = opts.get("start", None)
log.info("Recording changes to %s", fn_dst)
identifiers = []
add_id = True
if start_at is not None:
add_id = False
start_at = start_at.strip()
with open(fn_src) as _src:
for row in _src:
if row.startswith('ark:') or row.startswith('doi:'):
if not add_id:
if row.strip() == start_at:
add_id = True
if add_id:
identifiers.append(row.strip())
log.info("Loaded %s identifiers from %s", len(identifiers), fn_src)
log.info("Loading status...")
with open(fn_dst, 'a') as f_dest:
for pid in identifiers:
self.stdout.write(pid)
result = {'pid':pid, 'original': {}, 'change': {}, 'updated':{}}
identifier = ezidapp.models.identifier.SearchIdentifier.objects.get(identifier=pid)
if identifier is None:
log.error('Identifier %s could not be loaded!', pid)
break
if identifier.isDatacite:
# handle datacite target url
doi = identifier.identifier[4:]
datacenter = str(identifier.datacenter)
log.info("Setting target for %s (%s) to %s", doi, datacenter, identifier.resolverTarget)
r = impl.datacite.setTargetUrl(doi, identifier.resolverTarget, datacenter)
if r is not None:
# There was a failure in the request
log.error("Failed to set target url for DataCite DOI: %s", doi)
pass
elif identifier.isCrossref:
# handle crossref target url
pass
result['original'] = impl.noid_egg.getElements(identifier.identifier)
n2t_meta = copy.deepcopy(result['original'])
result['change'] = self.prepare_n2t_metadata(identifier, n2t_meta=n2t_meta)
self.stdout.write(json.dumps(result['change']))
if result['change'] != {}:
# Send update request
impl.noid_egg.setElements(identifier.identifier, result['change'])
# Retrieve the updated n2t meta
result['updated'] = impl.noid_egg.getElements(identifier.identifier)
else:
# no change
result['updated'] = result['original']
f_dest.write(json.dumps(result))
f_dest.write("\n")
f_dest.flush()
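    # One line of the JSON log that handle_syncmeta appends to the --from file
    # name plus ".json" for each identifier (hypothetical identifier and
    # values): 'change' is empty when N2T already matches, in which case
    # 'updated' simply repeats 'original'.
    #
    # {"pid": "ark:/99999/fk4example",
    #  "original": {"_t": "https://n2t.example/old-target"},
    #  "change": {"_t": "https://ezid.example/new-target"},
    #  "updated": {"_t": "https://ezid.example/new-target"}}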



def handle(self, *args, **opts):
operation = opts['operation']
if operation == 'show':
Expand All @@ -552,7 +343,5 @@ def handle(self, *args, **opts):
self.handle_resolve(*args, **opts)
elif operation == 'metrics':
self.handle_metrics(*args, **opts)
elif operation =='syncmeta':
self.handle_syncmeta(*args, **opts)


