Merge pull request #731 from CDLUC3/develop
Merge Develop to main for removing n2t/binder code
jsjiang authored Sep 11, 2024
2 parents 6ea3bd3 + be3a0c1 commit f144cad
Showing 21 changed files with 49 additions and 1,112 deletions.
3 changes: 0 additions & 3 deletions ansible/group_vars/all
@@ -83,9 +83,6 @@ admin_username: "{{ ssm_params['admin_username'] }}"

allocator_cdl_password: "{{ ssm_params['allocator_cdl_password'] }}"
allocator_purdue_password: "{{ ssm_params['allocator_purdue_password'] }}"
binder_url: "{{ ssm_params['binder_url'] }}"
binder_username: "{{ ssm_params['binder_username'] }}"
binder_password: "{{ ssm_params['binder_password'] }}"
cloudwatch_instance_name: "{{ ansible_facts.hostname }}"
crossref_username: "{{ ssm_params['crossref_username'] }}"
crossref_password: "{{ ssm_params['crossref_password'] }}"
2 changes: 0 additions & 2 deletions ansible/test_vars.yaml
@@ -37,8 +37,6 @@
- name: return a single param based on ssm_root_path as variable
##debug: msg="{{ ssm_params['database_host'] }}"
debug: msg="database_host {{ database_host }}"
- name: return binder_url
debug: msg="{{ binder_url }}"
- name: return resolver_ark
debug: msg="{{ resolver_ark }}"

1 change: 0 additions & 1 deletion ezidapp/management/commands/check-ezid.py
@@ -69,7 +69,6 @@
}

queueType = {
'binder': ezidapp.models.async_queue.BinderQueue,
'crossref': ezidapp.models.async_queue.CrossrefQueue,
'datacite': ezidapp.models.async_queue.DataciteQueue,
'search': ezidapp.models.async_queue.SearchIndexerQueue
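With the binder queue gone, the remaining queueType mapping in check-ezid.py covers only the Crossref, DataCite, and search-indexer queues. A minimal sketch of how such a mapping could be used to report per-queue backlogs, assuming each queue class is an ordinary Django model (report_queue_sizes is a hypothetical helper, not something defined in check-ezid.py):

    import ezidapp.models.async_queue

    queueType = {
        'crossref': ezidapp.models.async_queue.CrossrefQueue,
        'datacite': ezidapp.models.async_queue.DataciteQueue,
        'search': ezidapp.models.async_queue.SearchIndexerQueue,
    }

    def report_queue_sizes():
        # Count the rows currently sitting in each remaining async queue table.
        for name, model in queueType.items():
            print(f"{name}: {model.objects.count()} queued task(s)")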
8 changes: 0 additions & 8 deletions ezidapp/management/commands/diag-db-stats.py
@@ -72,14 +72,6 @@ def print_identifier(self, identifier):
# print(id_model)
# pprint.pp(id_model.cm)

print('-' * 100)

impl.enqueue.enqueueBinderIdentifier(
identifier=id_model.identifier,
operation='update',
blob={'x': 'y'},
)

# impl.nog.util.print_table(row_list, log.info)

def print_all(self):
213 changes: 1 addition & 212 deletions ezidapp/management/commands/diag-identifier.py
@@ -9,8 +9,6 @@
This command does not alter any information in the database, and should be safe to run
at any time, including a running production instance.
Note however, that this command MAY alter the information in N2T when the --sync option
is used. Confirmation is requested before any metadata updates are propagated to N2T.
"""

import argparse
@@ -36,7 +34,6 @@
import ezidapp.models.identifier
import ezidapp.models.user
import impl.datacite
import impl.noid_egg

log = logging.getLogger(__name__)

@@ -67,7 +64,7 @@ def add_arguments(self, parser:argparse.ArgumentParser):

_show = subparsers.add_parser(
"show",
help=("Show available metadata for an identifier, and optionally sync the N2T record.\n"
help=("Show available metadata for an identifier.\n"
"Example:\n"
" Default:\n"
" ./manage.py diag-identifier show ark:/62930/d1n739\n")
@@ -84,12 +81,6 @@ def add_arguments(self, parser:argparse.ArgumentParser):
action='store_true',
help='Show Identifier instead of SearchIdentifier table entry',
)
_show.add_argument(
'-y',
'--legacy',
action='store_true',
help='Show legacy form of identifier record',
)
_show.add_argument(
'-m',
'--cm',
@@ -108,17 +99,6 @@
action='store_true',
help='Convert timestamps to textual time representation',
)
_show.add_argument(
'-N',
'--N2T',
action='store_true',
help='Retrieve record from N2T if available',
)
_show.add_argument(
'--sync',
action='store_true',
help="Synchronize the N2T entry with metadata from the database.",
)

_list = subparsers.add_parser(
"list",
@@ -154,11 +134,6 @@ def add_arguments(self, parser:argparse.ArgumentParser):
default=[],
help="Comma separated list of fields in addition to identifier to list."
)
_list.add_argument(
'--compare',
action='store_true',
help='Show difference between EZID and N2T metadata.',
)
_list.add_argument(
'-m',
'--max_rows',
@@ -199,95 +174,6 @@ def add_arguments(self, parser:argparse.ArgumentParser):
help="Ending date for metrics"
)

_syncmeta = subparsers.add_parser(
"syncmeta",
help=("Sends metadata to N2T for each row text file source. Rows starting with space or '#' are ignored.\n"
"Example:\n"
" ./manage.py diag-identifier syncmeta -f pid_list.txt"
)
)
_syncmeta.add_argument(
'-f',
'--from',
type=str,
help="Text file with one identifier per line.",
required=True
)
_syncmeta.add_argument(
'-s',
'--start',
type=str,
help="Identifier in list to start from",
default=None
)


def diff_n2t(self, identifier:ezidapp.models.identifier)->dict:
res = {}
n2t_meta = impl.noid_egg.getElements(identifier.identifier)
if n2t_meta is None:
n2t_meta = {}
_legacy = identifier.toLegacy()
for k, v in _legacy.items():
res[k] = [v, None]
# If properties retrieved from N2T are not present in the supplied
# update metadata, then set the value of the field to an empty string.
# An empty value results in an "rm" (remove) operation for that field
# being sent to N2T.
for k, v in n2t_meta.items():
if k not in res:
res[k] = [None, v]
else:
res[k][1] = v
return res
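    # Illustrative shape of the dict returned by diff_n2t (hypothetical element
    # names and values): each key maps to an [EZID value, N2T value] pair, and
    # None marks the side on which the element is absent.
    example_diff = {
        "_t": ["https://ezid.example/new-target", "https://n2t.example/old-target"],
        "erc.who": ["Example Author", None],   # present only in the EZID record
        "_x": [None, "stale value"],           # present only on N2T
    }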


def prepare_n2t_metadata(self, identifier:ezidapp.models.identifier, n2t_meta:typing.Optional[dict]=None)->dict:
'''Prepare metadata for sending to N2T
Returns a dictionary of metadata for identifier that can be sent to
N2T using impl.noid_egg.setElements(identifier.identifier, m) to
set or update the N2T entry for identifier.
Metadata is sent to N2T for all states except Reserved DOIs, for which N2T is generally null.
'''
_legacy = identifier.toLegacy()
# See proc_binder.update
if n2t_meta is None:
# Retrieve the existing metadata from N2T
n2t_meta = impl.noid_egg.getElements(identifier.identifier)
# if no metadata on N2T then initialize a blank for population.
if n2t_meta is None:
n2t_meta = {}

if identifier.isReserved:
#special case - reserved - do nothing
log.info("Reserved DOIs have null N2T metadata.")
# To delete metadata on N2T, send keys with empty values, but we don't want to
# delete all the keys since that has the effect of deleting the identifier from N2T.
#for k in n2t_meta:
# n2t_meta[k] = ""
#return n2t_meta
return {}

# First, update m with provided metadata
for k, v in list(_legacy.items()):
# If the provided metadata matches existing, then ignore
if n2t_meta.get(k) == v:
del n2t_meta[k]
# Otherwise add property to list for sending back to N2T
else:
n2t_meta[k] = v
# If properties retrieved from N2T are not present in the supplied
# update metadata, then set the value of the field to an empty string.
# An empty value results in an "rm" (remove) operation for that field
# being sent to N2T.
for k in list(n2t_meta.keys()):
if k not in _legacy:
n2t_meta[k] = ""
return n2t_meta
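    # Worked example of the prepare_n2t_metadata merge rules (hypothetical
    # element names and values): keys whose values already match are dropped,
    # changed keys keep the EZID value, and keys present only on N2T are set to
    # the empty string so that N2T removes them.
    #
    #   _legacy  = {"_t": "https://ezid.example/new-target", "_p": "ezid"}
    #   n2t_meta = {"_t": "https://n2t.example/old-target", "_p": "ezid", "_x": "stale"}
    #
    #   returned = {"_t": "https://ezid.example/new-target", "_x": ""}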


def handle_show(self, *args, **opts):
def jsonable_instance(o):
if o is None:
@@ -318,9 +204,6 @@ def tstamp_to_text(t):
# but we want to futz around with the cm section and other fields for each instance.
entry = jsonable_instance(identifier)
entry["isAgentPid"] = identifier.isAgentPid
if opts["legacy"]:
# Get the "legacy" format, which is used for sending to N2T binder
entry["legacy"] = identifier.toLegacy()
if opts["expanded"]:
for field_name in expand_fields:
entry["fields"][field_name] = jsonable_instance(getattr(identifier, field_name))
@@ -340,31 +223,6 @@ def tstamp_to_text(t):
entry["cm_eq_metadata"] = _mequal
except zlib.error:
log.info("No cm section in %s", identifier.identifier)
n2t_meta = None
if opts["N2T"]:
# Retrieve entry from N2T
n2t_meta = impl.noid_egg.getElements(identifier.identifier)
entry["n2t"] = n2t_meta
if opts["sync"]:
_legacy = identifier.toLegacy()
# See proc_binder.update
# Retrieve the existing metadata from N2T
m = self.prepare_n2t_metadata(identifier, n2t_meta)
if len(m) > 0:
log.warning("Updating N2T metadata for %s", identifier.identifier)
log.info("Pending updates for %s:\n%s", identifier.identifier, m)
self.stdout.write(f"About to update {identifier.identifier} !")
response = input("Enter Y to continue, anything else aborts: ")
if response.strip() == 'Y':
impl.noid_egg.setElements(identifier.identifier, m)
##
# Retrieve the updated metadata and add to the entry
entry["n2t_updated"] = impl.noid_egg.getElements(identifier.identifier)
else:
self.stdout.write("Aborted.")
else:
log.info("No pending updates for %s", identifier.identifier)

entries.append(entry)
self.stdout.write(json.dumps(entries, indent=2, sort_keys=True))
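    # Trimmed example of the JSON emitted by handle_show when --legacy and
    # --N2T are given (hypothetical identifier and values; real entries also
    # carry the serialized model fields):
    #
    # [
    #   {
    #     "isAgentPid": false,
    #     "legacy": {"_t": "https://ezid.example/target"},
    #     "n2t": {"_t": "https://n2t.example/target"}
    #   }
    # ]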

@@ -422,14 +280,10 @@ def handle_list(self, *args, **opts):
identifier_class = ezidapp.models.identifier.Identifier
identifiers = identifier_class.objects.filter(**_filter).order_by("-createTime")[:max_rows]
dfields = _fields
if opts.get("compare", False):
dfields.append('n2t')
writer = csv.DictWriter(self.stdout, dfields, dialect='excel')
writer.writeheader()
for identifier in identifiers:
row = django.forms.models.model_to_dict(identifier, fields=_fields)
if opts.get('compare', False):
row['n2t'] = self.diff_n2t(identifier)
writer.writerow(row)


@@ -476,69 +330,6 @@ def handle_metrics(self, *args, **opts):
for row in cursor.fetchall():
writer.writerow(row)


def handle_syncmeta(self, *args, **opts):
'''For each line in `from`:
update metadata
'''
fn_src = opts.get('from')
fn_dst = fn_src + ".json"
start_at = opts.get("start", None)
log.info("Recording changes to %s", fn_dst)
identifiers = []
add_id = True
if start_at is not None:
add_id = False
start_at = start_at.strip()
with open(fn_src) as _src:
for row in _src:
if row.startswith('ark:') or row.startswith('doi:'):
if not add_id:
if row.strip() == start_at:
add_id = True
if add_id:
identifiers.append(row.strip())
log.info("Loaded %s identifiers from %s", len(identifiers), fn_src)
log.info("Loading status...")
with open(fn_dst, 'a') as f_dest:
for pid in identifiers:
self.stdout.write(pid)
result = {'pid':pid, 'original': {}, 'change': {}, 'updated':{}}
identifier = ezidapp.models.identifier.SearchIdentifier.objects.get(identifier=pid)
if identifier is None:
log.error('Identifier %s could not be loaded!', pid)
break
if identifier.isDatacite:
# handle datacite target url
doi = identifier.identifier[4:]
datacenter = str(identifier.datacenter)
log.info("Setting target for %s (%s) to %s", doi, datacenter, identifier.resolverTarget)
r = impl.datacite.setTargetUrl(doi, identifier.resolverTarget, datacenter)
if r is not None:
# There was a failure in the request
log.error("Failed to set target url for DataCite DOI: %s", doi)
pass
elif identifier.isCrossref:
# handle crossref target url
pass
result['original'] = impl.noid_egg.getElements(identifier.identifier)
n2t_meta = copy.deepcopy(result['original'])
result['change'] = self.prepare_n2t_metadata(identifier, n2t_meta=n2t_meta)
self.stdout.write(json.dumps(result['change']))
if result['change'] != {}:
# Send update request
impl.noid_egg.setElements(identifier.identifier, result['change'])
# Retrieve the updated n2t meta
result['updated'] = impl.noid_egg.getElements(identifier.identifier)
else:
# no change
result['updated'] = result['original']
f_dest.write(json.dumps(result))
f_dest.write("\n")
f_dest.flush()
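    # One line of the JSON log that handle_syncmeta appends to the --from file
    # name plus ".json" for each identifier (hypothetical identifier and
    # values): 'change' is empty when N2T already matches, in which case
    # 'updated' simply repeats 'original'.
    #
    # {"pid": "ark:/99999/fk4example",
    #  "original": {"_t": "https://n2t.example/old-target"},
    #  "change": {"_t": "https://ezid.example/new-target"},
    #  "updated": {"_t": "https://ezid.example/new-target"}}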



def handle(self, *args, **opts):
operation = opts['operation']
if operation == 'show':
Expand All @@ -552,7 +343,5 @@ def handle(self, *args, **opts):
self.handle_resolve(*args, **opts)
elif operation == 'metrics':
self.handle_metrics(*args, **opts)
elif operation =='syncmeta':
self.handle_syncmeta(*args, **opts)


