Skip to content

Commit

Permalink
Merge branch 'main' of github.com:caltechlibrary/irdmtools into gh-pages
Browse files Browse the repository at this point in the history
  • Loading branch information
rsdoiel committed Sep 14, 2023
2 parents 7b085d4 + 8575da6 commit 1628946
Show file tree
Hide file tree
Showing 37 changed files with 3,483 additions and 286 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -107,3 +107,5 @@ test-*.txt
*.yaml
s3_uploads/
migration/
testdata
*.bash
2 changes: 1 addition & 1 deletion CITATION.cff
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ authors:


repository-code: "https://github.com/caltechlibrary/irdmtools"
version: 0.0.48
version: 0.0.49
license-url: "https://caltechlibrary.github.io/irdmtools/LICENSE"
keywords: [ "institutional repository", "data
management", "Invenio", "Invenio-RDM" ]
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ RELEASE_DATE = $(shell date +%Y-%m-%d)

RELEASE_HASH=$(shell git log --pretty=format:'%h' -n 1)

PROGRAMS = rdmutil eprint2rdm doi2rdm people2vocabulary # $(shell ls -1 cmd)
PROGRAMS = rdmutil eprint2rdm eprintrest doi2rdm people2vocabulary # $(shell ls -1 cmd)

MAN_PAGES = $(shell ls -1 *.1.md | sed -E 's/\.1.md/.1/g')

Expand Down
2 changes: 1 addition & 1 deletion about.html
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@

<section>
<h1 id="about-this-software">About this software</h1>
<h2 id="irdmtools-0.0.48">irdmtools 0.0.48</h2>
<h2 id="irdmtools-0.0.49">irdmtools 0.0.49</h2>
<h3 id="authors">Authors</h3>
<ul>
<li>R. S. Doiel</li>
Expand Down
4 changes: 2 additions & 2 deletions about.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ authors:
orcid: "https://orcid.org/0000-0001-9266-5146"

repository-code: "https://github.com/caltechlibrary/irdmtools"
version: 0.0.48
version: 0.0.49
license-url: "https://caltechlibrary.github.io/irdmtools/LICENSE"
keywords: [ "institutional repository", "data
management", "Invenio", "Invenio-RDM" ]
Expand All @@ -24,7 +24,7 @@ management", "Invenio", "Invenio-RDM" ]
About this software
===================

## irdmtools 0.0.48
## irdmtools 0.0.49

### Authors

Expand Down
20 changes: 16 additions & 4 deletions check_transferred_records.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,24 @@
reader = csv.DictReader(f)
for row in reader:
status = row['record_status']
if status != 'restricted-duplicate':
if status == 'public':
eprintid = row['eprintid']
if eprintid not in all_rdm_records:
all_rdm_records.append(eprintid)
if eprintid not in eprint_mapping:
eprint_mapping[row['eprintid']] = row['rdmid']
else:
eprint_mapping[row['eprintid']] = [eprint_mapping[row['eprintid']] ,row['rdmid']]
print('duplicate', eprintid, row['rdmid'])
#if eprintid not in eprint_mapping:
# eprint_mapping[row['eprintid']] = row['rdmid']
#else:
# eprint_mapping[row['eprintid']] = [eprint_mapping[row['eprintid']] ,row['rdmid']]
print("Number of RDM records: ",len(all_rdm_records))

print('Missing Eprints records:')
count = 0

for record in eprints_records:
if record not in all_rdm_records:
print(record)
count += 1

print('Number of missing records: ',count)
166 changes: 166 additions & 0 deletions cmd/eprintrest/eprintrest.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
// eprintrest is a command line program that re-creates an EPrints 3.x REST
// API running on localhost. It requires access to the repository's
// "archives" directory as well as the MySQL database.
//
// @author R. S. Doiel, <[email protected]>
// @author Tom Morrell, <[email protected]>
//
// Copyright (c) 2023, Caltech
// All rights not granted herein are expressly reserved by Caltech.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// 3. Neither the name of the copyright holder nor the names of its contributors
// may be used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
package main

import (
"flag"
"fmt"
"os"
"path"

// Caltech Library packages
"github.com/caltechlibrary/irdmtools"
)

var (
	// helpText is the Markdown, man-page style help template for this
	// command. main passes it to irdmtools.FmtHelp together with the
	// application name, version, release date and release hash, which
	// correspond to the {app_name}, {version}, {release_date} and
	// {release_hash} placeholders used in the text below.
	helpText = `%{app_name}(1) irdmtools user manual | version {version} {release_hash}
% R. S. Doiel and Tom Morrell
% {release_date}
# NAME
{app_name}
# SYNOPSIS
{app_name} [OPTIONS]
# DESCRIPTION
{app_name} is a Caltech Library centric localhost web service
that creates a functionally similar replica of the EPrints REST API
for EPrints 3.3.x based repositories. It uses the path to the
"archives" directory and a MySQL Database for the repository.
It only supports "archive" eprint.eprint_status records and
only the complete XML. Start up time is slow because it builds
the data structures representing the content in memory. This
makes the response times to requests VERY fast compared to
the EPrints REST API.
NOTE: the rest API does not enforce user permissions, restrictions
or roles. It is a minimal READ ONLY re-implementation of the EPrints 3.3
REST API!
The application is configured from the environment. The following
environment variables need to be set.
REPO_ID
: The repository id string (e.g. caltechauthors). Also the name of the database for the repository.
EPRINT_ARCHIVES_PATH
: A path to the "archives" directory holding your repository content
(e.g. /usr/local/eprints/archives)
DB_USER
: The user name needed to access the MySQL database[^1]
DB_PASSWORD
: The password needed to access the MySQL database[^1]
REST_PORT
: The localhost port to use for the read only REST API.
[^1]: MySQL, like this REST service, is assumed to be running on localhost.
# OPTIONS
-help
: display help
-license
: display license
-version
: display version
# EXAMPLE
This is an example environment
~~~
REPO_ID="caltechauthors"
EPRINT_ARCHIVES_PATH="/code/eprints3.3/archives"
REST_PORT=80
DB_USER="eprints"
DB_PASSWORD="something_secret_here"
~~~
Running the localhost REST API clone
~~~
{app_name}
~~~
`
)

// main is the entry point for the eprintrest command. It registers and
// parses the -help, -version and -license flags, prints the requested
// information and exits with status 0 when one of them is set, and
// otherwise runs an irdmtools.EPrintRest value with the process's standard
// streams. An error returned by Run is written to standard error and the
// process exits with status 1.
func main() {
	appName := path.Base(os.Args[0])

	help := flag.Bool("help", false, "display help")
	showVersion := flag.Bool("version", false, "display version")
	license := flag.Bool("license", false, "display license")
	flag.Parse()

	// The informational options are checked in a fixed order (help, then
	// version, then license); the first one that is set ends the program.
	switch {
	case *help:
		// NOTE: Version, ReleaseDate and ReleaseHash are set when
		// version.go is generated.
		fmt.Fprintf(os.Stdout, "%s\n", irdmtools.FmtHelp(helpText, appName, irdmtools.Version, irdmtools.ReleaseDate, irdmtools.ReleaseHash))
		os.Exit(0)
	case *showVersion:
		fmt.Fprintf(os.Stdout, "%s %s %s\n", appName, irdmtools.Version, irdmtools.ReleaseHash)
		os.Exit(0)
	case *license:
		fmt.Fprintf(os.Stdout, "%s\n", irdmtools.LicenseText)
		os.Exit(0)
	}

	// No informational option was requested: create the service object
	// and hand it the standard streams.
	service := new(irdmtools.EPrintRest)
	if err := service.Run(os.Stdin, os.Stdout, os.Stderr); err != nil {
		fmt.Fprintf(os.Stderr, "%s\n", err)
		os.Exit(1)
	}
}
4 changes: 2 additions & 2 deletions codemeta.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@
"license": "https://caltechlibrary.github.io/irdmtools/LICENSE",
"codeRepository": "https://github.com/caltechlibrary/irdmtools",
"dateCreated": "2022-10-27",
"dateRelease": "2023-09-12",
"dateRelease": "2023-09-14",
"issueTracker": "https://github.com/caltechlibrary/irdmtools/issues",
"name": "irdmtools",
"version": "0.0.48",
"version": "0.0.49",
"description": "Tools for working with institutional repositories and data management systems. Current implementation targets Invenio-RDM.",
"applicationCategory": "library science",
"releaseNotes": "This is a proof of concept",
Expand Down
4 changes: 2 additions & 2 deletions doi2rdm.1.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
%doi2rdm(1) irdmtools user manual | version 0.0.48 fb9e58e
%doi2rdm(1) irdmtools user manual | version 0.0.49 265a46b
% R. S. Doiel and Tom Morrell
% 2023-09-12
% 2023-09-14

# NAME

Expand Down
4 changes: 2 additions & 2 deletions eprint2rdm.1.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
%eprint2rdm(1) irdmtools user manual | version 0.0.48 fb9e58e
%eprint2rdm(1) irdmtools user manual | version 0.0.49 265a46b
% R. S. Doiel and Tom Morrell
% 2023-09-12
% 2023-09-14

# NAME

Expand Down
33 changes: 15 additions & 18 deletions eprint2rdm.go
Original file line number Diff line number Diff line change
Expand Up @@ -359,38 +359,35 @@ func customFieldsMetadataFromEPrint(eprint *eprinttools.EPrint, rec *simplified.
// instead of simplified.go because I need to guarantee duplicate subjects don't get added
// as part of the merging of keywords and subjects from EPrints.
if eprint.Keywords != "" || (eprint.Subjects != nil && eprint.Subjects.Length() > 0) {
if rec.Metadata.Subjects == nil {
rec.Metadata.Subjects = []*simplified.Subject{}
}
subjectsTest := map[string]bool{}
subjectStrings := []string{}
if eprint.Keywords != "" {
keywords := strings.Split(eprint.Keywords, ";")
for _, keyword := range keywords {
if _, duplicate := subjectsTest[keyword]; ! duplicate {
subjectsTest[keyword] = true
rec.Metadata.Subjects = append(rec.Metadata.Subjects, &simplified.Subject{
Subject: strings.TrimSpace(keyword),
})
val := strings.TrimSpace(keyword)
if val != "" && val != "cls" {
subjectStrings = append(subjectStrings, val)
}
}
}
for i := 0; i < eprint.Subjects.Length(); i++ {
subject := eprint.Subjects.IndexOf(i)
//NOTE: irdmtools issue #51, ignore cls as a subject, this was an EPrints-ism
// needed for Caltech Library only.
if subject.Value != "" && subject.Value != "cls" {
if _, duplicate := subjectsTest[subject.Value]; ! duplicate {
subjectsTest[subject.Value] = true
rec.Metadata.Subjects = append(rec.Metadata.Subjects, &simplified.Subject{
Subject: strings.TrimSpace(subject.Value),
})
val := strings.TrimSpace(subject.Value)
if val != "" && val != "cls" {
subjectStrings = append(subjectStrings, val)
}
}
if len(subjectStrings) > 0 {
duplicates := map[string]bool{}
for _, subject := range subjectStrings {
if _, duplicate := duplicates[subject]; ! duplicate {
AddSubject(rec, subject)
}
duplicates[subject] = true
}
}
}

// FIXME: Handle non-subject keywords

return nil
}

Expand Down
Loading

0 comments on commit 1628946

Please sign in to comment.