Skip to content

Commit

Permalink
Add collection_handles column to CSV
Browse files Browse the repository at this point in the history
  • Loading branch information
Alex Dolski committed Jan 4, 2024
1 parent 97dffa9 commit 60dcac2
Show file tree
Hide file tree
Showing 9 changed files with 351 additions and 68 deletions.
16 changes: 13 additions & 3 deletions app/util/csv_exporter.rb
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,17 @@ def export_unit(unit, elements: [])
# @return [String]
#
def select_clause(elements)
columns = ["items.id", "handles.suffix"]
columns = ["items.id", "item_handles.suffix AS item_handle"]
# collection_handles column
columns << "array_to_string(
array(
SELECT CONCAT('#{Handle.prefix}/', h.suffix) AS suffix
FROM handles h
LEFT JOIN collection_item_memberships cim ON cim.item_id = items.id
WHERE cim.item_id = items.id
AND h.collection_id = cim.collection_id
ORDER BY cim.primary DESC NULLS LAST
), '||') AS collection_handles\n"
# files column
columns << "array_to_string(
array(
Expand Down Expand Up @@ -192,7 +202,7 @@ def from_clause
end

##
# @return [String] SQL `LEFT JOIN` clauses, concatenated into one string and
#                  separated by newlines. Every clause joins on `items.id`.
#
def join_clauses
"LEFT JOIN handles ON handles.item_id = items.id\n" +
"LEFT JOIN handles item_handles ON item_handles.item_id = items.id\n" +
"LEFT JOIN collection_item_memberships cim ON cim.item_id = items.id "
end

Expand All @@ -209,4 +219,4 @@ def to_csv(elements, results)
end
end

end
end
119 changes: 98 additions & 21 deletions app/util/csv_importer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -29,14 +29,27 @@
#
# The first column, `id`, refers to an item's internal database ID. The second
# column, `handle`, refers to an item's handle. (These columns cannot be
# modified.) The next several columns refer to various system-level item
# properties. The remaining columns correspond to the elements in the effective
# modified.)
#
# The third column, `collection_handles`, refers to the handles of the
# collections to which the item belongs. There may be multiple handles listed
# in this column, separated by a double pipe (`||`), as an item may belong to
# multiple collections. If this is the case, the first handle listed will
# correspond to the item's primary collection. Whether or not there are
# multiple handles, the first handle in this column must match that of the
# collection associated with the import.
#
# The next several columns refer to various other item properties.
# These columns are collectively listed in {CsvImporter::REQUIRED_COLUMNS}.
#
# The remaining columns correspond to the elements in the effective
# {MetadataProfile} of the collection that apply to the item.
#
# All columns are required except metadata columns. When a metadata column is
# missing, the corresponding metadata elements of the items in the CSV file
# will be left unchanged. For new items in the CSV file, a missing metadata
# column may be an error depending on whether it is required or not.
# All columns are required to exist (even if empty) except metadata columns.
# When a metadata column is missing, the corresponding metadata elements of the
# items in the CSV file will be left unchanged. For new items in the CSV file,
# a missing metadata column may be an error depending on whether it is required
# or not.
#
# ## Non-header rows
#
Expand All @@ -48,6 +61,13 @@
#
# The next group of cells correspond to various system properties of the item:
#
# * `handle` The item's handle. This is for informational
# purposes only--it cannot be updated by
# changing the value.
# * `collection_handles` The handles of the {Collection}s that contain the
# item, separated by `||`, with the primary
# collection listed first.
# Changing the value to the handle of some other
# collection will cause the item to be moved
# into it.
# * `files` Names of all files attached to an item. For
# uploads, this may be a path relative to a
# package root in which the CSV resides.
Expand Down Expand Up @@ -113,11 +133,12 @@
#
class CsvImporter

MULTI_VALUE_DELIMITER = "||"
NEW_ITEM_INDICATOR = "+"
REQUIRED_COLUMNS = %w[id handle files file_descriptions embargo_types
embargo_expirations embargo_exempt_user_groups
embargo_reasons]
MULTI_VALUE_DELIMITER = "||"
NEW_ITEM_INDICATOR = "+"
REQUIRED_COLUMNS = %w[id handle collection_handles files
file_descriptions embargo_types
embargo_expirations embargo_exempt_user_groups
embargo_reasons]

##
# Imports items from a CSV string.
Expand All @@ -126,8 +147,12 @@ class CsvImporter
# @param file_paths [Enumerable<String>] Absolute paths of files that are
# referenced in the CSV's `files`
# column.
# @param submitter [User]
# @param primary_collection [Collection] Collection to import new items into.
# @param submitter [User] The user performing the import.
# @param primary_collection [Collection] Collection associated with the
# import. This may be the collection
# that items are added to if it is
# not overridden by the
# `collection_handles` column.
# @param imported_items [Array<Hash>] For each imported item, whether
# created or updated, a hash
# containing `:item_id` and `:handle`
Expand All @@ -144,6 +169,9 @@ def import(pathname:,
imported_items: [],
print_progress: false,
task: nil)
raise ArgumentError, "Pathname is nil" unless pathname
raise ArgumentError, "File does not exist: #{pathname}" unless File.exist?(pathname)
raise ArgumentError, "Nil submitter argument" unless submitter
num_rows = 0
File.foreach(pathname) do
num_rows += 1
Expand Down Expand Up @@ -174,18 +202,19 @@ def import(pathname:,
file_paths: file_paths)
else
item = Item.find(item_id)
item = update_item(item: item,
submitter: submitter,
element_names: header_row[REQUIRED_COLUMNS.length..],
row: row,
file_paths: file_paths)
item = update_item(item: item,
submitter: submitter,
primary_collection: primary_collection,
element_names: header_row[REQUIRED_COLUMNS.length..],
row: row,
file_paths: file_paths)
end
imported_items << {
item_id: item.id,
handle: item.handle&.handle
}
status_text = "Importing #{num_rows} items from CSV"
row_index += 1
row_index += 1
progress&.report(row_index, status_text)
task&.progress(row_index / (num_rows - 1).to_f,
status_text: status_text)
Expand Down Expand Up @@ -213,6 +242,10 @@ def create_item(submitter:,
stage: Item::Stages::APPROVED,
event_description: "Item imported from CSV.").execute
item.assign_handle
move_into_collections(item: item,
primary_collection: primary_collection,
collection_handles: row[2],
submitter: submitter)
associate_bitstreams(item: item,
row: row,
file_paths: file_paths) if file_paths
Expand All @@ -227,12 +260,17 @@ def create_item(submitter:,

def update_item(item:,
submitter:,
primary_collection:,
element_names:,
row:,
file_paths: nil)
file_paths: nil)
UpdateItemCommand.new(item: item,
user: submitter,
description: "Updated via CSV").execute do
move_into_collections(item: item,
primary_collection: primary_collection,
collection_handles: row[2],
submitter: submitter)
associate_bitstreams(item: item,
row: row,
file_paths: file_paths)
Expand Down Expand Up @@ -350,6 +388,45 @@ def associate_embargoes(item:, row:)
end
end

##
# @param item [Item] Item to move.
# @param primary_collection [Collection] Collection associated with the
# import.
# @param collection_handles [String] Value of the `collection_handles`
# column.
# @param submitter [User] User performing the import.
#
def move_into_collections(item:,
                          primary_collection:,
                          collection_handles:,
                          submitter:)
  # Replaces the item's collection memberships with the collections named in
  # the `collection_handles` cell. The first handle becomes the primary
  # membership. A nil/blank cell leaves the existing memberships untouched.
  #
  # Raises ArgumentError when a handle does not resolve to a collection, when
  # the first handle is not that of `primary_collection`, or when `submitter`
  # is not an effective administrator of one of the collections.

  # Normalize the cell: tolerate nil, surrounding whitespace, and empty
  # segments (e.g. a leading or doubled delimiter).
  handle_strings = collection_handles.to_s
                                     .split(MULTI_VALUE_DELIMITER)
                                     .map(&:strip)
                                     .reject(&:empty?)
  return if handle_strings.empty?

  # Resolve and validate every handle BEFORE touching the database, so that
  # a bad row cannot leave the item stripped of its memberships.
  collections = handle_strings.each_with_index.map do |handle_string, index|
    # Handles are written as "prefix/suffix"; only the suffix is looked up.
    suffix     = handle_string.split("/").last.to_s.strip
    collection = Handle.find_by_suffix(suffix)&.collection
    # Covers both an unknown suffix and a handle that belongs to something
    # other than a collection.
    unless collection
      raise ArgumentError,
            "Collection with handle #{handle_string} does not exist"
    end
    if index == 0 && collection != primary_collection
      raise ArgumentError, "The first handle in the collection_handles "\
                           "column must be that of the collection "\
                           "associated with the import"
    end
    unless submitter.effective_collection_admin?(collection)
      raise ArgumentError, "User #{submitter} does not have permission to "\
                           "add items to #{collection.title}"
    end
    collection
  end

  item.collection_item_memberships.destroy_all
  # De-duplicate on the resolved list rather than on item.collections, which
  # may still hold a cached pre-destroy result. uniq keeps first occurrences,
  # so index 0 is still the primary collection.
  collections.uniq.each_with_index do |collection, index|
    item.collection_item_memberships.build(collection: collection,
                                           primary:    index == 0).save!
  end
end

##
# @param row [Hash<String>]
# @param submission_profile [SubmissionProfile]
Expand All @@ -364,7 +441,7 @@ def validate_header(row:, submission_profile:)
end
REQUIRED_COLUMNS.each_with_index do |column, index|
if row[index] != column
raise ArgumentError, "Missing #{column} column"
raise ArgumentError, "Missing #{column} column (expected at position #{index + 1})"
end
end
end
Expand Down
4 changes: 2 additions & 2 deletions test/fixtures/files/csv/illegal_element.csv
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
id,handle,files,file_descriptions,embargo_types,embargo_expirations,embargo_exempt_user_groups,embargo_reasons,bogus
+,"","","","","","","",Value of bogus element
id,handle,collection_handles,files,file_descriptions,embargo_types,embargo_expirations,embargo_exempt_user_groups,embargo_reasons,bogus
+,"","","","","","","","",Value of bogus element
4 changes: 2 additions & 2 deletions test/fixtures/files/csv/new.csv
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
id,handle,files,file_descriptions,embargo_types,embargo_expirations,embargo_exempt_user_groups,embargo_reasons,dc:title
+,"",hello.txt,"","","","","",New CSV Item
id,handle,collection_handles,files,file_descriptions,embargo_types,embargo_expirations,embargo_exempt_user_groups,embargo_reasons,dc:title
+,"","",hello.txt,"","","","","",New CSV Item
4 changes: 2 additions & 2 deletions test/fixtures/files/packages/csv/missing_file/package.csv
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
id,handle,filenames,file_descriptions,embargo_types,embargo_expirations,embargo_exempt_user_groups,embargo_reasons,dc:title,dc:subject,dc:creator,dc:contributor,dc:description:abstract,dc:date:issued,dc:identifier:uri,dc:type
+,"",missing.txt,Hello world,"","","","",New Item 1,Subject 1||Subject 2,,,,,,
id,handle,collection_handles,filenames,file_descriptions,embargo_types,embargo_expirations,embargo_exempt_user_groups,embargo_reasons,dc:title,dc:subject,dc:creator,dc:contributor,dc:description:abstract,dc:date:issued,dc:identifier:uri,dc:type
+,"","",missing.txt,Hello world,"","","","",New Item 1,Subject 1||Subject 2,,,,,,
6 changes: 3 additions & 3 deletions test/fixtures/files/packages/csv/valid_items/package.csv
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
id,handle,files,file_descriptions,embargo_types,embargo_expirations,embargo_exempt_user_groups,embargo_reasons,dc:title,dc:subject,dc:creator,dc:contributor,dc:description:abstract,dc:date:issued,dc:identifier:uri,dc:type
+,,item1/hello.txt||item1/license.txt,Hello world||License file,,,,,New Item 1,Subject 1||Subject 2,,,,,,
+,,item2/hello.txt||item2/license.txt,Hello world||License file,0||1,2045-02-05||2055-03-10,sysadmin||,Reason 1||Reason 2,New Item 2,Subject 1||Subject 2,,,,,,
id,handle,collection_handles,files,file_descriptions,embargo_types,embargo_expirations,embargo_exempt_user_groups,embargo_reasons,dc:title,dc:subject,dc:creator,dc:contributor,dc:description:abstract,dc:date:issued,dc:identifier:uri,dc:type
+,,,item1/hello.txt||item1/license.txt,Hello world||License file,,,,,New Item 1,Subject 1||Subject 2,,,,,,
+,,,item2/hello.txt||item2/license.txt,Hello world||License file,0||1,2045-02-05||2055-03-10,sysadmin||,Reason 1||Reason 2,New Item 2,Subject 1||Subject 2,,,,,,
38 changes: 23 additions & 15 deletions test/fixtures/handles.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,66 +2,74 @@

southwest_unit1_collection1:
collection: southwest_unit1_collection1
suffix: 5014
suffix: 5000

southwest_unit1_collection1_item1:
item: southwest_unit1_collection1_item1
suffix: 5000
suffix: 5001

################################### southeast ####################################

# collection handles

southeast_collection1:
collection: southeast_collection1
suffix: 5001
suffix: 5002

southeast_collection2:
collection: southeast_collection2
suffix: 5003

southeast_empty:
collection: southeast_empty
suffix: 5004

# item handles

southeast_item1:
item: southeast_item1
suffix: 5002
suffix: 5005

southeast_approved:
item: southeast_approved
suffix: 5003
suffix: 5006

southeast_awaiting_ingest_into_medusa:
item: southeast_awaiting_ingest_into_medusa
suffix: 5004
suffix: 5007

southeast_in_medusa:
item: southeast_in_medusa
suffix: 5005
suffix: 5008

southeast_described:
item: southeast_described
suffix: 5006
suffix: 5009

southeast_embargoed:
item: southeast_embargoed
suffix: 5007
suffix: 5010

southeast_undescribed:
item: southeast_undescribed
suffix: 5008
suffix: 5011

southeast_submitting:
item: southeast_submitting
suffix: 5009
suffix: 5012

southeast_withdrawn:
item: southeast_withdrawn
suffix: 5010
suffix: 5013

southeast_buried:
item: southeast_buried
suffix: 5011
suffix: 5014

southeast_multiple_bitstreams:
item: southeast_multiple_bitstreams
suffix: 5012
suffix: 5015

southeast_collection1_collection1_item1:
item: southeast_collection1_collection1_item1
suffix: 5013
suffix: 5016
6 changes: 4 additions & 2 deletions test/jobs/import_job_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,8 @@ class ImportJobTest < ActiveSupport::TestCase
ObjectStore.instance.put_object(key: import.file_key,
path: fixture)

format = ImportJob.perform_now(import: import)
format = ImportJob.perform_now(import: import,
user: users(:southeast_admin))
assert_equal Import::Format::CSV_FILE, format
end

Expand All @@ -72,7 +73,8 @@ class ImportJobTest < ActiveSupport::TestCase
ObjectStore.instance.put_object(key: import.file_key,
path: csv_package)

format = ImportJob.perform_now(import: import)
format = ImportJob.perform_now(import: import,
user: users(:southeast_admin))
assert_equal Import::Format::CSV_PACKAGE, format
end

Expand Down
Loading

0 comments on commit 60dcac2

Please sign in to comment.