Skip to content

Commit

Permalink
Lots of updates related to S3 and image derivative generation
Browse files Browse the repository at this point in the history
  • Loading branch information
elohanlon committed Dec 17, 2024
1 parent a3461e1 commit c3ab328
Show file tree
Hide file tree
Showing 13 changed files with 69 additions and 70 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ Hyacinth.DigitalObjectsApp.DigitalObject.Base.getImageUrl = function (pid, type,
if (type == 'scaled') { type = 'full'; }
if (type == 'square') { type = 'featured'; }

return Hyacinth.imageServerUrl + '/iiif/2/' + pid + '/' + type + '/!' + size + ',' + size + '/0/default.jpg';
return Hyacinth.imageServerUrl + '/iiif/2/limited/' + pid + '/' + type + '/!' + size + ',' + size + '/0/default.jpg';
};

Hyacinth.DigitalObjectsApp.DigitalObject.Base.showMediaViewModal = function (pid) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,9 +65,9 @@ Hyacinth.DigitalObjectsApp.DigitalObjectSearchResult.prototype.getImageUrl = fun
if(type == 'square') { type = 'featured'; }

if(this.getHyacinthType() == 'asset') {
return Hyacinth.imageServerUrl + '/iiif/2/' + this.getPid() + '/' + type + '/!' + size + ',' + size + '/0/default.jpg';
return Hyacinth.imageServerUrl + '/iiif/2/limited/' + this.getPid() + '/' + type + '/!' + size + ',' + size + '/0/default.jpg';
} else if (this.getHyacinthType() == 'item' && this.getOrderedChildDigitalObjectPids().length > 0) {
return Hyacinth.imageServerUrl + '/iiif/2/' + this.getOrderedChildDigitalObjectPids()[0] + '/' + type + '/!' + size + ',' + size + '/0/default.jpg';
return Hyacinth.imageServerUrl + '/iiif/2/limited/' + this.getOrderedChildDigitalObjectPids()[0] + '/' + type + '/!' + size + ',' + size + '/0/default.jpg';
} else {
return null;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,7 @@
<% if(digitalObject.getAccessCopyLocation() == null) { %>
N/A
<% } else { %>
<a href="<%= Hyacinth.getLocationOrigin() + '/digital_objects/' + digitalObject.getPid() + '/download_access_copy' %>" target="_blank"><span class="glyphicon glyphicon-download"></span> Download</a>
<a href="<%= Hyacinth.getLocationOrigin() + '/digital_objects/' + digitalObject.getPid() + '/download_access_copy?download=true' %>" target="_blank"><span class="glyphicon glyphicon-download"></span> Download</a>
(<%= digitalObject.getAccessCopyFileSizeString() %>) [<%= digitalObject.getAccessCopyFileSizeInBytes() %> bytes]
<% } %>
</div>
Expand Down
23 changes: 8 additions & 15 deletions app/controllers/concerns/hyacinth/digital_objects/downloads.rb
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,12 @@ def download
end

storage_object = Hyacinth::Storage.storage_object_for(
Hyacinth::Utils::PathUtils.ds_location_to_decoded_location_uri(
@digital_object.fedora_object.datastreams['content'].dsLocation
)
@digital_object.fedora_object.datastreams['content'].dsLocation
)

if storage_object.is_a?(Hyacinth::Storage::FileObject)
use_send_file_for_storage_object(storage_object)
# use_send_file_for_storage_object(storage_object)
use_action_controller_live_streaming_for_storage_object(storage_object, response)
else
use_action_controller_live_streaming_for_storage_object(storage_object, response)
end
Expand Down Expand Up @@ -45,17 +44,11 @@ def use_action_controller_live_streaming_for_storage_object(storage_object, resp
break unless resp.stream.connected?
resp.stream.write(chunk)

# NOTE: The section below is commented out for now because it might not be necessary after some
# recent code updates and header updates, but I'm keeping it here for reference in case we
# run into issues and need to uncomment it again later.
# Sleeping doesn't appear to be necessary when streaming S3 objects, but does appear to be
# necessary when sending local files in chunks using the resp.stream.write method. Since
# we're currently using send_file for Hyacinth::Storage::FileObject objects, it's probably
# okay to continue keeping this commented out.
# Prevent server instance from sleeping forever if client disconnects during download.
# Fix for local file downloads hanging when using local Puma rails server. Allow thread to switch as needed
# during the download. Also prevents server instance from sleeping forever if client disconnects during download.
# See: https://gist.github.com/njakobsen/6257887
# A value of 0.1 seems to be more reliable than smaller values.
# sleep 0.1
# Note: Thread.pass doesn't appear to be necessary when streaming S3 objects. Only local files.
Thread.pass
end
ensure
# Always close the stream, even if the client disconnects early
Expand Down Expand Up @@ -89,7 +82,7 @@ def download_access_copy
if @digital_object.is_a?(DigitalObject::Asset)
access_ds = @digital_object.fedora_object&.datastreams&.fetch('access')
raise Hyacinth::Exceptions::NotFoundError, "No access copy location available for #{@digital_object.pid}" unless access_ds.dsLocation
access_file_path = Hyacinth::Utils::PathUtils.ds_location_to_filesystem_path(access_ds.dsLocation)
access_file_path = Hyacinth::Utils::UriUtils.location_uri_to_file_path(access_ds.dsLocation)
raise Hyacinth::Exceptions::NotFoundError, "Access copy file not found at expected location for #{@digital_object.pid}" unless File.exist?(access_file_path)

label = access_ds.dsLabel.present? ? access_ds.dsLabel.split('/').last : 'file' # Use dsLabel as download label if available
Expand Down
1 change: 0 additions & 1 deletion app/controllers/digital_objects_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,6 @@ def update

handle_publish_param(@digital_object, params)
handle_mint_reserved_doi_param(@digital_object, params)

# Whenever a direct file upload occurs, that triggers a republish so that DLC
# can reindex the object with the access/poster info.
republish_after_save = @digital_object.is_a?(DigitalObject::Asset) && request_includes_direct_upload_file_param?
Expand Down
8 changes: 4 additions & 4 deletions app/jobs/request_derivatives_job.rb
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,12 @@ def payload_for_derivative_request(asset, requested_derivatives)
identifier: asset.pid,
delivery_target: 'hyacinth2',
adjust_orientation: asset.fedora_object.orientation,
main_uri: Addressable::URI.encode("file://#{asset.filesystem_location}"),
main_uri: asset.filesystem_location_uri,
requested_derivatives: requested_derivatives,
# access_uri can be nil, if no access copy currently exists
access_uri: asset.access_copy_location.nil? ? nil : Addressable::URI.encode("file://#{asset.access_copy_location}"),
access_uri: asset.access_copy_location_uri,
# poster_uri can be nil, if no poster currently exists
poster_uri: asset.poster_location.nil? ? nil : Addressable::URI.encode("file://#{asset.poster_location}")
poster_uri: asset.poster_location_uri
}
}
end
Expand All @@ -55,7 +55,7 @@ def required_derivatives_for_asset(asset)
required_derivatives = []
# We attempt to generate an access copy for all asset types
required_derivatives << 'access' if asset.access_copy_location.nil?
# Note: Image assets do not need a poster image
# We attempt to generate a poster for all asset types EXCEPT Image assets, since they don't need a poster
required_derivatives << 'poster' if asset.poster_location.nil? && asset.dc_type != 'StillImage'
# Get featured region
required_derivatives << 'featured_region' if asset.featured_region.nil?
Expand Down
5 changes: 4 additions & 1 deletion app/jobs/update_image_service_job.rb
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ def perform(digital_object_pid)
)
rescue RestClient::BadRequest => e
Rails.logger.error('Received Bad Request response from the image server: ' + JSON.parse(e.http_body)['errors'].inspect)
# rescue RestClient::InternalServerError => e
# Rails.logger.error('Received Internal Server Error response from the image server: ' + JSON.parse(e.http_body)['errors'].inspect)
rescue Errno::ECONNREFUSED
# Silently fail because the image server is currently unavailable and there is nothing we can do.
end
Expand All @@ -22,7 +24,8 @@ def payload_for_image_service_update_request(asset)
source_uri: image_source_uri_for_digital_object(asset),
featured_region: asset.featured_region,
# Supply pcdm type for MAIN resource (not access / poster)
pcdm_type: asset.pcdm_type
pcdm_type: asset.pcdm_type,
has_view_limitation: asset.restricted_size_image
}
}
end
Expand Down
15 changes: 7 additions & 8 deletions app/models/concerns/digital_object/assets/file_import.rb
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,11 @@ def do_file_import

# "controlGroup => 'E'" below means "External Referenced Content" -- as in, a file that's referenced by Fedora but not stored in Fedora's internal data store

# Line below will create paths like "file:/this%23_and_%26_also_something%20great/here.txt"
# We DO NOT want a double slash at the beginnings of these paths.
# We need to manually escape ampersands (%26) and pound signs (%23) because these are not always handled by Addressable::URI.encode()
# Line below will create paths like "file:///this%23_and_%26_also_something%20great/here.txt"
# We need to manually escape ampersands (%26) and pound signs (%23) because these are not handled by Addressable::URI.encode()

content_ds = @fedora_object.create_datastream(ActiveFedora::Datastream, 'content', controlGroup: 'E', mimeType: BestType.mime_type.for_file_name(original_filename), dsLabel: original_filename, versionable: true)
content_ds.dsLocation = Hyacinth::Utils::PathUtils.location_uri_to_encoded_ds_location(final_save_location_uri)
content_ds.dsLocation = final_save_location_uri
@fedora_object.datastreams["DC"].dc_source = final_save_location_uri
@fedora_object.add_datastream(content_ds)

Expand Down Expand Up @@ -112,7 +111,7 @@ def do_access_copy_import
dest_file_path = File.join(dest_dir, access_filename)
copy_access_copy_to_save_destination(@access_copy_import_path, dest_file_path)

access_ds_location = Hyacinth::Utils::PathUtils.filesystem_path_to_ds_location(dest_file_path)
access_ds_location = Hyacinth::Utils::UriUtils.file_path_to_location_uri(dest_file_path)

# Create access datastream if it doesn't already exist
access_ds = @fedora_object.datastreams['access']
Expand Down Expand Up @@ -153,10 +152,10 @@ def do_service_copy_import
FileUtils.mkdir_p(dest_dir)
dest_file_path = File.join(dest_dir, 'service' + File.extname(service_filename))
FileUtils.cp(@service_copy_import_path, dest_file_path)
service_ds_location = Hyacinth::Utils::PathUtils.filesystem_path_to_ds_location(dest_file_path)
service_ds_location = Hyacinth::Utils::UriUtils.file_path_to_location_uri(dest_file_path)
when DigitalObject::Asset::IMPORT_TYPE_EXTERNAL
# track file where it is
service_ds_location = Hyacinth::Utils::PathUtils.filesystem_path_to_ds_location(@service_copy_import_path)
service_ds_location = Hyacinth::Utils::UriUtils.file_path_to_location_uri(@service_copy_import_path)
else
raise "Currently unimplemented import mechanism for service copy: #{@service_copy_import_type}"
end
Expand All @@ -178,7 +177,7 @@ def do_poster_import
# When Derivativo 1.5 is released, this can change to 0640 permissions.
FileUtils.chmod(0660, dest_file_path)

poster_ds_location = Hyacinth::Utils::PathUtils.filesystem_path_to_ds_location(dest_file_path)
poster_ds_location = Hyacinth::Utils::UriUtils.file_path_to_location_uri(dest_file_path)

# Create poster datastream if it doesn't already exist
poster_ds = @fedora_object.datastreams['poster']
Expand Down
40 changes: 26 additions & 14 deletions app/models/digital_object/asset.rb
Original file line number Diff line number Diff line change
Expand Up @@ -107,30 +107,42 @@ def convert_upload_import_to_internal!
@import_file_import_type = IMPORT_TYPE_INTERNAL
end

# Translates a location URI into a plain file path when (and only when) it is a file URI.
#
# @param uri [String, nil] a location URI such as a file URI or any other location string
# @return [String, nil] nil when uri is nil; the unescaped file path when uri starts with 'file:/';
#   otherwise the given uri, unchanged
def convert_location_uri_to_path_if_file_uri(uri)
  return if uri.nil?
  # Anything that is not a file URI is passed through untouched
  return uri unless uri.start_with?('file:/')

  Hyacinth::Utils::UriUtils.location_uri_to_file_path(uri)
end

# Returns the raw dsLocation value of the 'content' datastream.
#
# @return [String, nil] the dsLocation value, or nil when the fedora object, its datastreams,
#   or the 'content' datastream is nil
def filesystem_location_uri
  return if @fedora_object.nil?

  datastreams = @fedora_object.datastreams
  return if datastreams.nil?

  datastreams['content']&.dsLocation
end

def filesystem_location
content_ds = @fedora_object.datastreams['content']
return nil unless content_ds.present?
Hyacinth::Utils::PathUtils.ds_location_to_decoded_location_uri(content_ds.dsLocation)
convert_location_uri_to_path_if_file_uri(filesystem_location_uri)
end

# Returns the raw dsLocation value of the 'access' datastream.
#
# @return [String, nil] the dsLocation value, or nil when the fedora object, its datastreams,
#   or the 'access' datastream is nil
def access_copy_location_uri
  return if @fedora_object.nil?

  datastreams = @fedora_object.datastreams
  return if datastreams.nil?

  datastreams['access']&.dsLocation
end

def access_copy_location
return nil if @fedora_object.blank?
access_ds = @fedora_object.datastreams['access']
return nil unless access_ds.present?
Addressable::URI.unencode(access_ds.dsLocation).gsub(/^file:/, '')
convert_location_uri_to_path_if_file_uri(access_copy_location_uri)
end

# Returns the raw dsLocation value of the 'poster' datastream.
#
# @return [String, nil] the dsLocation value, or nil when the fedora object, its datastreams,
#   or the 'poster' datastream is nil
def poster_location_uri
  return if @fedora_object.nil?

  datastreams = @fedora_object.datastreams
  return if datastreams.nil?

  datastreams['poster']&.dsLocation
end

def poster_location
return nil if @fedora_object.blank?
poster_ds = @fedora_object.datastreams['poster']
return nil unless poster_ds.present?
Addressable::URI.unencode(poster_ds.dsLocation).gsub(/^file:/, '')
convert_location_uri_to_path_if_file_uri(poster_location_uri)
end

# Returns the raw dsLocation value of the 'service' datastream.
#
# @return [String, nil] the dsLocation value, or nil when the fedora object, its datastreams,
#   or the 'service' datastream is nil
def service_copy_location_uri
  return if @fedora_object.nil?

  datastreams = @fedora_object.datastreams
  return if datastreams.nil?

  datastreams['service']&.dsLocation
end

def service_copy_location
service_ds = @fedora_object.datastreams['service']
return nil unless service_ds.present?
Addressable::URI.unencode(service_ds.dsLocation).gsub(/^file:/, '')
convert_location_uri_to_path_if_file_uri(service_copy_location_uri)
end

def checksum
Expand Down
2 changes: 1 addition & 1 deletion app/views/digital_objects/media_view.html.erb
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

<script>
$(document).ready(function(){
var tileSources = [Hyacinth.imageServerUrl + "/iiif/2/<%= @digital_object.pid %>/info.json"];
var tileSources = [Hyacinth.imageServerUrl + "/iiif/2/standard/<%= @digital_object.pid %>/info.json"];

var initialPage = 0;

Expand Down
4 changes: 2 additions & 2 deletions config/templates/hyacinth.template.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ development:
default_pid_generator_namespace: cul
digital_object_data_directory: <%= Rails.root.join('tmp', 'development', 'data') %>
default_asset_home_bucket_name: 'development-bucket'
default_asset_home_bucket_path_prefix: 'hyacinth_local_development/default_asset_home'
default_asset_home_bucket_path_prefix: 'hyacinth/local_development/default_asset_home'
default_asset_home: <%= Rails.root.join('tmp', 'development', 'asset_home') %>
default_service_copy_home: <%= Rails.root.join('tmp', 'development', 'service_copy_home') %>
access_copy_directory: <%= Rails.root.join('tmp', 'development', 'access') %>
Expand All @@ -22,7 +22,7 @@ test:
default_pid_generator_namespace: cul
digital_object_data_directory: <%= Rails.root.join('tmp', 'test', 'data') %>
default_asset_home_bucket_name: 'test-bucket'
default_asset_home_bucket_path_prefix: 'hyacinth_local_test/default_asset_home'
default_asset_home_bucket_path_prefix: 'hyacinth/local_test/default_asset_home'
default_asset_home: <%= Rails.root.join('tmp', 'test', 'asset_home') %>
default_service_copy_home: <%= Rails.root.join('tmp', 'test', 'service_copy_home') %>
access_copy_directory: <%= Rails.root.join('tmp', 'test', 'access') %>
Expand Down
20 changes: 0 additions & 20 deletions lib/hyacinth/utils/path_utils.rb
Original file line number Diff line number Diff line change
Expand Up @@ -36,24 +36,4 @@ def self.access_directory_path_for_uuid!(uuid)
FileUtils.mkdir_p(dest_dir, mode: 0755)
dest_dir
end

# @deprecated This method might be removed soon.
# Converts a file path to a Fedora datastream dsLocation value
def self.filesystem_path_to_ds_location(path)
Addressable::URI.encode('file:' + path).gsub('&', '%26').gsub('#', '%23')
end

# @deprecated This method might be removed soon.
# Converts a Fedora datastream dsLocation value to a file path.
#
# @param ds_location [String] dsLocation value, e.g. "file:/some%20dir/file.txt"
# @return [String] the decoded value with its leading "file:" prefix removed
def self.ds_location_to_filesystem_path(ds_location)
  # The decoder requires the dsLocation argument; calling it without one raises ArgumentError.
  decoded_location_uri = self.ds_location_to_decoded_location_uri(ds_location)
  # Anchor with \A (start of string) rather than ^ (start of any line) so that only the
  # scheme prefix is stripped, even if the decoded value contains newline characters.
  decoded_location_uri.sub(/\Afile:/, '')
end

def self.location_uri_to_encoded_ds_location(location_uri)
Addressable::URI.encode(location_uri).gsub('&', '%26').gsub('#', '%23')
end

def self.ds_location_to_decoded_location_uri(ds_location)
Addressable::URI.unencode(ds_location)
end
end
13 changes: 13 additions & 0 deletions lib/hyacinth/utils/uri_utils.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Utility methods for converting between absolute file paths and percent-encoded file location URIs.
class Hyacinth::Utils::UriUtils
  # Converts a file path to a location URI value.
  #
  # @param path [String] absolute file path (must start with a slash)
  # @return [String] percent-encoded URI of the form "file:///some/path"
  # @raise [ArgumentError] if path is nil or does not start with a slash
  def self.file_path_to_location_uri(path)
    raise ArgumentError, "Given path must be absolute. Must start with a slash: #{path}" unless path&.start_with?('/')

    # Addressable::URI.encode does not percent-encode '&', '#' or '?', so they are escaped manually.
    # '?' matters for round-tripping: left unescaped it starts the URI's query component, and
    # location_uri_to_file_path only reads the path component, so the rest of the file name would be lost.
    encoded_path = Addressable::URI.encode(path).gsub('&', '%26').gsub('#', '%23').gsub('?', '%3F')
    "file://#{encoded_path}"
  end

  # Converts a file location URI to an unescaped file path.
  #
  # @param location_uri [String] a file URI, e.g. "file:///some%20dir/file.txt"
  # @return [String] the percent-decoded path component of the URI
  # @raise [ArgumentError] if location_uri is nil or does not start with 'file:/'
  def self.location_uri_to_file_path(location_uri)
    # NOTE: Although I'd like to assume that all file URIs start with 'file:///', some older ones will start with 'file:/'.
    raise ArgumentError, "Not a valid file URI: #{location_uri}" unless location_uri&.start_with?('file:/')

    Addressable::URI.unencode(Addressable::URI.parse(location_uri).path)
  end
end

0 comments on commit c3ab328

Please sign in to comment.