Skip to content

Commit

Permalink
updated to use ephemeral storage for files under 19 GB
Browse files Browse the repository at this point in the history
  • Loading branch information
genschmitt committed May 23, 2024
1 parent 17fba6f commit ce7014a
Show file tree
Hide file tree
Showing 6 changed files with 56 additions and 4 deletions.
1 change: 1 addition & 0 deletions config/settings/demo.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,4 @@ aws:
queue_url: "https://sqs.us-east-2.amazonaws.com/721945215539/extractor-to-databank-demo"
s3:
json_bucket: "databank-demo-main"
ephemeral_storage_path: "/tmp/extractor/"
3 changes: 2 additions & 1 deletion config/settings/prod.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,5 @@ aws:
queue_name: "extractor-to-databank-prod"
queue_url: "https://sqs.us-east-2.amazonaws.com/721945215539/extractor-to-databank-prod"
s3:
json_bucket: "databank-main"
json_bucket: "databank-main"
ephemeral_storage_path: "/tmp/extractor/"
3 changes: 2 additions & 1 deletion config/settings/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,5 @@ aws:
queue_name: "extractor-to-databank-test"
queue_url: "https://sqs.us-east-2.amazonaws.com/721945215539/extractor-to-databank-test"
s3:
json_bucket: "databank-test-main"
json_bucket: "databank-test-main"
ephemeral_storage_path: "/test/tmp/extractor/"
19 changes: 17 additions & 2 deletions lib/archive_extractor.rb
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@
class ArchiveExtractor
attr_accessor :s3, :sqs, :bucket_name, :object_key, :binary_name, :web_id, :mime_type, :extraction
Config.load_and_set_settings(Config.setting_files("#{ENV['RUBY_HOME']}/config", ENV['RUBY_ENV']))
STDOUT.sync = true
LOGGER = Logger.new(STDOUT)
GIGABYTE = 2**30

def initialize(bucket_name, object_key, binary_name, web_id, mime_type, sqs, s3)
@bucket_name = bucket_name
Expand All @@ -29,8 +31,10 @@ def initialize(bucket_name, object_key, binary_name, web_id, mime_type, sqs, s3)
def extract
begin
error = []

del_path = "#{Settings.aws.efs.mount_point}#{@bucket_name}_#{@web_id}"

storage_path = get_storage_path
LOGGER.info("Storage path: #{storage_path}")
del_path = "#{storage_path}#{@bucket_name}_#{@web_id}"
local_path = "#{del_path}/#{@object_key}"

dirname = File.dirname(local_path)
Expand All @@ -56,6 +60,17 @@ def extract
end
end

# Picks the base directory used to stage the archive, based on the size S3
# reports for the object identified by @bucket_name / @object_key.
#
# Objects larger than 19 * GIGABYTE bytes are directed to the EFS mount point
# (Settings.aws.efs.mount_point); everything else, including an object of
# exactly that size, goes to Settings.ephemeral_storage_path.
#
# Logs the reported size, prefixed with @web_id, at info level.
#
# @return the configured path value for the selected storage location
def get_storage_path
  # Kept as an explicit positional hash so the client receives the same
  # argument shape as before.
  request = {
    bucket: @bucket_name,
    key: @object_key,
    object_attributes: ['ObjectSize']
  }
  object_size = @s3.get_object_attributes(request).object_size
  LOGGER.info("#{@web_id} size: #{object_size}")

  # Strictly greater-than: the boundary size itself still uses ephemeral storage.
  return Settings.aws.efs.mount_point if object_size > 19 * GIGABYTE

  Settings.ephemeral_storage_path
end

def get_object(local_path, error)
begin
@s3.get_object({
Expand Down
1 change: 1 addition & 0 deletions lib/extractor/extraction.rb
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ class Extraction
attr_accessor :binary_name, :storage_path, :status, :peek_type, :peek_text, :id, :nested_items, :error, :mime_type
ALLOWED_CHAR_NUM = 1024 * 8
ALLOWED_DISPLAY_BYTES = ALLOWED_CHAR_NUM * 8
STDOUT.sync = true
LOGGER = Logger.new(STDOUT)
def initialize(binary_name, storage_path, id, mime_type)
@binary_name = binary_name
Expand Down
33 changes: 33 additions & 0 deletions test/archive_extractor_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ def test_extract
@archive_extractor.web_id = 'test-zip'
@archive_extractor.mime_type = 'application/zip'
@archive_extractor.object_key = 'test.zip'
resp = Minitest::Mock.new
resp.expect(:object_size, 23_456_789_123)
@s3.expect(:get_object_attributes, resp, [{bucket: 'test-bucket', key: 'test.zip', object_attributes: ['ObjectSize']}])
del_path = "#{Settings.aws.efs.mount_point}#{@archive_extractor.bucket_name}_#{@archive_extractor.web_id}"
local_path = "#{del_path}/#{@archive_extractor.object_key}"
file_path = "#{ENV['RUBY_HOME']}/test/test.zip"
Expand Down Expand Up @@ -48,6 +51,36 @@ def test_extract
assert_mock(@sqs)
end

# get_storage_path should return the ephemeral storage path when S3 reports
# a small object (12,345 bytes here).
def test_get_storage_path_small
  # setup: stub the attributes response and the S3 call that returns it
  # NOTE(review): 'test-bucket' / 'test-key' presumably match the fixture
  # built in this test class's setup - confirm there.
  attributes = Minitest::Mock.new
  attributes.expect(:object_size, 12_345)
  expected_request = { bucket: 'test-bucket', key: 'test-key', object_attributes: ['ObjectSize'] }
  @s3.expect(:get_object_attributes, attributes, [expected_request])

  # test
  chosen_path = @archive_extractor.get_storage_path

  # verify: both mocks were consumed and the ephemeral path was selected
  assert_mock(@s3)
  assert_mock(attributes)
  assert_equal(Settings.ephemeral_storage_path, chosen_path)
end

# get_storage_path should return the EFS mount point when S3 reports a
# large object (23,456,789,123 bytes here).
def test_get_storage_path_large
  # setup: stub the attributes response and the S3 call that returns it
  # NOTE(review): 'test-bucket' / 'test-key' presumably match the fixture
  # built in this test class's setup - confirm there.
  attributes = Minitest::Mock.new
  attributes.expect(:object_size, 23_456_789_123)
  expected_request = { bucket: 'test-bucket', key: 'test-key', object_attributes: ['ObjectSize'] }
  @s3.expect(:get_object_attributes, attributes, [expected_request])

  # test
  chosen_path = @archive_extractor.get_storage_path

  # verify: both mocks were consumed and the EFS mount point was selected
  assert_mock(@s3)
  assert_mock(attributes)
  assert_equal(Settings.aws.efs.mount_point, chosen_path)
end

def test_get_object
# setup
local_path = 'test/path'
Expand Down

0 comments on commit ce7014a

Please sign in to comment.