From ce7014ad766c178904727c5de98d1072bbfc4104 Mon Sep 17 00:00:00 2001
From: Gen Schmitt
Date: Thu, 23 May 2024 08:35:20 -0600
Subject: [PATCH] updated to use ephemeral storage for files under 19 GB

---
Reviewer notes (placed after the "---" separator, so they are not part of
the commit message):

* ArchiveExtractor#get_storage_path asks S3 for the object's ObjectSize and
  returns Settings.aws.efs.mount_point only when that size is strictly
  greater than 19 * GIGABYTE (GIGABYTE = 2**30 bytes). Any other size,
  including exactly 19 * 2**30 bytes, returns
  Settings.ephemeral_storage_path.
* ArchiveExtractor#extract now builds del_path from that storage path
  instead of always using the EFS mount point, and logs the chosen path.
* Each settings file gains a top-level ephemeral_storage_path key and ends
  without a trailing newline.
* STDOUT.sync = true is added to the class bodies of ArchiveExtractor and
  Extraction.
* NOTE(review): test.yml uses the absolute path /test/tmp/extractor/ while
  demo and prod use /tmp/extractor/ -- confirm this is intended.
* NOTE(review): the extract hunk removes one blank line and adds another;
  presumably the two differ only in whitespace.

 config/settings/demo.yml       |  1 +
 config/settings/prod.yml       |  3 ++-
 config/settings/test.yml       |  3 ++-
 lib/archive_extractor.rb       | 19 +++++++++++++++++--
 lib/extractor/extraction.rb    |  1 +
 test/archive_extractor_test.rb | 33 +++++++++++++++++++++++++++++++++
 6 files changed, 56 insertions(+), 4 deletions(-)

diff --git a/config/settings/demo.yml b/config/settings/demo.yml
index 18367ed..8f39349 100644
--- a/config/settings/demo.yml
+++ b/config/settings/demo.yml
@@ -6,3 +6,4 @@ aws:
     queue_url: "https://sqs.us-east-2.amazonaws.com/721945215539/extractor-to-databank-demo"
   s3:
     json_bucket: "databank-demo-main"
+ephemeral_storage_path: "/tmp/extractor/"
\ No newline at end of file
diff --git a/config/settings/prod.yml b/config/settings/prod.yml
index 40c24e7..912e477 100644
--- a/config/settings/prod.yml
+++ b/config/settings/prod.yml
@@ -5,4 +5,5 @@ aws:
     queue_name: "extractor-to-databank-prod"
     queue_url: "https://sqs.us-east-2.amazonaws.com/721945215539/extractor-to-databank-prod"
   s3:
-    json_bucket: "databank-main"
\ No newline at end of file
+    json_bucket: "databank-main"
+ephemeral_storage_path: "/tmp/extractor/"
\ No newline at end of file
diff --git a/config/settings/test.yml b/config/settings/test.yml
index db955db..4b86fd1 100644
--- a/config/settings/test.yml
+++ b/config/settings/test.yml
@@ -5,4 +5,5 @@ aws:
     queue_name: "extractor-to-databank-test"
     queue_url: "https://sqs.us-east-2.amazonaws.com/721945215539/extractor-to-databank-test"
   s3:
-    json_bucket: "databank-test-main"
\ No newline at end of file
+    json_bucket: "databank-test-main"
+ephemeral_storage_path: "/test/tmp/extractor/"
\ No newline at end of file
diff --git a/lib/archive_extractor.rb b/lib/archive_extractor.rb
index 094aa02..e351b1b 100644
--- a/lib/archive_extractor.rb
+++ b/lib/archive_extractor.rb
@@ -14,7 +14,9 @@ class ArchiveExtractor
   attr_accessor :s3, :sqs, :bucket_name, :object_key, :binary_name, :web_id, :mime_type, :extraction
 
   Config.load_and_set_settings(Config.setting_files("#{ENV['RUBY_HOME']}/config", ENV['RUBY_ENV']))
+  STDOUT.sync = true
   LOGGER = Logger.new(STDOUT)
+  GIGABYTE = 2**30
 
   def initialize(bucket_name, object_key, binary_name, web_id, mime_type, sqs, s3)
     @bucket_name = bucket_name
@@ -29,8 +31,10 @@ def initialize(bucket_name, object_key, binary_name, web_id, mime_type, sqs, s3)
   def extract
     begin
       error = []
-
-      del_path = "#{Settings.aws.efs.mount_point}#{@bucket_name}_#{@web_id}"
+
+      storage_path = get_storage_path
+      LOGGER.info("Storage path: #{storage_path}")
+      del_path = "#{storage_path}#{@bucket_name}_#{@web_id}"
       local_path = "#{del_path}/#{@object_key}"
       dirname = File.dirname(local_path)
 
@@ -56,6 +60,17 @@ def extract
     end
   end
 
+  def get_storage_path
+    resp = @s3.get_object_attributes({
+      bucket: @bucket_name,
+      key: @object_key,
+      object_attributes: ['ObjectSize']
+    })
+    object_size = resp.object_size
+    LOGGER.info("#{@web_id} size: #{object_size}")
+    object_size > 19 * GIGABYTE ? Settings.aws.efs.mount_point : Settings.ephemeral_storage_path
+  end
+
   def get_object(local_path, error)
     begin
       @s3.get_object({
diff --git a/lib/extractor/extraction.rb b/lib/extractor/extraction.rb
index b564f89..054b314 100644
--- a/lib/extractor/extraction.rb
+++ b/lib/extractor/extraction.rb
@@ -21,6 +21,7 @@ class Extraction
     attr_accessor :binary_name, :storage_path, :status, :peek_type, :peek_text, :id, :nested_items, :error, :mime_type
     ALLOWED_CHAR_NUM = 1024 * 8
     ALLOWED_DISPLAY_BYTES = ALLOWED_CHAR_NUM * 8
+    STDOUT.sync = true
     LOGGER = Logger.new(STDOUT)
     def initialize(binary_name, storage_path, id, mime_type)
       @binary_name = binary_name
diff --git a/test/archive_extractor_test.rb b/test/archive_extractor_test.rb
index 1ed4313..baa4a3d 100644
--- a/test/archive_extractor_test.rb
+++ b/test/archive_extractor_test.rb
@@ -20,6 +20,9 @@ def test_extract
     @archive_extractor.web_id = 'test-zip'
     @archive_extractor.mime_type = 'application/zip'
     @archive_extractor.object_key = 'test.zip'
+    resp = Minitest::Mock.new
+    resp.expect(:object_size, 23_456_789_123)
+    @s3.expect(:get_object_attributes, resp, [{bucket: 'test-bucket', key: 'test.zip', object_attributes: ['ObjectSize']}])
     del_path = "#{Settings.aws.efs.mount_point}#{@archive_extractor.bucket_name}_#{@archive_extractor.web_id}"
     local_path = "#{del_path}/#{@archive_extractor.object_key}"
     file_path = "#{ENV['RUBY_HOME']}/test/test.zip"
@@ -48,6 +51,36 @@ def test_extract
     assert_mock(@sqs)
   end
 
+  def test_get_storage_path_small
+    # setup
+    resp = Minitest::Mock.new
+    @s3.expect(:get_object_attributes, resp, [{bucket: 'test-bucket', key: 'test-key', object_attributes: ['ObjectSize']}])
+    resp.expect(:object_size, 12_345)
+
+    # test
+    storage_path = @archive_extractor.get_storage_path
+
+    # verify
+    assert_mock(@s3)
+    assert_mock(resp)
+    assert_equal(Settings.ephemeral_storage_path, storage_path)
+  end
+
+  def test_get_storage_path_large
+    # setup
+    resp = Minitest::Mock.new
+    @s3.expect(:get_object_attributes, resp, [{bucket: 'test-bucket', key: 'test-key', object_attributes: ['ObjectSize']}])
+    resp.expect(:object_size, 23_456_789_123)
+
+    # test
+    storage_path = @archive_extractor.get_storage_path
+
+    # verify
+    assert_mock(@s3)
+    assert_mock(resp)
+    assert_equal(Settings.aws.efs.mount_point, storage_path)
+  end
+
   def test_get_object
     # setup
     local_path = 'test/path'