diff --git a/packaging/resource_suite_s3_nocache.py b/packaging/resource_suite_s3_nocache.py
index 8d309f7..81205d2 100644
--- a/packaging/resource_suite_s3_nocache.py
+++ b/packaging/resource_suite_s3_nocache.py
@@ -119,8 +119,6 @@ def setUp(self):
         if hasattr(self, 's3sse'):
             self.s3_context += ';S3_SERVER_ENCRYPT=' + str(self.s3sse)
 
-        self.s3_context += ';ARCHIVE_NAMING_POLICY=' + self.archive_naming_policy
-
         self.admin.assert_icommand("iadmin modresc demoResc name origResc", 'STDOUT_SINGLELINE', 'rename', input='yes\n')
         self.admin.assert_icommand("iadmin mkresc demoResc s3 " + hostname + ":/" + self.s3bucketname + "/demoResc " + self.s3_context, 'STDOUT_SINGLELINE', 's3')
@@ -2869,3 +2867,157 @@ def test_iput_large_file_with_checksum_issue_2124(self):
         self.user0.assert_icommand("irm -f {file1}".format(**locals()), 'EMPTY')
         s3plugin_lib.remove_if_exists(file1)
         s3plugin_lib.remove_if_exists(file1_get)
+
+class Test_S3_NoCache_Decoupled_Base(Test_S3_NoCache_Base):
+
+    def test_decoupled_redirect_issue_2146(self):
+
+        file1 = "f1"
+        file1_size = 2*1024*1024
+        retrieved_file = "f1.get"
+        resource_host = test.settings.HOSTNAME_3
+        resource_name = "s3_resc_on_host3"
+
+        # create an S3 resource on a different host so that the connected server redirects
+        self.admin.assert_icommand(f'iadmin mkresc {resource_name} s3 {resource_host}:/{self.s3bucketname}/{resource_name} {self.s3_context}', 'STDOUT_SINGLELINE', 's3')
+
+        try:
+            # create the file
+            lib.make_arbitrary_file(file1, file1_size)
+
+            # put the file
+            self.user1.assert_icommand(f'iput -R {resource_name} {file1}')
+
+            # get the file and verify its contents
+            self.user1.assert_icommand(f'iget {file1} {retrieved_file}')
+            self.assertTrue(filecmp.cmp(file1, retrieved_file))
+
+            # verify from the physical path that decoupled naming was used
+            print(self.user1.run_icommand(['ils', '-l', file1])[0])  # debug output
+            self.user1.assert_icommand(f'ils -L {file1}', 'STDOUT_SINGLELINE', f'/{self.s3bucketname}/[0-9]+/{file1}', use_regex=True)
+
+        finally:
+            # cleanup
+            self.user1.assert_icommand(f'irm -f {file1}')
+            self.admin.assert_icommand(f'iadmin rmresc {resource_name}')
+            s3plugin_lib.remove_if_exists(file1)
+            s3plugin_lib.remove_if_exists(retrieved_file)
+
+    # Verifies that once the physical path has been written to the database, the naming policy
+    # in effect at creation time continues to apply, because the database always wins.
+    def test_resource_updated_from_consistent_to_decoupled_issue_2161(self):
+
+        file1 = "f1"
+        file1_size = 2*1024
+        file2 = "f2"
+        file2_size = 3*1024
+        retrieved_file = 'f.get'
+        hostname = lib.get_hostname()
+        resource_name = "s3_resc"
+
+        s3_context_consistent = self.s3_context.replace('ARCHIVE_NAMING_POLICY=decoupled', 'ARCHIVE_NAMING_POLICY=consistent')
+        s3_context_decoupled = self.s3_context
+
+        # create the S3 resource in consistent mode
+        self.admin.assert_icommand(f'iadmin mkresc {resource_name} s3 {hostname}:/{self.s3bucketname}/{resource_name} {s3_context_consistent}', 'STDOUT_SINGLELINE', 's3')
+
+        try:
+            # create file1 and file2
+            lib.make_arbitrary_file(file1, file1_size)
+            lib.make_arbitrary_file(file2, file2_size)
+
+            # put file1
+            self.user1.assert_icommand(f'iput -R {resource_name} {file1}')
+
+            # get the file and verify its contents
+            self.user1.assert_icommand(f'iget {file1} {retrieved_file}')
+            self.assertTrue(filecmp.cmp(file1, retrieved_file))
+
+            # verify from the physical path that consistent naming was used
+            print(self.user1.run_icommand(['ils', '-l', file1])[0])  # debug output
+            self.user1.assert_icommand(f'ils -L {file1}', 'STDOUT_SINGLELINE', f'/{self.s3bucketname}/{resource_name}/home/.+/{file1}', use_regex=True)
+
+            # update the resource to decoupled mode
+            self.admin.run_icommand(f'iadmin modresc {resource_name} context {s3_context_decoupled}')
+
+            # get the file again and verify its contents
+            self.user1.assert_icommand(f'iget -f {file1} {retrieved_file}')
+            self.assertTrue(filecmp.cmp(file1, retrieved_file))
+
+            # overwrite file1 with file2
+            self.user1.assert_icommand(f'iput -f -R {resource_name} {file2} {file1}')
+
+            # verify the physical path still reflects consistent naming (the database wins)
+            print(self.user1.run_icommand(['ils', '-l', file1])[0])  # debug output
+            self.user1.assert_icommand(f'ils -L {file1}', 'STDOUT_SINGLELINE', f'/{self.s3bucketname}/{resource_name}/home/.+/{file1}', use_regex=True)
+
+            # get the file and verify that its contents now match file2
+            self.user1.assert_icommand(f'iget -f {file1} {retrieved_file}')
+            self.assertTrue(filecmp.cmp(file2, retrieved_file))
+
+        finally:
+            # cleanup
+            self.user1.assert_icommand(f'irm -f {file1}')
+            self.admin.assert_icommand(f'iadmin rmresc {resource_name}')
+            s3plugin_lib.remove_if_exists(file1)
+            s3plugin_lib.remove_if_exists(file2)
+            s3plugin_lib.remove_if_exists(retrieved_file)
+
+    # Verifies that once the physical path has been written to the database, the naming policy
+    # in effect at creation time continues to apply, because the database always wins.
+    def test_resource_updated_from_decoupled_to_consistent_issue_2161(self):
+
+        file1 = "f1"
+        file1_size = 2*1024
+        file2 = "f2"
+        file2_size = 3*1024
+        retrieved_file = 'f.get'
+        hostname = lib.get_hostname()
+        resource_name = "s3_resc"
+
+        s3_context_consistent = self.s3_context.replace('ARCHIVE_NAMING_POLICY=decoupled', 'ARCHIVE_NAMING_POLICY=consistent')
+        s3_context_decoupled = self.s3_context
+
+        # create the S3 resource in decoupled mode
+        self.admin.assert_icommand(f'iadmin mkresc {resource_name} s3 {hostname}:/{self.s3bucketname}/{resource_name} {s3_context_decoupled}', 'STDOUT_SINGLELINE', 's3')
+
+        try:
+            # create file1 and file2
+            lib.make_arbitrary_file(file1, file1_size)
+            lib.make_arbitrary_file(file2, file2_size)
+
+            # put file1
+            self.user1.assert_icommand(f'iput -R {resource_name} {file1}')
+
+            # get the file and verify its contents
+            self.user1.assert_icommand(f'iget {file1} {retrieved_file}')
+            self.assertTrue(filecmp.cmp(file1, retrieved_file))
+
+            # verify from the physical path that decoupled naming was used
+            print(self.user1.run_icommand(['ils', '-l', file1])[0])  # debug output
+            self.user1.assert_icommand(f'ils -L {file1}', 'STDOUT_SINGLELINE', f'/{self.s3bucketname}/[0-9]+/{file1}', use_regex=True)
+
+            # update the resource to consistent mode
+            self.admin.run_icommand(f'iadmin modresc {resource_name} context {s3_context_consistent}')
+
+            # get the file again and verify its contents
+            self.user1.assert_icommand(f'iget -f {file1} {retrieved_file}')
+            self.assertTrue(filecmp.cmp(file1, retrieved_file))
+
+            # overwrite file1 with file2
+            self.user1.assert_icommand(f'iput -f -R {resource_name} {file2} {file1}')
+
+            # verify the physical path still reflects decoupled naming (the database wins)
+            print(self.user1.run_icommand(['ils', '-l', file1])[0])  # debug output
+            self.user1.assert_icommand(f'ils -L {file1}', 'STDOUT_SINGLELINE', f'/{self.s3bucketname}/[0-9]+/{file1}', use_regex=True)
+
+            # get the file and verify that its contents now match file2
+            self.user1.assert_icommand(f'iget -f {file1} {retrieved_file}')
+            self.assertTrue(filecmp.cmp(file2, retrieved_file))
+
+        finally:
+            # cleanup
+            self.user1.assert_icommand(f'irm -f {file1}')
+            self.admin.assert_icommand(f'iadmin rmresc {resource_name}')
+            s3plugin_lib.remove_if_exists(file1)
+            s3plugin_lib.remove_if_exists(file2)
+            s3plugin_lib.remove_if_exists(retrieved_file)
diff --git a/packaging/test_irods_resource_plugin_s3.py b/packaging/test_irods_resource_plugin_s3.py
index 763bc2e..af4fe32 100644
--- a/packaging/test_irods_resource_plugin_s3.py
+++ b/packaging/test_irods_resource_plugin_s3.py
@@ -13,6 +13,7 @@
 from .resource_suite_s3_nocache import Test_S3_NoCache_Large_File_Tests_Base
 from .resource_suite_s3_nocache import Test_S3_NoCache_Glacier_Base
 from .resource_suite_s3_nocache import Test_S3_NoCache_MPU_Disabled_Base
+from .resource_suite_s3_nocache import Test_S3_NoCache_Decoupled_Base
 from .resource_suite_s3_cache import Test_S3_Cache_Base
 from .resource_suite_s3_cache import Test_S3_Cache_Glacier_Base
@@ -125,7 +126,7 @@ def __init__(self, *args, **kwargs):
         self.s3EnableMPU=1
         super(Test_S3_NoCache_SSE, self).__init__(*args, **kwargs)
 
-class Test_S3_NoCache_Decoupled(Test_S3_NoCache_Base, unittest.TestCase):
+class Test_S3_NoCache_Decoupled(Test_S3_NoCache_Decoupled_Base, unittest.TestCase):
    def __init__(self, *args, **kwargs):
        """Set up the test."""
        self.keypairfile='/projects/irods/vsphere-testing/externals/amazon_web_services-CI.keypair'
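
An aside for readers of the tests above, not part of the patch: with ARCHIVE_NAMING_POLICY=consistent the object is keyed under its logical path (hence the /home/.+/ regexes), while decoupled naming keys it under the reversed data object id (hence [0-9]+). A minimal Python sketch of the decoupled construction, with a hypothetical bucket name and data id:

    # sketch only -- mirrors the plugin's decoupled naming scheme
    def decoupled_key(bucket_name, data_id, object_name):
        # the stringified data id is reversed before it is used in the key
        return '/{}/{}/{}'.format(bucket_name, str(data_id)[::-1], object_name)

    # hypothetical values: data id 10042 reverses to '24001'
    assert decoupled_key('irods-bucket', 10042, 'f1') == '/irods-bucket/24001/f1'
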
diff --git a/s3/s3_operations.cpp b/s3/s3_operations.cpp
index cba0155..dd403b4 100644
--- a/s3/s3_operations.cpp
+++ b/s3/s3_operations.cpp
@@ -53,6 +53,7 @@
 #include 
 #include 
 #include 
+#include <filesystem>
 
 extern std::size_t g_retry_count;
 extern std::size_t g_retry_wait;
@@ -401,24 +402,66 @@ namespace irods_s3 {
                 }
             }
 
-            if (index > 0) {
-
-                std::string obj_id = boost::lexical_cast<std::string>(L1desc[index].dataObjInfo->dataId);
-                std::reverse(obj_id.begin(), obj_id.end());
+            // On redirect there is no entry in L1desc[].  The following rules describe the
+            // behavior in that case.
+            //
+            // 1. s3_notify_operation() gets called on the server the client is connected to.
+            // 2. In s3_notify_operation(), this method gets called with an L1desc[] entry, so index > 0.
+            //    The L1desc[] entry is updated along with object->physical_path(), but only if
+            //    openType == CREATE_TYPE.  This ensures the database gets updated with the
+            //    proper physical path.
+            // 3. On the redirected server, s3_file_create_operation() gets called, which also
+            //    calls this method.  In that case there is no L1desc[] entry, but
+            //    object->physical_path() still needs to be updated so the file is written to
+            //    the correct location in S3.  Do a GenQuery to get the object id and use it to
+            //    set object->physical_path().
+
+            if (index > 0) {
+                // There is a corresponding L1desc[] entry.  Look up the object id there and
+                // reverse it for the key.  Write the physical path to the L1desc[] entry as
+                // well as to object->physical_path().
+
+                std::string obj_id = boost::lexical_cast<std::string>(L1desc[index].dataObjInfo->dataId);
+                std::reverse(obj_id.begin(), obj_id.end());
 
                 // make S3 key name
                 const auto s3_key_name = fmt::format("/{}/{}/{}", bucket_name, obj_id, object_name);
 
-                // update physical path
-                logger::debug("{}:{} ({}) [[{}]] updating physical_path to {}",
-                    __FILE__, __LINE__, __FUNCTION__, thread_id, s3_key_name.c_str());
-                object->physical_path(s3_key_name);
-                strncpy(L1desc[index].dataObjInfo->filePath, s3_key_name.c_str(), MAX_NAME_LEN);
-                L1desc[index].dataObjInfo->filePath[MAX_NAME_LEN - 1] = '\0';
-            }
-
-        }
-    }
+                // Update the physical path, but only on first creation; otherwise the policy
+                // that was in effect when the object was first created wins.
+                if (L1desc[index].openType == CREATE_TYPE) {
+                    logger::debug("{}:{} ({}) [[{}]] updating physical_path to {}",
+                                  __FILE__,
+                                  __LINE__,
+                                  __FUNCTION__,
+                                  thread_id,
+                                  s3_key_name.c_str());
+                    object->physical_path(s3_key_name);
+                    strncpy(L1desc[index].dataObjInfo->filePath, s3_key_name.c_str(), MAX_NAME_LEN);
+                    L1desc[index].dataObjInfo->filePath[MAX_NAME_LEN - 1] = '\0';
+                }
+            }
+            else {
+                // There is no L1desc[] entry.  Look up the object id via GenQuery and reverse
+                // it for the key.  Write the physical path to object->physical_path().
+
+                auto path{std::filesystem::path(object->logical_path())};
+                std::string query_string = fmt::format("SELECT DATA_ID WHERE DATA_NAME = '{}' AND COLL_NAME = '{}'",
+                                                       path.filename().c_str(),
+                                                       path.parent_path().c_str());
+                for (const auto& row : irods::query<rsComm_t>{_ctx.comm(), query_string}) {
+                    std::string object_id = row[0];
+                    std::reverse(object_id.begin(), object_id.end());
+                    const auto s3_key_name = fmt::format("/{}/{}/{}", bucket_name, object_id, object_name);
+                    logger::debug("{}:{} ({}) [[{}]] updating physical_path to {}",
+                                  __FILE__,
+                                  __LINE__,
+                                  __FUNCTION__,
+                                  thread_id,
+                                  s3_key_name.c_str());
+                    object->physical_path(s3_key_name);
+                    break; // the data id is the same for all replicas, so we are done
+                }
+            }
+        }
+    }
 
     std::ios_base::openmode translate_open_mode_posix_to_stream(int oflag, const std::string& call_from) noexcept {
@@ -2231,7 +2274,15 @@ namespace irods_s3 {
 
     irods::error s3_notify_operation( irods::plugin_context& _ctx,
                                       const std::string* str ) {
-        return SUCCESS();
-    } // s3_notify_operation
+        if (is_cacheless_mode(_ctx.prop_map())) {
+            // Must update the physical path in the L1desc[] table for decoupled naming.
+            // In the case of a redirect, this runs on the server the client originally
+            // connected to, and that is the server that updates the database.  In
+            // update_physical_path_for_decoupled_naming(), the update only happens on a
+            // create; anything else keeps whatever was previously in the database.
+            update_physical_path_for_decoupled_naming(_ctx);
+        }
+        return SUCCESS();
+    } // s3_notify_operation
 }
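
For reference, the GenQuery lookup in the fallback branch above can also be reproduced from a client. Below is a sketch only, using python-irodsclient rather than the in-server irods::query used by the patch; the connection parameters, logical path, and bucket name are placeholders:

    # sketch only -- reproduces the patch's GenQuery lookup client-side
    from irods.session import iRODSSession
    from irods.models import Collection, DataObject

    logical_path = '/tempZone/home/user1/f1'   # placeholder object
    coll_name, _, data_name = logical_path.rpartition('/')
    bucket_name = 'irods-bucket'               # placeholder bucket

    with iRODSSession(host='localhost', port=1247, user='rods',
                      password='rods', zone='tempZone') as session:
        results = (session.query(DataObject.id)
                          .filter(DataObject.name == data_name)
                          .filter(Collection.name == coll_name))
        for row in results:
            data_id = str(row[DataObject.id])
            # decoupled naming: /<bucket>/<reversed data id>/<object name>
            s3_key = '/{}/{}/{}'.format(bucket_name, data_id[::-1], data_name)
            break  # the data id is the same for every replica

This is handy when verifying by hand that an object landed under its reversed data id, as the issue_2146 test does with ils -L.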