From 327ab4dfbf9670bc3144e348fba837c3f2019e1d Mon Sep 17 00:00:00 2001 From: sfc-gh-ext-simba-hx Date: Wed, 25 Oct 2023 14:21:33 -0700 Subject: [PATCH] put with GCP to skip existing file --- cpp/SnowflakeGCSClient.cpp | 31 +++++++++++++++++---- tests/test_unit_put_get_gcs.cpp | 49 +++++++++++++++++++++++++++++++-- 2 files changed, 72 insertions(+), 8 deletions(-) diff --git a/cpp/SnowflakeGCSClient.cpp b/cpp/SnowflakeGCSClient.cpp index 7fd5d41a1c..209c13a2ff 100644 --- a/cpp/SnowflakeGCSClient.cpp +++ b/cpp/SnowflakeGCSClient.cpp @@ -66,6 +66,18 @@ RemoteStorageRequestOutcome SnowflakeGCSClient::upload(FileMetadata *fileMetadat if (!m_gcsAccessToken.empty()) { std::string filePathFull = m_stageInfo->location + fileMetadata->destFileName; + + //check if file exists if overwrite is not specified. + if (!fileMetadata->overWrite) + { + RemoteStorageRequestOutcome outcome = GetRemoteFileMetadata(&filePathFull, fileMetadata); + if (RemoteStorageRequestOutcome::SUCCESS == outcome) + { + CXX_LOG_DEBUG("File already exists skipping the file upload %s", + fileMetadata->srcFileToUpload.c_str()); + return RemoteStorageRequestOutcome::SKIP_UPLOAD_FILE; + } + } buildGcsRequest(filePathFull, url, reqHeaders); } else @@ -134,12 +146,21 @@ RemoteStorageRequestOutcome SnowflakeGCSClient::GetRemoteFileMetadata( url = fileMetadata->presignedUrl; } - if (!m_statement->http_get(url, - reqHeaders, - NULL, - headerString, - true)) + try + { + if (!m_statement->http_get(url, + reqHeaders, + NULL, + headerString, + true)) + { + return RemoteStorageRequestOutcome::FAILED; + } + } + catch (...) { + // It's expected to fail when file doesn't exist, while http_get() + // could throw excpetion on that. Catch that and return FAILED return RemoteStorageRequestOutcome::FAILED; } diff --git a/tests/test_unit_put_get_gcs.cpp b/tests/test_unit_put_get_gcs.cpp index a2a6699c24..53872e9ff2 100644 --- a/tests/test_unit_put_get_gcs.cpp +++ b/tests/test_unit_put_get_gcs.cpp @@ -32,7 +32,10 @@ class MockedGCSStatement : public Snowflake::Client::IStatementPutGet { public: MockedGCSStatement(bool useGcsToken) - : m_useGcsToken(useGcsToken), Snowflake::Client::IStatementPutGet() + : m_useGcsToken(useGcsToken), + m_firstTime(true), + m_isPut(false), + Snowflake::Client::IStatementPutGet() { m_stageInfo.stageType = Snowflake::Client::StageType::GCS; if (m_useGcsToken) @@ -63,9 +66,18 @@ class MockedGCSStatement : public Snowflake::Client::IStatementPutGet } else { + m_isPut = true; putGetParseResponse->command = CommandType::UPLOAD; putGetParseResponse->srcLocations = m_srcLocationsPut; putGetParseResponse->threshold = DEFAULT_UPLOAD_DATA_SIZE_THRESHOLD; + if (sql->find("OVERWRITE=true") != std::string::npos) + { + putGetParseResponse->overwrite = true; + } + else + { + putGetParseResponse->overwrite = false; + } } if (!m_useGcsToken) { @@ -106,8 +118,17 @@ class MockedGCSStatement : public Snowflake::Client::IStatementPutGet std::string& responseHeaders, bool headerOnly) override { - if (headerOnly) + if (headerOnly) // try getting metadata { + if (m_isPut) + { + if ((strcasecmp(url.c_str(), m_expectedUrl.c_str()) == 0) && m_firstTime) + { + // first time retry false to mock the case of file doesn't exist + m_firstTime = false; + return false; + } + } responseHeaders = "HTTP/1.1 200 OK\n" "x-goog-meta-encryptiondata: {\"WrappedContentKey\":{\"KeyId\":\"symmKey1\",\"EncryptedKey\":\"MyZBZNLcndKTeR+xC8Msle5IcSYKsx/nYNn93OONSqs=\",\"Algorithm\":\"AES_CBC_256\"},\"ContentEncryptionIV\":\"30fmhKrf1aKyWidrv06NNA==\"}\n" "Content-Type: application/octet-stream\n" @@ -143,12 +164,16 @@ class MockedGCSStatement : public Snowflake::Client::IStatementPutGet bool m_useGcsToken; std::string m_expectedUrl; + + bool m_firstTime; + + bool m_isPut; }; // test helper for put void put_gcs_test_core(bool useGcsToken) { - std::string cmd = std::string("put file://small_file.csv.gz @odbctestStage AUTO_COMPRESS=false OVERWRITE=true"); + std::string cmd = std::string("put file://small_file.csv.gz @odbctestStage AUTO_COMPRESS=false"); MockedGCSStatement mockedStatementPut(useGcsToken); @@ -162,6 +187,24 @@ void put_gcs_test_core(bool useGcsToken) result->getColumnAsString(6, put_status); assert_string_equal("UPLOADED", put_status.c_str()); } + + if (useGcsToken) + { + result = agent.execute(&cmd); + while (result->next()) + { + result->getColumnAsString(6, put_status); + assert_string_equal("SKIPPED", put_status.c_str()); + } + } + + cmd += " OVERWRITE=true"; + result = agent.execute(&cmd); + while (result->next()) + { + result->getColumnAsString(6, put_status); + assert_string_equal("UPLOADED", put_status.c_str()); + } } // test helper for get