Skip to content

Commit

Permalink
changes to introduce Proxy account in the existing repo
Browse files Browse the repository at this point in the history
  • Loading branch information
rdeshmukh15 committed Nov 29, 2024
1 parent c14c648 commit e6d20fa
Show file tree
Hide file tree
Showing 5 changed files with 50 additions and 13 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# Changelog

## 1.4.0
* Add a proxy AWS account as middleware to mask the Qlik AWS account ID
* [#69](https://github.com/singer-io/tap-s3-csv/pull/69)

## 1.3.9
* Handle S3 files race condition
* [#67](https://github.com/singer-io/tap-s3-csv/pull/67)
Expand Down
9 changes: 9 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,15 @@ Here is an example of basic config, and a bit of a run down on each of the prope
- **tables**: An escaped JSON string that the tap will use to search for files, and emit records as "tables" from those files. Will be used by a [`voluptuous`](https://github.com/alecthomas/voluptuous)-based configuration checker.
- **request_timeout**: (optional) The maximum time a request should wait for a response. The default request_timeout is 300 seconds.

Below are the additional properties to add to the config when running this tap in the Qlik environment:
```
"proxy_account_id": "221133445566",
"proxy_role_name": "proxy_role_with_bucket_access"
```
The proxy AWS account acts as middleware to mask Qlik's account details.
- **proxy_account_id**: The ID of the proxy AWS account.
- **proxy_role_name**: The name of the IAM role in the proxy account. The Qlik account assumes this role, and the role is then used to access the S3 bucket in your account.

The `tables` field consists of one or more objects, JSON encoded as an array and escaped using backslashes (e.g., `\"` for `"` and `\\` for `\`), that describe how to find files and emit records. A more detailed (and unescaped) example is shown below:

```
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from setuptools import setup

setup(name='tap-s3-csv',
version='1.3.9',
version='1.4.0',
description='Singer.io tap for extracting CSV files from S3',
author='Stitch',
url='https://singer.io',
Expand Down
10 changes: 8 additions & 2 deletions tap_s3_csv/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

LOGGER = singer.get_logger()

REQUIRED_CONFIG_KEYS = ["start_date", "bucket", "proxy_external_id", "external_id", "proxy_account_id", "account_id", "proxy_role_name", "role_name"]
REQUIRED_CONFIG_KEYS = ["start_date", "bucket", "account_id", "external_id", "role_name"]


def do_discover(config):
Expand Down Expand Up @@ -83,7 +83,13 @@ def main():
break
LOGGER.warning("I have direct access to the bucket without assuming the configured role.")
except:
s3.setup_aws_client(config)
# Check if proxy_account_id and proxy_role_name are in config
if 'proxy_account_id' in config and 'proxy_role_name' in config:
# If both are present, call setup_aws_client_with_proxy
s3.setup_aws_client_with_proxy(config)
else:
# Otherwise, call setup_aws_client
s3.setup_aws_client(config)

if args.discover:
do_discover(args.config)
Expand Down
38 changes: 28 additions & 10 deletions tap_s3_csv/s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,32 @@ def load(self):

@retry_pattern
def setup_aws_client(config):
    """Make boto3's default session assume the IAM role named in ``config``.

    The role ARN is built from ``config['account_id']`` (with any dashes
    removed) and ``config['role_name']``. An assume-role credential fetcher
    is created for that ARN, passing ``config['external_id']`` as the
    ``ExternalId``, and is registered as the only credential provider of a
    fresh botocore session, which is then installed as the boto3 default
    session.
    """
    account_id = config['account_id'].replace('-', '')
    role_arn = "arn:aws:iam::{}:role/{}".format(account_id, config['role_name'])

    # Session whose own credentials are used to call AssumeRole.
    base_session = Session()
    client_factory = base_session.create_client
    source_credentials = base_session.get_credentials()

    assume_role_args = {
        'DurationSeconds': 3600,
        'RoleSessionName': 'TapS3CSV',
        'ExternalId': config['external_id']
    }
    credential_fetcher = AssumeRoleCredentialFetcher(
        client_factory,
        source_credentials,
        role_arn,
        extra_args=assume_role_args,
        cache=JSONFileCache()
    )

    # Separate session that resolves credentials only through the fetcher.
    refreshable_session = Session()
    provider_chain = CredentialResolver([AssumeRoleProvider(credential_fetcher)])
    refreshable_session.register_component('credential_provider', provider_chain)

    LOGGER.info("Attempting to assume_role on RoleArn: %s", role_arn)
    boto3.setup_default_session(botocore_session=refreshable_session)

@retry_pattern
def setup_aws_client_with_proxy(config):
proxy_role_arn = "arn:aws:iam::{}:role/{}".format(config['proxy_account_id'].replace('-', ''),
config['proxy_role_name'])
cust_role_arn = "arn:aws:iam::{}:role/{}".format(config['account_id'].replace('-', ''), config['role_name'])
Expand All @@ -103,8 +129,7 @@ def setup_aws_client(config):
role_arn=proxy_role_arn,
extra_args={
'DurationSeconds': 3600,
'RoleSessionName': 'ProxySession',
'ExternalId': config['proxy_external_id']
'RoleSessionName': 'ProxySession'
},
cache=JSONFileCache()
)
Expand All @@ -124,19 +149,12 @@ def setup_aws_client(config):
role_arn=cust_role_arn,
extra_args={
'DurationSeconds': 3600,
'RoleSessionName': 'CustSession',
'RoleSessionName': 'TapS3CSVCustSession',
'ExternalId': config['external_id']
},
cache=JSONFileCache()
)

# # Refreshable credentials for Account Customer
# refreshable_credentials_c = RefreshableCredentials.create_from_metadata(
# metadata=fetcher_cust.fetch_credentials(),
# refresh_using=fetcher_cust.fetch_credentials,
# method="sts-assume-role"
# )

# Set up refreshable session for Customer Account
refreshable_session_cust = Session()
refreshable_session_cust.register_component(
Expand Down

0 comments on commit e6d20fa

Please sign in to comment.