From f83669f5ba52b624044fde2adf38bb565eed5115 Mon Sep 17 00:00:00 2001 From: John Chilton Date: Thu, 9 May 2024 09:46:25 -0400 Subject: [PATCH] Rev object store docs. --- .../sample/object_store_conf.sample.yml | 85 ++++++++++++++++++- 1 file changed, 82 insertions(+), 3 deletions(-) diff --git a/lib/galaxy/config/sample/object_store_conf.sample.yml b/lib/galaxy/config/sample/object_store_conf.sample.yml index b1b2cb34afec..5fdfcb8d8ed1 100644 --- a/lib/galaxy/config/sample/object_store_conf.sample.yml +++ b/lib/galaxy/config/sample/object_store_conf.sample.yml @@ -135,10 +135,64 @@ backends: store_by: uuid files_dir: /old-fs/galaxy/files + +# There are now four ways to access S3 related services. Two are +# suitable just for AWS services (aws_s3 & cloud), one is +# more suited for non-AWS S3 compatible services (generic_s3), +# and finally boto3 gracefully handles either scenario. +# +# boto3 is built on the newest and most widely used Python client +# outside of Galaxy. It has advanced transfer options and is likely +# the client you should use for a new setup. generic_s3 and aws_s3 +# have existed in Galaxy for longer and could perhaps be considered +# more battle tested. Both boto3 and generic_s3 have been tested +# with multiple non-AWS APIs including minio and GCP. The cloud +# implementation is based on CloudBridge and is still supported +# and has been recently tested - the downside is mostly that the advanced +# multi-threaded processing options of boto3 are not available +# and it has not been battle tested like aws_s3. + +# +# Sample AWS S3 Object Store configuration (newest boto3 client) +# +type: boto3 +auth: + access_key: ... + secret_key: ... +bucket: + name: unique_bucket_name_all_lowercase +connection: # not strictly needed but more of the API works with this. 
+ region: us-east-1 +transfer: + multipart_threshold: 10000000 + download_max_concurrency: 5 + upload_max_concurrency: 10 + # any of these options: + # multipart_threshold, max_concurrency, multipart_chunksize, + # num_download_attempts, max_io_queue, io_chunksize, use_threads, + # and max_bandwidth + # can be set. By default they will apply to uploads and downloads + # but they can be prefixed with upload_ or download_ as shown above + # to apply to just one scenario. More information about these parameters + # can be found at: + # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/customizations/s3.html#boto3.s3.transfer.TransferConfig + +cache: + path: database/object_store_cache_s3 + size: 1000 + cache_updated_data: true +extra_dirs: + - type: job_work + path: database/job_working_directory_s3 + + + # -# Sample AWS S3 Object Store configuration +# Sample AWS S3 Object Store configuration (legacy boto implementation) # - +# This implementation will use axel automatically for file transfers if it is on +# Galaxy's path. Otherwise, it will use various python-based strategies for multi-part +# upload of large files but all downloads will be single threaded. type: aws_s3 auth: access_key: ... @@ -147,6 +201,8 @@ bucket: name: unique_bucket_name_all_lowercase use_reduced_redundancy: false max_chunk_size: 250 +connection: # not strictly needed but more of the API works with this. + region: us-east-1 cache: path: database/object_store_cache_s3 size: 1000 @@ -182,7 +238,30 @@ extra_dirs: path: database/job_working_directory_irods # -# Sample non-AWS S3 Object Store (e.g. swift) configuration +# Sample non-AWS S3 Object Store (e.g. swift) configuration (boto3) +# + +type: boto3 +auth: + access_key: ... + secret_key: ... +bucket: + name: unique_bucket_name_all_lowercase +connection: + endpoint_url: https://swift.example.org:6000/ + # region: some services may make use of region if specified. 
+cache: + path: database/object_store_cache_swift + size: 1000 + cache_updated_data: true +# transfer: # see transfer options for boto3 above in AWS configuration. +extra_dirs: + - type: job_work + path: database/job_working_directory_swift + + +# +# Sample non-AWS S3 Object Store (e.g. swift) configuration (legacy boto client) # type: generic_s3