From 3886db52069946767ccfdff14d89f288be23b270 Mon Sep 17 00:00:00 2001 From: katebygrace Date: Mon, 20 May 2024 13:57:41 -0400 Subject: [PATCH] chore: analytics exporter JIRA:CLOUDSEC-12 --- .../jobs/analytics/AnalyticsExporter.groovy | 46 +++++++++++++------ dataeng/resources/setup-exporter.sh | 13 ++++-- .../edx/jenkins/dsl/AnalyticsConstants.groovy | 14 ++++++ 3 files changed, 56 insertions(+), 17 deletions(-) diff --git a/dataeng/jobs/analytics/AnalyticsExporter.groovy b/dataeng/jobs/analytics/AnalyticsExporter.groovy index ba7138271..39d83acd3 100644 --- a/dataeng/jobs/analytics/AnalyticsExporter.groovy +++ b/dataeng/jobs/analytics/AnalyticsExporter.groovy @@ -1,7 +1,6 @@ package analytics import static org.edx.jenkins.dsl.AnalyticsConstants.common_publishers -import static org.edx.jenkins.dsl.AnalyticsConstants.secure_scm -import static org.edx.jenkins.dsl.AnalyticsConstants.secure_scm_parameters +import static org.edx.jenkins.dsl.AnalyticsConstants.config_scm import static org.edx.jenkins.dsl.AnalyticsConstants.opsgenie_heartbeat_publisher class AnalyticsExporter { @@ -19,7 +18,6 @@ class AnalyticsExporter { stringParam('TASKS', '', 'Space separated list of tasks to process. Leave this blank to use the task list specified in the config file. Specify here only if you are running tests of a specific task.') stringParam('PYTHON_VENV_VERSION', 'python3.7', 'Python virtual environment version to used.') } - parameters secure_scm_parameters(allVars) environmentVariables { env('REMOTE_CONFIG_PROD_EDX_ROLE_ARN', allVars.get('REMOTE_CONFIG_PROD_EDX_ROLE_ARN')) @@ -32,7 +30,7 @@ class AnalyticsExporter { env('REMOTE_CONFIG_DECRYPTION_KEYS_VAULT_KV_VERSION', allVars.get('REMOTE_CONFIG_DECRYPTION_KEYS_VAULT_KV_VERSION')) } - multiscm secure_scm(allVars) << { + multiscm config_scm(allVars) << { git { remote { url('git@github.com:openedx/edx-platform.git') @@ -55,6 +53,17 @@ class AnalyticsExporter { relativeTargetDirectory('analytics-exporter') } } + git { + remote { + url('git@github.com:edx/analytics-tools.git') + branch('master') + credentials('1') + } + extensions { + pruneBranches() + relativeTargetDirectory('analytics-tools') + } + } } @@ -73,7 +82,7 @@ class AnalyticsExporter { } } - dslFactory.job('analytics-exporter-worker') { + dslFactory.job('analytics-exporter-worker-test') { description('This is a worker/downstream job to the Analytics Exporter. It does all of the legwork of exporting/encrypting the data for a given org. See also: analytics-exporter-master.') parameters { stringParam('NOTIFY') @@ -89,7 +98,6 @@ class AnalyticsExporter { stringParam('EXTRA_OPTIONS') stringParam('PYTHON_VENV_VERSION', 'python3.7', 'Python version to use for creating virtualenv.') } - parameters secure_scm_parameters(allVars) environmentVariables { env('REMOTE_CONFIG_PROD_EDX_ROLE_ARN', allVars.get('REMOTE_CONFIG_PROD_EDX_ROLE_ARN')) @@ -113,7 +121,7 @@ class AnalyticsExporter { concurrentBuild() - multiscm secure_scm(allVars) + multiscm config_scm(allVars) wrappers { timestamps() @@ -139,29 +147,29 @@ class AnalyticsExporter { } } - dslFactory.job('analytics-exporter-master') { + dslFactory.job('analytics-exporter-master-test') { description('The Analytics Exporter weekly job, which exports tons of structure and state data for every course for every participating org and delivers them encrypted to our partners via S3. Specifically, this sets up the shared edx-platform execution environment, fetches a list of all the orgs, then kicks off downstream analytics-exporter-worker jobs for each one that corresponds to a partner which is configured to receive export data.') parameters { stringParam('ORGS', '*', 'Space separated list of organizations to process. Can use wildcards. e.g.: idbx HarvardX') stringParam('EXPORTER_BRANCH', 'origin/master', 'Branch from the edx-analytics-exporter repository. For tags use tags/[tag-name].') stringParam('PLATFORM_BRANCH', 'origin/2u/release', 'Branch from the edx-platform repository. For tags use tags/[tag-name].') stringParam('EXPORTER_CONFIG_FILENAME', 'default.yaml', 'Name of configuration file in analytics-secure/analytics-exporter.') - stringParam('OUTPUT_BUCKET', allVars.get('EXPORTER_OUTPUT_BUCKET'), 'Name of the bucket for the destination of the export data. Can use a path. (eg. export-data/test).') - stringParam('NOTIFY', allVars.get('ANALYTICS_EXPORTER_NOTIFY_LIST'), 'Space separated list of emails to notify in case of failure.') + stringParam('OUTPUT_BUCKET', 's3://edx-analytics-scratch/analytics-test', 'Name of the bucket for the destination of the export data. Can use a path. (eg. export-data/test).') + stringParam('NOTIFY', '', 'Space separated list of emails to notify in case of failure.') + stringParam('DATE_MODIFIER', '', 'Used to set the date of the CWSM dump. Leave blank to use today\'s date. Set to "-d 202x-0x-0x" if that is when the CWSM dump took place. (Leave off quotes.)') stringParam('EXTRA_OPTIONS', '--exclude-task=OrgEmailOptInTask', 'e.g. --exclude-task=OrgEmailOptInTask') stringParam('ORG_CONFIG', 'data-czar-keys/config.yaml', 'Path to the data-czar organization config file.') stringParam('DATA_CZAR_KEYS_BRANCH', 'master', 'Branch to use for the data-czar-keys repository.') stringParam('PRIORITY_ORGS', allVars.get('PRIORITY_ORGS'), 'Space separated list of organizations to process first.') } - parameters secure_scm_parameters(allVars) environmentVariables { env('OPSGENIE_HEARTBEAT_NAME', allVars.get('OPSGENIE_HEARTBEAT_NAME')) env('OPSGENIE_HEARTBEAT_DURATION_NUM', allVars.get('OPSGENIE_HEARTBEAT_DURATION_NUM')) env('OPSGENIE_HEARTBEAT_DURATION_UNIT', allVars.get('OPSGENIE_HEARTBEAT_DURATION_UNIT')) } - multiscm secure_scm(allVars) << { + multiscm config_scm(allVars) << { git { remote { url('git@github.com:openedx/edx-platform.git') @@ -194,7 +202,17 @@ class AnalyticsExporter { relativeTargetDirectory('data-czar-keys') } } - + git { + remote { + url('git@github.com:edx/analytics-tools.git') + branch('master') + credentials('1') + } + extensions { + pruneBranches() + relativeTargetDirectory('analytics-tools') + } + } } triggers{ @@ -220,7 +238,7 @@ class AnalyticsExporter { shell(dslFactory.readFileFromWorkspace('dataeng/resources/setup-platform-venv-py3.sh')) shell(dslFactory.readFileFromWorkspace('dataeng/resources/setup-exporter.sh')) downstreamParameterized { - trigger('analytics-exporter-worker') { + trigger('analytics-exporter-worker-test') { block { // Mark this build step as FAILURE if at least one of the downstream builds were marked FAILED. buildStepFailure('FAILURE') diff --git a/dataeng/resources/setup-exporter.sh b/dataeng/resources/setup-exporter.sh index b5813e5e7..0872dc875 100644 --- a/dataeng/resources/setup-exporter.sh +++ b/dataeng/resources/setup-exporter.sh @@ -6,7 +6,7 @@ mkdir -p /var/lib/jenkins/tmp/analytics-exporter/course-data # Create and activate a virtualenv in shell script EXPORTER_VENV="exporter_venv" -virtualenv --python=python3.8 --clear "${EXPORTER_VENV}" +virtualenv --python=python3.11 --clear "${EXPORTER_VENV}" source "${EXPORTER_VENV}/bin/activate" # Install requirements into this (exporter) virtual environment @@ -16,8 +16,15 @@ pip install -r github_requirements.txt pip install mysql-connector-python -e . popd -# Configuration paths in analytics-secure -SECURE_ROOT=${WORKSPACE}/analytics-secure/analytics-exporter + + +cd analytics-tools/snowflake +pip install argparse boto3 +python3 secrets-manager.py -w -n analytics-secure/analytics-exporter/task-auth.json -v task-auth.json +cd ../../ + +# Configuration paths in analytics-config +SECURE_ROOT=${WORKSPACE}/analytics-config/analytics-exporter CONFIG_PATH=${SECURE_ROOT}/${EXPORTER_CONFIG_FILENAME} GPG_KEYS_PATH=${WORKSPACE}/data-czar-keys diff --git a/src/main/groovy/org/edx/jenkins/dsl/AnalyticsConstants.groovy b/src/main/groovy/org/edx/jenkins/dsl/AnalyticsConstants.groovy index d3042d9b2..722686819 100644 --- a/src/main/groovy/org/edx/jenkins/dsl/AnalyticsConstants.groovy +++ b/src/main/groovy/org/edx/jenkins/dsl/AnalyticsConstants.groovy @@ -60,6 +60,20 @@ class AnalyticsConstants { } } + public static def config_scm = { allVars -> + return { + git { + remote { + url('git@github.com:edx/analytics-config.git') + branch('master') + } + extensions { + pruneBranches() + relativeTargetDirectory('analytics-config') + } + } + } + } public static def data_czar_keys_scm = { allVars -> return { git {