Merge pull request #38 from prashanthShiksha/release-5.1.0

Fix for the survey consumption reports incorrect counts
Sunbird-Ed · Jul 4, 2024 · 745b534 · 745b534
2 parents e65b5af + 2c47a08
commit 745b534
Show file tree

Hide file tree

Showing 4 changed files with 564 additions and 0 deletions.
diff --git a/config.sample b/config.sample
@@ -111,6 +111,7 @@ survey_injestion_spec = {{ml_analytics_druid_survey_batch_ingestion_spec}}
 
 intervals = {{ml_analytics_druid_interval_list}}
 
+ml_distinctCnt_survey_status_spec = {{ml_analytics_distinctCnt_survey_status_batch_ingestion_spec}}
 
 [KAFKA]
 
@@ -234,6 +235,7 @@ survey_druid_data = {{ml_analytics_survey_batchupdate_output_dir}}
 
 program_text_file = {{ml_analytics_project_program}}
 
+survey_distinctCount_status = {{ml_analytics_survey_distinctCount_status_filepath}}
 
 [SLACK]
 
@@ -286,3 +288,5 @@ cname_url = {{ ml_analytics_cname_url }}
 nvsk_imp_projects_data_local_path = {{ ml_analytics_nvsk_imp_projects_data_local_path }}
 
 nvsk_imp_projects_data_blob_path = {{ ml_analytics_nvsk_imp_projects_data_blob_path }}
+
+survey_distinctCount_blob_path = {{ ml_analytics_survey_distinctCount_blob_path }}
diff --git a/release-documents/5.1.0.md b/release-documents/5.1.0.md
@@ -0,0 +1,57 @@
+# Release Note 5.1.0 ML Analytics Service
+
+The survey consumption report on the Admin Dashboard was pointing to the raw
+datasource `ml-survey-status`, so it was showing incorrect values. To fix this,
+we have created a new script, `pyspark_sur_distinct_count_status.py`, which
+creates a new aggregated datasource called `ml-survey-distinctCount-status`
+that resolves the Admin Dashboard issue.
+
+## Deploy ml-analytics-service
+To retrieve the latest release tag for version 5.1.0, please visit the following URL: https://github.com/Sunbird-Ed/ml-analytics-service/tree/release-5.1.0_RC21
+
+To proceed with the deployment process, follow the steps below:
+
+    1. Log in to Jenkins.
+    2. In ml-analytics-service, we don't have a build process; we only have deployment.
+    3. Go to Dashboard -> Deploy -> staging -> managed-learn -> ml-analytics-service, or for dev go to Dashboard -> Deploy -> dev -> managed-learn -> ml-analytics-service.
+    4. Click on "Build with parameters", provide the latest release tag in the field labeled "ml_analytics_version" and the release branch in "branch_or_tag", then initiate the deployment process.
+    5. Once the job is completed, the services will be deployed on the respective environment.
+
+### Config changes
+Add the new templates to config.j2. Refer to this link for the file path: https://github.com/project-sunbird/sunbird-devops/blob/release-5.1.0/ansible/roles/ml-analytics-service/templates/config.j2
+
+```ini
+[DRUID]
+ml_distinctCnt_survey_status_spec = {{ml_analytics_distinctCnt_survey_status_batch_ingestion_spec}}
+
+[OUTPUT_DIR]
+survey_distinctCount_status = {{ml_analytics_survey_distinctCount_status_filepath}}
+
+[COMMON]
+survey_distinctCount_blob_path = {{ ml_analytics_survey_distinctCount_blob_path }}
+
+[LOGS]
+survey_streaming_success_error = {{ ml_analytics_survey_streaming_success_log_folder_path }}
+```
+Add the configs to main.yml. Refer to this link for the file path: https://github.com/project-sunbird/sunbird-devops/blob/release-5.1.0/ansible/roles/ml-analytics-service/defaults/main.yml
+
+- ml_analytics_distinctCnt_survey_status_batch_ingestion_spec :
+```json
+{"type":"index","spec":{"ioConfig":{"type":"index","inputSource":{"type":"local","baseDir":["local json file storage path"],"filter":"ml_survey_distinctCount_status.json"},"inputFormat":{"type":"json"}},"tuningConfig":{"type":"index","partitionsSpec":{"type":"dynamic"}},"dataSchema":{"dataSource":"ml-surveydistinctCount-status","granularitySpec":{"type":"uniform","queryGranularity":"none","rollup":false,"segmentGranularity":"DAY"},"timestampSpec":{"column":"time_stamp","format":"auto"},"dimensionsSpec":{"dimensions":[{"type":"string","name":"program_name"},{"type":"string","name":"program_id"},{"type":"string","name":"survey_name"},{"type":"string","name":"survey_id"},{"type":"string","name":"submission_status"},{"type":"string","name":"state_name"},{"type":"string","name":"state_externalId"},{"type":"string","name":"district_name"},{"type":"string","name":"district_externalId"},{"type":"string","name":"block_name"},{"type":"string","name":"block_externalId"},{"type":"string","name":"organisation_name"},{"type":"string","name":"organisation_id"},{"type":"string","name":"private_program"},{"type":"string","name":"parent_channel"},{"type":"long","name":"unique_users"},{"type":"long","name":"unique_submissions"},{"type":"string","name":"time_stamp"}]},"metricsSpec":[]}}}
+```
+Note: change the values for keys such as (spec.ioConfig.inputSource.baseDir: "local json file storage path") & (spec.ioConfig.inputSource.type: "cloud provider, e.g. azure").
+
+- ml_analytics_survey_distinctCount_status_filepath : "local json file storage path"
+
+- ml_analytics_survey_distinctCount_blob_path : "cloud json file storage path"
+
+- ml_analytics_survey_streaming_success_log_folder_path : "logs storage path"
+
+### Backend Json
+Update the backend json using this API `/api/data/v1/report/jobs/`
+
+1\.[ml no of surveys in started status currently sl.json](https://github.com/shikshalokam/ml-analytics-service/blob/release-6.0.0/migrations/releases/6.0.0/config/backend/create/ml_no_of_surveys_in_started_status_currently_sl.json)
+
+2\.[ml no of surveys submitted till date sl.json](https://github.com/shikshalokam/ml-analytics-service/blob/release-6.0.0/migrations/releases/6.0.0/config/backend/create/ml_no_of_surveys_submitted_till_date_sl.json)
+
+Note: change the values for keys such as createdBy (e.g. b5bf1586-1181-4c14-9e7a-3cbc6099b327), container (e.g. analytics), postcontainer (e.g. analytics) and store (e.g. azure) according to the environment.
diff --git a/run.sh b/run.sh
@@ -55,5 +55,14 @@ echo "Daily Survey Status Batch Job Ingestion == Started"
 echo "Daily Survey Status Batch Job Ingestion == Completed"
 echo "*************************************"
 
+# SURVEY DISTINCT COUNT STATUS: Deletion and Ingestion
+echo ""
+echo "$(date)"
+echo "====================================="
+echo "Daily Survey Distinct Count Status Batch Job Ingestion == Started"
+. /opt/sparkjobs/spark_venv/bin/activate && /opt/sparkjobs/spark_venv/lib/python3.8/site-packages/pyspark/bin/spark-submit --driver-memory 50g --executor-memory 50g /opt/sparkjobs/ml-analytics-service/survey/pyspark_sur_distinct_count_status.py
+echo "Daily Survey Distinct Count Status Batch Job Ingestion == Completed"
+echo "*************************************"
+
 echo "COMPLETED"