From 5cfbe2dbdc93e8128cd4d3c338c00f5da39bbd0a Mon Sep 17 00:00:00 2001
From: Dave Martin
Date: Wed, 2 Mar 2022 15:41:27 +0000
Subject: [PATCH] fix for #680 and a fix for missing properties when indexed generate with expert distribution extra EMR defaults

---
Reviewer notes (commentary only; not part of the commit message or the diff):
* NOTE(review): update-solr-cluster-config.sh deletes configset "biocache"
  but uploads configset "biocache_dev". The names no longer match, so a
  re-run does not remove the configset it is about to upload - confirm
  which name is intended.
* NOTE(review): sensitive-sh-args has no "local" entry and carries a "jvm"
  key under spark-cluster, unlike the sibling *-sh-args sections - confirm
  this is intentional.

 livingatlas/configs/la-pipelines-emr.yaml | 145 +++++++++++++++++-
 .../scripts/update-solr-cluster-config.sh |   4 +-
 2 files changed, 143 insertions(+), 6 deletions(-)

diff --git a/livingatlas/configs/la-pipelines-emr.yaml b/livingatlas/configs/la-pipelines-emr.yaml
index 0127db13dc..4bdd6a8522 100644
--- a/livingatlas/configs/la-pipelines-emr.yaml
+++ b/livingatlas/configs/la-pipelines-emr.yaml
@@ -30,11 +30,11 @@ fs:
   local:
     fsPath: /data
   hdfs:
-    fsPath: hdfs://
+    fsPath: hdfs:///
 
 dwca-avro:
-  inputPath: /mnt/dwca-tmp/{datasetId}
-  tempLocation: /mnt/dwca-tmp/{datasetId}
+  inputPath: /data/biocache-load/{datasetId}
+  tempLocation: /data/biocache-load/{datasetId}
   targetPath: hdfs:///pipelines-data
 
 interpret:
@@ -118,4 +118,141 @@ alaNameMatch:
   timeoutSec: 70
   retryConfig:
     maxAttempts: 5
-    initialIntervalMillis: 5000
\ No newline at end of file
+    initialIntervalMillis: 5000
+
+
+### la-pipelines cli additional arguments, like JVM or spark command line arguments
+interpret-sh-args:
+  local:
+    jvm: -Xmx8g -XX:+UseG1GC -Dspark.master=local[*]
+  spark-embedded:
+    jvm: -Xmx8g -XX:+UseG1GC -Dspark.master=local[*]
+  spark-cluster:
+    conf: spark.default.parallelism=48
+    num-executors: 8
+    executor-cores: 8
+    executor-memory: 18G
+    driver-memory: 2G
+
+image-sync-sh-args:
+  local:
+    jvm: -Xmx8g -XX:+UseG1GC -Dspark.master=local[*]
+  spark-embedded:
+    jvm: -Xmx8g -XX:+UseG1GC -Dspark.master=local[*]
+  spark-cluster:
+    conf: spark.default.parallelism=48
+    num-executors: 8
+    executor-cores: 8
+    executor-memory: 18G
+    driver-memory: 2G
+
+image-load-sh-args:
+  local:
+    jvm: -Xmx8g -XX:+UseG1GC -Dspark.master=local[*]
+  spark-embedded:
+    jvm: -Xmx8g -XX:+UseG1GC -Dspark.master=local[*]
+  spark-cluster:
+    conf: spark.default.parallelism=48
+    num-executors: 8
+    executor-cores: 8
+    executor-memory: 18G
+    driver-memory: 2G
+
+uuid-sh-args:
+  local:
+    jvm: -Xmx8g -XX:+UseG1GC
+  spark-embedded:
+    jvm: -Xmx8g -XX:+UseG1GC
+  spark-cluster:
+    conf: spark.default.parallelism=48
+    num-executors: 8
+    executor-cores: 8
+    executor-memory: 18G
+    driver-memory: 2G
+
+sampling-sh-args:
+  local:
+    jvm: -Xmx8g -XX:+UseG1GC -Dspark.master=local[*]
+  spark-embedded:
+    jvm: -Xmx8g -XX:+UseG1GC -Dspark.master=local[*]
+  spark-cluster:
+    conf: spark.default.parallelism=48
+    num-executors: 8
+    executor-cores: 8
+    executor-memory: 18G
+    driver-memory: 2G
+
+outlier-sh-args:
+  local:
+    jvm: -Xmx8g -XX:+UseG1GC -Dspark.master=local[*]
+  spark-embedded:
+    jvm: -Xmx8g -XX:+UseG1GC -Dspark.master=local[*]
+  spark-cluster:
+    conf: spark.default.parallelism=48
+    num-executors: 8
+    executor-cores: 8
+    executor-memory: 18G
+    driver-memory: 2G
+
+sensitive-sh-args:
+  spark-embedded:
+    jvm: -Xmx8g -XX:+UseG1GC
+  spark-cluster:
+    jvm: -Xmx8g -XX:+UseG1GC
+    conf: spark.default.parallelism=48
+    num-executors: 8
+    executor-cores: 8
+    executor-memory: 18G
+    driver-memory: 2G
+
+sample-sh-args:
+  local:
+    jvm: -Xmx8g -XX:+UseG1GC
+
+index-sh-args:
+  local:
+    jvm: -Xmx8g -XX:+UseG1GC
+  spark-embedded:
+    jvm: -Xmx8g -XX:+UseG1GC
+  spark-cluster:
+    conf: spark.default.parallelism=48
+    num-executors: 8
+    executor-cores: 8
+    executor-memory: 18G
+    driver-memory: 2G
+
+jackknife-sh-args:
+  local:
+    jvm: -Xmx8g -XX:+UseG1GC
+  spark-embedded:
+    jvm: -Xmx8g -XX:+UseG1GC
+  spark-cluster:
+    conf: spark.default.parallelism=48
+    num-executors: 8
+    executor-cores: 8
+    executor-memory: 18G
+    driver-memory: 2G
+
+clustering-sh-args:
+  local:
+    jvm: -Xmx8g -XX:+UseG1GC
+  spark-embedded:
+    jvm: -Xmx8g -XX:+UseG1GC
+  spark-cluster:
+    conf: spark.default.parallelism=48
+    num-executors: 8
+    executor-cores: 8
+    executor-memory: 18G
+    driver-memory: 2G
+
+solr-sh-args:
+  local:
+    jvm: -Xmx8g -XX:+UseG1GC
+  spark-embedded:
+    jvm: -Xmx8g -XX:+UseG1GC
+  spark-cluster:
+    conf: spark.default.parallelism=500
+    num-executors: 8
+    executor-cores: 8
+    executor-memory: 18G
+    driver-memory: 2G
diff --git a/livingatlas/solr/scripts/update-solr-cluster-config.sh b/livingatlas/solr/scripts/update-solr-cluster-config.sh
index 1afb61f821..c7818c0a14 100755
--- a/livingatlas/solr/scripts/update-solr-cluster-config.sh
+++ b/livingatlas/solr/scripts/update-solr-cluster-config.sh
@@ -9,10 +9,10 @@ zip config.zip *
 #curl -X GET "http://localhost:8987/solr/admin/collections?action=DELETE&name=biocache3"
 
 #echo 'Deleting existing configset'
-curl -X GET "http://localhost:8988/solr/admin/configs?action=DELETE&name=biocache&omitHeader=true"
+curl -X GET "http://localhost:8983/solr/admin/configs?action=DELETE&name=biocache&omitHeader=true"
 
 echo 'Creating configset'
-curl -X POST --header "Content-Type:application/octet-stream" --data-binary @config.zip "http://localhost:8988/solr/admin/configs?action=UPLOAD&name=biocache"
+curl -X POST --header "Content-Type:application/octet-stream" --data-binary @config.zip "http://localhost:8983/solr/admin/configs?action=UPLOAD&name=biocache_dev"
 
 #echo 'Creating collection'
 #curl -X GET "http://localhost:8986/solr/admin/collections?action=CREATE&name=biocache&numShards=8&maxShardsPerNode=1&replicationFactor=1&collection.configName=biocache"