-
Notifications
You must be signed in to change notification settings - Fork 0
/
test_cluster.conf
73 lines (62 loc) · 2.28 KB
/
test_cluster.conf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
# ================
# SPARK PROPERTIES
# ================
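# Properties to be passed to the "spark-submit" command via the
# "--properties-file" argument. Settings in this file take priority over
# spark-defaults.conf, but properties set directly on SparkConf in the
# application take priority over this file. Deploy-related properties
# (e.g. spark.driver.memory) may have no effect when set through SparkConf at
# runtime, so set those here; runtime control properties can be set either way.
# For more information and the full list of available properties, see [1].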
# The spark.master value selects the cluster manager:
# local ... standalone local mode, use single thread (no parallelism)
# local[N] ... standalone local mode, use N cores
# local[*] ... standalone local mode, use all available cores
# spark://host:port ... standalone cluster mode
# + other cluster managers supported by spark (yarn, mesos, ...)
# For more information, see [2].
# Alternative masters, kept commented out so they can be switched in quickly:
#spark.master local
#spark.master local[6]
#spark.master spark://host:port
# Active setting: run on YARN.
spark.master yarn
# Deploy mode: "client" is active; "cluster" is the commented-out alternative.
#spark.submit.deployMode cluster
spark.submit.deployMode client
# Disabled: would set the PYSPARK_PYTHON environment variable for the YARN
# application master to the given interpreter path.
#spark.yarn.appMasterEnv.PYSPARK_PYTHON /opt/anaconda3/bin/python
# Name of the application
spark.app.name ArchiveProcessor
# Port setting
# Fixed starting ports for the block manager and the driver. When a port is
# already in use, Spark retries on successive ports, up to
# spark.port.maxRetries times (see [1]).
# NOTE(review): with 100 retries the two ranges (34000-34100 and 34100-34200)
# appear to meet at port 34100 -- confirm this is intended.
spark.blockManager.port 34000
spark.driver.port 34100
spark.port.maxRetries 100
# Memory setting (see e.g. [3])
# Requests 8 executors with 1 core and 4g of memory each, and an 8g driver.
spark.executor.memory 4g
spark.driver.memory 8g
spark.executor.cores 1
spark.executor.instances 8
# Upper bound on the total size of results returned to the driver.
spark.driver.maxResultSize 2g
# Disabled override of the storage fraction of unified memory:
#spark.memory.storageFraction 0.1
# Failure tolerance: both limits below are set very high.
# NOTE(review): presumably so that long jobs survive unreliable nodes on the
# test cluster -- confirm that such permissive limits are intended.
spark.task.maxFailures 100
spark.yarn.max.executor.failures 100000
# All important files needed by the driver or executors
# The value is one comma-separated list. A trailing backslash continues the
# value on the next line, so every entry except the last must end with ",\".
# Do not insert comment or blank lines between the continued lines.
spark.files \
stoplists/cs.txt,\
stoplists/de.txt,\
stoplists/en.txt,\
stoplists/fr.txt,\
stoplists/pl.txt,\
stoplists/ru.txt,\
stoplists/sk.txt,\
NLTK_data/tokenizers/punkt/PY3/czech.pickle,\
NLTK_data/tokenizers/punkt/PY3/english.pickle,\
NLTK_data/tokenizers/punkt/PY3/french.pickle,\
NLTK_data/tokenizers/punkt/PY3/german.pickle,\
NLTK_data/tokenizers/punkt/PY3/polish.pickle,\
pyfiles.zip,\
schema.json
# Python dependencies (.zip, .egg or .py) shipped with the application.
# NOTE(review): pyfiles.zip is also listed in spark.files above -- confirm
# that the duplication is intentional.
spark.submit.pyFiles \
pyfiles.zip
# additional properties
# Log the effective configuration when the application starts.
spark.logConf true
# Record Spark events to the HDFS directory below.
# NOTE(review): presumably the directory read by the cluster's Spark history
# server -- confirm the path matches the cluster setup.
spark.eventLog.enabled true
spark.eventLog.dir hdfs:///spark2-history/
# References:
# [1] https://spark.apache.org/docs/latest/configuration.html
# [2] https://spark.apache.org/docs/latest/submitting-applications.html#master-urls
# [3] https://blog.cloudera.com/how-to-tune-your-apache-spark-jobs-part-2/