# Building Apache Zeppelin against Spark 2.1
Execute the following commands:

```sh
./dev/change_scala_version.sh 2.11
mvn clean package -Pspark-2.1 -Phadoop-2.4 -Ppyspark -Psparkr -Pscala-2.11 -DskipTests
```

References:

https://github.com/apache/zeppelin#build

https://zeppelin.apache.org/docs/0.7.0/install/install.html

Notes:

  1. It will take a long time to download the necessary packages
  2. Use maven 3.3.9 or higher version
  3. After building, update Zeppelin's Jackson jars to the Jackson version that Spark 2.1 depends on
  4. Configure conf/zeppelin-env.sh in advance as follows.
#!/bin/bash
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
 
# --- Zeppelin daemon / JVM settings -------------------------------------
# NOTE(review): this JDK is Java 7; Spark 2.2+ requires Java 8 — confirm it
# still matches the Spark build in use.
export JAVA_HOME=/usr/lib/jvm/java-1.7.0-openjdk-amd64

# Spark master URL, e.g. spark://master_addr:7077; leave empty for local mode.
export MASTER="yarn-client"

# Extra JVM options for the Zeppelin server process, e.g.
# "-Dspark.executor.memory=8g -Dspark.cores.max=16".
# hdp.version is presumably required by a Hortonworks (HDP) cluster so YARN
# can resolve its classpath placeholders — TODO confirm against the cluster.
export ZEPPELIN_JAVA_OPTS="-Dhdp.version=2.4.0"

# Port the Zeppelin web UI listens on.
export ZEPPELIN_PORT="10005"

# Optional knobs, left at their defaults:
# export ZEPPELIN_MEM              # Zeppelin JVM memory. Default -Xmx1024m -XX:MaxPermSize=512m
# export ZEPPELIN_INTP_MEM         # Interpreter process JVM memory. Default = ZEPPELIN_MEM
# export ZEPPELIN_INTP_JAVA_OPTS   # Interpreter process JVM options. Default = ZEPPELIN_JAVA_OPTS
# export ZEPPELIN_LOG_DIR          # Where log files are stored. PWD by default.
# export ZEPPELIN_PID_DIR          # Where pid files are stored. ${ZEPPELIN_HOME}/run by default.
# export ZEPPELIN_WAR_TEMPDIR      # Location of the jetty temporary directory.
# export ZEPPELIN_NOTEBOOK_DIR     # Where notebooks are saved.
# export ZEPPELIN_NOTEBOOK_HOMESCREEN      # Id of the notebook shown on the home screen, e.g. 2A94M5J1Z
# export ZEPPELIN_NOTEBOOK_HOMESCREEN_HIDE # Hide the homescreen notebook from the list when "true". Default "false".
# export ZEPPELIN_NOTEBOOK_S3_BUCKET       # S3 bucket where notebooks are saved.
# export ZEPPELIN_NOTEBOOK_S3_ENDPOINT     # Endpoint of the bucket.
# export ZEPPELIN_NOTEBOOK_S3_USER         # User prefix in the bucket, e.g. bucket/user/notebook/2A94M5J1Z/note.json
# export ZEPPELIN_IDENT_STRING     # String identifying this Zeppelin instance. $USER by default.
# export ZEPPELIN_NICENESS         # Scheduling priority for daemons. Defaults to 0.
# export ZEPPELIN_INTERPRETER_LOCALREPO    # Local repository for interpreters' additional dependencies.
# export ZEPPELIN_NOTEBOOK_STORAGE # Pluggable notebook storage class(es); two may run simultaneously with a sync between them (e.g. local and remote).
 
#### Spark interpreter configuration ####

## Use a provided Spark installation ##
## When SPARK_HOME is defined, Zeppelin launches the Spark interpreter
## process via spark-submit instead of its embedded Spark libraries.
##
export HADOOP_HOME="/home/ad/hadoop/"
# NOTE(review): this document builds Zeppelin for Spark 2.1, but SPARK_HOME
# points at a spark-2.0.0 tree — confirm the path matches the intended
# Spark version.
export SPARK_HOME="/home/ad/spark-2.0.0"   # (required) When defined, used instead of Zeppelin's embedded Spark libraries
# export SPARK_APP_NAME            # (optional) Name of the Spark application.
# (optional) Extra options handed to spark-submit,
# e.g. "--driver-memory 512M --executor-memory 1G".
# Note: ${HADOOP_HOME} expands at assignment time, so it must be set above.
export SPARK_SUBMIT_OPTIONS="--executor-memory 4G --num-executors 5 --jars ${HADOOP_HOME}/lib/native/hadoop-lzo-0.4.20.jar"

## Embedded Spark binaries ##
## Without SPARK_HOME, Zeppelin can still run the Spark interpreter using
## its embedded binaries, but defining SPARK_HOME is preferred.
##
# Read in YARN client mode; yarn-site.xml lives in this directory.
export HADOOP_CONF_DIR="${HADOOP_HOME}/etc/hadoop"

# PySpark (supported with Spark 1.2.1 and above).
# To configure PySpark, set the Spark distribution path in the 'spark.home'
# property on the Interpreter settings screen in the Zeppelin GUI.
# export PYSPARK_PYTHON            # Path to the python command; must be the same path on the driver (Zeppelin) and all workers.
# export PYTHONPATH
 
## Spark interpreter options ##
##
# HiveContext is used when true (the documented default); this deployment
# opts out and uses plain SQLContext instead.
export ZEPPELIN_SPARK_USEHIVECONTEXT=false
# export ZEPPELIN_SPARK_CONCURRENTSQL      # Execute multiple SQL statements concurrently when true. Default false.
# export ZEPPELIN_SPARK_IMPORTIMPLICIT     # Import implicits, UDF collection, and sql when true. Default true.
# export ZEPPELIN_SPARK_MAXRESULT          # Max number of Spark SQL result rows to display. Default 1000.
# export ZEPPELIN_WEBSOCKET_MAX_TEXT_MESSAGE_SIZE  # Max size in characters of a text message received by websocket. Default 1024000.
 

#### HBase interpreter configuration ####
 
## To connect to HBase running on a cluster, either HBASE_HOME or HBASE_CONF_DIR must be set
 
# export HBASE_HOME=                    # (required) Root directory of the HBase installation (contains HBase scripts and configuration)
# export HBASE_CONF_DIR=                # (optional) Alternatively, point directly at the configuration directory containing hbase-site.xml
 
#### ZeppelinHub connection configuration ####
# export ZEPPELINHUB_API_ADDRESS  # Refers to the address of the ZeppelinHub service in use
# export ZEPPELINHUB_API_TOKEN   # Refers to the Zeppelin instance token of the user
# export ZEPPELINHUB_USER_KEY   # Optional, when using Zeppelin with authentication.