Skip to content
This repository has been archived by the owner on Nov 23, 2017. It is now read-only.

Elastic spark cluster #39

Open
wants to merge 6 commits into
base: branch-1.6
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions deploy.generic/root/spark-ec2/ec2-variables.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,6 @@
#

# These variables are automatically filled in by the spark-ec2 script.
export MASTERS="{{master_list}}"
export SLAVES="{{slave_list}}"
export HDFS_DATA_DIRS="{{hdfs_data_dirs}}"
export MAPRED_LOCAL_DIRS="{{mapred_local_dirs}}"
export SPARK_LOCAL_DIRS="{{spark_local_dirs}}"
Expand Down
1 change: 1 addition & 0 deletions entities.generic/root/spark-ec2/masters
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{{master_list}}
1 change: 1 addition & 0 deletions entities.generic/root/spark-ec2/slaves
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{{slave_list}}
3 changes: 3 additions & 0 deletions ephemeral-hdfs/init_new_slaves.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
#!/bin/bash
# ephemeral-hdfs module, init_new_slaves step.
# Distributes the master's /root/ephemeral-hdfs tree to the cluster via the
# copy-dir helper (presumably to the hosts listed in /root/spark-ec2/slaves —
# confirm against copy-dir). Sourced by /root/spark-ec2/setup_new_slaves.sh.

/root/spark-ec2/copy-dir /root/ephemeral-hdfs
38 changes: 38 additions & 0 deletions ephemeral-hdfs/setup_new_slaves.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
#!/bin/bash
# ephemeral-hdfs module, setup_new_slaves step: run setup-slave.sh on each
# newly added slave, push the updated HDFS conf to the cluster, then start
# the DFS (and YARN, for the "yarn" flavor) daemons.
#
# NOTE: this file is *sourced* by /root/spark-ec2/setup_new_slaves.sh, which
# provides $NEW_SLAVES, $SSH_OPTS, $PUBLIC_DNS and $HADOOP_MAJOR_VERSION —
# hence 'return' (not 'exit') on error below.

EPHEMERAL_HDFS=/root/ephemeral-hdfs

# Set hdfs url to make it easier
export HDFS_URL="hdfs://$PUBLIC_DNS:9000"

pushd /root/spark-ec2/ephemeral-hdfs > /dev/null

# Fan out over the new slaves in the background, staggering the ssh
# connections slightly; wait for all of them before touching the conf.
# $NEW_SLAVES is intentionally unquoted: it is a whitespace-separated list.
for node in $NEW_SLAVES; do
  echo "$node"
  ssh -t -t $SSH_OPTS "root@$node" "/root/spark-ec2/ephemeral-hdfs/setup-slave.sh" & sleep 0.3
done
wait

/root/spark-ec2/copy-dir "$EPHEMERAL_HDFS/conf"

echo "Starting ephemeral HDFS..."

# This is different depending on version: the start scripts moved from bin/
# to sbin/ between Hadoop 1 and Hadoop 2.
case "$HADOOP_MAJOR_VERSION" in
  1)
    "$EPHEMERAL_HDFS"/bin/start-dfs.sh
    ;;
  2)
    "$EPHEMERAL_HDFS"/sbin/start-dfs.sh
    ;;
  yarn)
    "$EPHEMERAL_HDFS"/sbin/start-dfs.sh
    echo "Starting YARN"
    "$EPHEMERAL_HDFS"/sbin/start-yarn.sh
    ;;
  *)
    echo "ERROR: Unknown Hadoop version" >&2
    # BUGFIX: was 'return -1' — bash 'return' only accepts 0-255 and rejects
    # '-1' as an invalid option; use a plain non-zero status.
    return 1
    ;;
esac

popd > /dev/null
9 changes: 9 additions & 0 deletions ganglia/init_new_slaves.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#!/bin/bash

# Install ganglia on new slaves
# TODO: Remove this once the AMI has ganglia by default
#
# Sourced by /root/spark-ec2/setup_new_slaves.sh, which provides $NEW_SLAVES,
# $SSH_OPTS and $GANGLIA_PACKAGES. The double-quoted remote command expands
# $GANGLIA_PACKAGES locally, so the remote shell sees the literal package list.

# $NEW_SLAVES is intentionally unquoted: it is a whitespace-separated host
# list. The rpm -q guard skips the (slow) yum install when every package is
# already present; jobs fan out in the background with a small stagger.
for node in $NEW_SLAVES; do
  ssh -t -t $SSH_OPTS "root@$node" "if ! rpm --quiet -q $GANGLIA_PACKAGES; then yum install -q -y $GANGLIA_PACKAGES; fi" & sleep 0.3
done
wait
7 changes: 7 additions & 0 deletions ganglia/setup_new_slaves.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#!/bin/bash
# ganglia module, setup_new_slaves step: push the ganglia config to the
# cluster and restart gmond on each new slave so it picks up the new config.
# Sourced by /root/spark-ec2/setup_new_slaves.sh ($NEW_SLAVES, $SSH_OPTS).

/root/spark-ec2/copy-dir /etc/ganglia/

# Restarts run serially here (no '&'), unlike the fan-out pattern used by
# the other module scripts. $NEW_SLAVES intentionally unquoted (host list).
for node in $NEW_SLAVES; do
  ssh -t -t $SSH_OPTS "root@$node" "/etc/init.d/gmond restart"
done
3 changes: 3 additions & 0 deletions mapreduce/init_new_slaves.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
#!/bin/bash
# mapreduce module, init_new_slaves step.
# Distributes the master's /root/mapreduce tree to the cluster via the
# copy-dir helper. Sourced by /root/spark-ec2/setup_new_slaves.sh.

/root/spark-ec2/copy-dir /root/mapreduce
9 changes: 9 additions & 0 deletions mapreduce/setup_new_slaves.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#!/bin/bash
# mapreduce module, setup_new_slaves step: create the MapReduce log dirs on
# each new slave (owned by hadoop:hadoop, presumably so the hadoop daemons
# can write there — confirm), then push the conf dir to the cluster.
# Sourced by /root/spark-ec2/setup_new_slaves.sh ($NEW_SLAVES, $SSH_OPTS).

MAPREDUCE=/root/mapreduce

# $NEW_SLAVES intentionally unquoted (whitespace-separated host list);
# fan out in the background with a small stagger, then barrier on wait.
for node in $NEW_SLAVES; do
  ssh -t $SSH_OPTS "root@$node" "mkdir -p /mnt/mapreduce/logs && chown hadoop:hadoop /mnt/mapreduce/logs && chown hadoop:hadoop /mnt/mapreduce" & sleep 0.3
done
wait

/root/spark-ec2/copy-dir "$MAPREDUCE/conf"
1 change: 1 addition & 0 deletions new_slaves.generic/root/spark-ec2/new_slaves
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{{slave_list}}
3 changes: 3 additions & 0 deletions persistent-hdfs/init_new_slaves.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
#!/bin/bash
# persistent-hdfs module, init_new_slaves step.
# Distributes the master's /root/persistent-hdfs tree to the cluster via the
# copy-dir helper. Sourced by /root/spark-ec2/setup_new_slaves.sh.

/root/spark-ec2/copy-dir /root/persistent-hdfs
21 changes: 21 additions & 0 deletions persistent-hdfs/setup_new_slaves.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#!/bin/bash
# persistent-hdfs module, setup_new_slaves step: run setup-slave.sh on each
# new slave, sync the conf, and format the namenode on first use only.
# The daemons are intentionally NOT started here (see the final message).
# Sourced by /root/spark-ec2/setup_new_slaves.sh ($NEW_SLAVES, $SSH_OPTS).

PERSISTENT_HDFS=/root/persistent-hdfs

pushd /root/spark-ec2/persistent-hdfs > /dev/null

# $NEW_SLAVES intentionally unquoted (whitespace-separated host list);
# fan out in the background with a small stagger, then barrier on wait.
for node in $NEW_SLAVES; do
  ssh -t $SSH_OPTS "root@$node" "/root/spark-ec2/persistent-hdfs/setup-slave.sh" & sleep 0.3
done
wait

/root/spark-ec2/copy-dir "$PERSISTENT_HDFS/conf"

# An existing name dir means the namenode already holds data — never reformat.
if [[ ! -e /vol/persistent-hdfs/dfs/name ]] ; then
  echo "Formatting persistent HDFS namenode..."
  "$PERSISTENT_HDFS"/bin/hadoop namenode -format
fi

echo "Persistent HDFS installed, won't start by default..."

popd > /dev/null
3 changes: 3 additions & 0 deletions scala/setup_new_slaves.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
#!/bin/bash
# scala module, setup_new_slaves step.
# Distributes the master's /root/scala tree to the cluster via the copy-dir
# helper. Sourced by /root/spark-ec2/setup_new_slaves.sh.

/root/spark-ec2/copy-dir /root/scala
2 changes: 1 addition & 1 deletion setup-slave.sh
Original file line number Diff line number Diff line change
Expand Up @@ -131,4 +131,4 @@ popd > /dev/null

# this is to set the ulimit for root and other users
echo '* soft nofile 1000000' >> /etc/security/limits.conf
echo '* hard nofile 1000000' >> /etc/security/limits.conf
echo '* hard nofile 1000000' >> /etc/security/limits.conf
12 changes: 5 additions & 7 deletions setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -29,14 +29,10 @@ export HOSTNAME=$PRIVATE_DNS # Fix the bash built-in hostname variable too

echo "Setting up Spark on `hostname`..."

# Set up the masters, slaves, etc files based on cluster env variables
echo "$MASTERS" > masters
echo "$SLAVES" > slaves

MASTERS=`cat masters`
export MASTERS=`cat masters`
NUM_MASTERS=`cat masters | wc -l`
OTHER_MASTERS=`cat masters | sed '1d'`
SLAVES=`cat slaves`
export SLAVES=`cat slaves`
SSH_OPTS="-o StrictHostKeyChecking=no -o ConnectTimeout=5"

if [[ "x$JAVA_HOME" == "x" ]] ; then
Expand Down Expand Up @@ -107,7 +103,9 @@ chmod u+x /root/spark/conf/spark-env.sh
for module in $MODULES; do
echo "Setting up $module"
module_setup_start_time="$(date +'%s')"
source ./$module/setup.sh
if [[ -e $module/setup.sh ]]; then
source ./$module/setup.sh
fi
sleep 0.1
module_setup_end_time="$(date +'%s')"
echo_time_diff "$module setup" "$module_setup_start_time" "$module_setup_end_time"
Expand Down
114 changes: 114 additions & 0 deletions setup_new_slaves.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
#!/bin/bash
# Top-level driver run on the master when new slaves are added to a running
# cluster. Mirrors setup.sh, but targets only the hosts listed in
# /root/spark-ec2/new_slaves: rsyncs /root/spark-ec2 to every cluster node,
# runs setup-slave.sh on the new slaves, then runs each module's
# init_new_slaves.sh / setup_new_slaves.sh hooks (sourced, so they share
# the MASTERS/SLAVES/NEW_SLAVES/SSH_OPTS variables exported below).

# usage: echo_time_diff name start_time end_time
echo_time_diff () {
  local format='%Hh %Mm %Ss'

  local diff_secs="$(($3-$2))"
  echo "[timing] $1: " "$(date -u -d@"$diff_secs" +"$format")"
}

# Make sure we are in the spark-ec2 directory
pushd /root/spark-ec2 > /dev/null

# Load the environment variables specific to this AMI
source /root/.bash_profile

# Load the cluster variables set by the deploy script
source ec2-variables.sh

# Set hostname based on EC2 private DNS name, so that it is set correctly
# even if the instance is restarted with a different private DNS name
PRIVATE_DNS=`wget -q -O - http://169.254.169.254/latest/meta-data/local-hostname`
PUBLIC_DNS=`wget -q -O - http://169.254.169.254/latest/meta-data/hostname`
hostname "$PRIVATE_DNS"
echo "$PRIVATE_DNS" > /etc/hostname
export HOSTNAME=$PRIVATE_DNS # Fix the bash built-in hostname variable too

echo "Setting up Spark on `hostname`..."

# masters/slaves/new_slaves hold one host per line; exported so the module
# scripts sourced below can see them.
export MASTERS=`cat masters`
NUM_MASTERS=`cat masters | wc -l`
OTHER_MASTERS=`cat masters | sed '1d'`
export SLAVES=`cat slaves`
export NEW_SLAVES=`cat new_slaves`
SSH_OPTS="-o StrictHostKeyChecking=no -o ConnectTimeout=5"

if [[ "x$JAVA_HOME" == "x" ]] ; then
  echo "Expected JAVA_HOME to be set in .bash_profile!"
  exit 1
fi

if [[ `tty` == "not a tty" ]] ; then
  echo "Expecting a tty or pty! (use the ssh -t option)."
  exit 1
fi

echo "Setting executable permissions on scripts..."
find . -regex "^.+.\(sh\|py\)" | xargs chmod a+x

echo "RSYNC'ing /root/spark-ec2 to other cluster nodes..."
rsync_start_time="$(date +'%s')"
# $SLAVES/$OTHER_MASTERS intentionally unquoted: whitespace-separated lists.
for node in $SLAVES $OTHER_MASTERS; do
  echo "$node"
  rsync -e "ssh $SSH_OPTS" -az /root/spark-ec2 "$node:/root" &
  scp $SSH_OPTS ~/.ssh/id_rsa "$node:.ssh" &
  sleep 0.1
done
wait
rsync_end_time="$(date +'%s')"
echo_time_diff "rsync /root/spark-ec2" "$rsync_start_time" "$rsync_end_time"

echo "Running setup-slave on new slave nodes to mount filesystems, etc..."
setup_slave_start_time="$(date +'%s')"
pssh --inline \
    --host "$NEW_SLAVES" \
    --user root \
    --extra-args "-t -t $SSH_OPTS" \
    --timeout 0 \
    "spark-ec2/setup-slave.sh"
setup_slave_end_time="$(date +'%s')"
echo_time_diff "setup-slave" "$setup_slave_start_time" "$setup_slave_end_time"

# Always include 'scala' module if it's not defined as a work around
# for older versions of the scripts.
# BUGFIX: was '[[ ! $MODULES =~ *scala* ]]' — '*scala*' is an invalid ERE
# (a leading '*' has nothing to repeat), so the regex test never behaved as
# intended. A glob comparison is what was meant.
if [[ $MODULES != *scala* ]]; then
  MODULES=$(printf "%s\n%s\n" "scala" $MODULES)
fi

# Install / Init module
for module in $MODULES; do
  echo "Initializing $module"
  module_init_start_time="$(date +'%s')"
  if [[ -e $module/init_new_slaves.sh ]]; then
    source $module/init_new_slaves.sh
  fi
  module_init_end_time="$(date +'%s')"
  echo_time_diff "$module init" "$module_init_start_time" "$module_init_end_time"
  cd /root/spark-ec2 # guard against init.sh changing the cwd
done

# Deploy templates
# TODO: Move configuring templates to a per-module ?
echo "Creating local config files..."
./deploy_templates.py

# Copy spark conf by default
echo "Deploying Spark config files..."
chmod u+x /root/spark/conf/spark-env.sh
/root/spark-ec2/copy-dir /root/spark/conf

# Setup each module
for module in $MODULES; do
  echo "Setting up $module"
  module_setup_start_time="$(date +'%s')"
  if [[ -e $module/setup_new_slaves.sh ]]; then
    source ./$module/setup_new_slaves.sh
  fi
  sleep 0.1
  module_setup_end_time="$(date +'%s')"
  echo_time_diff "$module setup" "$module_setup_start_time" "$module_setup_end_time"
  cd /root/spark-ec2 # guard against setup.sh changing the cwd
done

popd > /dev/null
18 changes: 18 additions & 0 deletions spark-standalone/setup_new_slaves.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#!/bin/bash
# spark-standalone module, setup_new_slaves step: distribute the slave list
# and the cluster url, then start a standalone Worker on every slave via
# start-slaves.sh. Sourced by /root/spark-ec2/setup_new_slaves.sh
# (provides $SPARK_VERSION).

# Spark's daemon scripts moved from bin/ to sbin/ after 0.8.1.
# BUGFIX: was '[[ "0.7.3 0.8.0 0.8.1" =~ $SPARK_VERSION ]]' — that treats the
# version as a *regex* matched inside the string, so '.' matched any char and
# an empty or partial version (e.g. "0.8") also selected the old bin/ layout.
# An exact-match case statement checks what was actually meant.
BIN_FOLDER="/root/spark/sbin"
case "$SPARK_VERSION" in
  0.7.3|0.8.0|0.8.1)
    BIN_FOLDER="/root/spark/bin"
    ;;
esac

# Copy the slaves to spark conf
cp /root/spark-ec2/slaves /root/spark/conf/
/root/spark-ec2/copy-dir /root/spark/conf

# Set cluster-url to standalone master (first host in the masters file)
echo "spark://$(cat /root/spark-ec2/masters):7077" > /root/spark-ec2/cluster-url
/root/spark-ec2/copy-dir /root/spark-ec2

# Start Workers
"$BIN_FOLDER"/start-slaves.sh
3 changes: 3 additions & 0 deletions spark/setup_new_slaves.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
#!/bin/bash
# spark module, setup_new_slaves step.
# Distributes the master's /root/spark tree to the cluster via the copy-dir
# helper. Sourced by /root/spark-ec2/setup_new_slaves.sh.

/root/spark-ec2/copy-dir /root/spark
Loading