-
Notifications
You must be signed in to change notification settings - Fork 0
Data Mining
Jaimie Murdock edited this page Apr 24, 2018
·
3 revisions
Data mining currently runs on a separate EC2
instance, accessible only from the inside.
The server is launched and shut down via a cron job on the master
node.
# Install git, make and the C++ compiler from the yum repositories.
sudo yum install git make gcc-c++
# Create the inphosite user and the inpho group.
sudo useradd inphosite
sudo groupadd inpho
# -G without -a sets inphosite's supplementary group list to exactly "inpho".
sudo usermod -G inpho inphosite
# -a appends: add the inphosite group as well, keeping inpho.
sudo usermod -a -G inphosite inphosite
# Create /var/inpho and hand it to user inphosite, group inpho.
sudo mkdir /var/inpho
sudo chown inphosite:inpho /var/inpho
# Open a shell as inphosite (presumably the steps that follow run in it —
# the PATH line below refers to /home/inphosite).
sudo su inphosite
# Download the Miniconda2 4.3.30 installer into /tmp and run it.
cd /tmp
wget https://repo.continuum.io/miniconda/Miniconda2-4.3.30-Linux-x86_64.sh
# -b runs the installer in batch mode (no prompts).
bash Miniconda2-4.3.30-Linux-x86_64.sh -b
# Put the Miniconda bin directory first on PATH for future shells, then
# re-read ~/.bashrc so the current shell picks it up too.
echo 'export PATH="/home/inphosite/miniconda2/bin:$PATH"' >> ~/.bashrc
. ~/.bashrc
# Python packages installed through conda (sphinx was previously listed
# twice; once is enough).
conda install -y mysql-python sphinx docutils nltk
# Fetch the NLTK "punkt" data package.
python -m nltk.downloader punkt
# Clone the inpho repository under /var/inpho and install it with
# setuptools' "develop" command.
cd /var/inpho
git clone https://github.com/inpho/inpho.git
cd inpho
python setup.py develop
# Copy inpho.ini from inphoproject.org into /var/inpho.
scp inphoproject.org:/var/inpho/inpho.ini /var/inpho/
# Create the data directory and copy .apriori_config into it from the
# same host.
mkdir /var/inpho/data
scp inphoproject.org:/var/inpho/data/.apriori_config /var/inpho/data/
# Create /var/sep (the mount point used by the /etc/fstab entry below) and
# a sep user whose supplementary group list is set to inpho.
sudo mkdir /var/sep
sudo useradd sep
sudo usermod -G inpho sep
# Hand /var/sep to user sep, group inpho.
sudo chown sep:inpho /var/sep
Add this to /etc/fstab:
fs-2c877655.efs.us-east-2.amazonaws.com:/ /var/sep nfs4 nfsvers=4.1,rsize=1048576,wsize=1048576,hard,timeo=600,retrans=2 0 0
# Directory for the mining script, which the launcher script invokes as
# /var/inpho/bin/mining.sh.
mkdir /var/inpho/bin
#!/bin/sh
# mining.sh - run the SEP data-mining passes in sequence.
#
# Each pass invokes sep.py at low priority (nice) from /var/inpho/data.
# The passes without --load run first, followed by the matching --load
# passes. The meaning of the individual flags is defined by sep.py, which
# is not shown here.
MINING_PATH=/var/inpho/inpho/inpho/corpus/sep.py

# Abort if the data directory is unavailable instead of running every pass
# from whatever directory the script happened to start in.
cd /var/inpho/data || exit 1

nice python "$MINING_PATH" --all --occur
nice python "$MINING_PATH" --all
nice python "$MINING_PATH" --idea
nice python "$MINING_PATH" --thinker
nice python "$MINING_PATH" --load --all
nice python "$MINING_PATH" --load --idea
nice python "$MINING_PATH" --load --thinker
# Run mining.sh under /usr/bin/time -v: stdout goes to mining.log, stderr
# (including time's verbose resource report) goes to mining.err.
/usr/bin/time -v bash mining.sh 1> mining.log 2> mining.err
#!/bin/bash
# Start the data-mining EC2 instance if it is not already running, run
# /var/inpho/bin/mining.sh on it over ssh, then stop the instance.
#
# Requires the aws CLI, jq, and ssh access to the instance as inphosite.
# Mining stdout/stderr are written to /var/inpho/log/mining.{log,err} on
# the machine running this script.
INSTANCE_ID=i-0123456789abcdef

# Set to 0 when the instance cannot be brought up or reached, so that the
# mining step is skipped but the instance is still stopped at the end.
READY=1

# The jq filters are single-quoted: unquoted, their "[]" would be treated
# by the shell as a glob pattern.
STATE=$(aws ec2 describe-instances --instance-ids "$INSTANCE_ID" \
  | jq '.Reservations[].Instances[].State.Code')

# 16 is the EC2 state code for "running".
if [[ "$STATE" != 16 ]]; then
  echo "starting $INSTANCE_ID"
  aws ec2 start-instances --instance-ids "$INSTANCE_ID"
  echo "waiting for $INSTANCE_ID"
  # "instance-running" only reports the instance state; also wait for the
  # status checks to pass so sshd has had time to come up before we connect.
  if aws ec2 wait instance-running --instance-ids "$INSTANCE_ID" \
    && aws ec2 wait instance-status-ok --instance-ids "$INSTANCE_ID"; then
    echo "$INSTANCE_ID running!"
  else
    echo "$INSTANCE_ID did not come up; skipping data mining" >&2
    READY=0
  fi
else
  echo "already started $INSTANCE_ID - CHECK IF UNEXPECTED!"
fi

# Look the address up after the start so it reflects the running instance.
IP=$(aws ec2 describe-instances --instance-ids "$INSTANCE_ID" \
  | jq -r '.Reservations[].Instances[].PrivateIpAddress')
# jq -r prints the literal string "null" when the field is absent.
if [[ -z "$IP" || "$IP" == null ]]; then
  echo "no private IP found for $INSTANCE_ID; skipping data mining" >&2
  READY=0
fi

#ssh -i ~/.ssh/inphoprojectaws.pem inphosite@$IP "echo 'here we are!'"
if (( READY )); then
  echo "begining data mining"
  ssh "inphosite@$IP" "/var/inpho/bin/mining.sh" \
    1>/var/inpho/log/mining.log 2>/var/inpho/log/mining.err
fi

# Always request a stop, whether or not mining ran, so the instance is not
# left running.
aws ec2 stop-instances --instance-ids "$INSTANCE_ID"
- master
- mining
- sep-topics
- hypershelf