forked from upenn-libraries/discovery-app
-
Notifications
You must be signed in to change notification settings - Fork 0
/
fetch_and_process_oai.sh
executable file
·70 lines (55 loc) · 1.66 KB
/
fetch_and_process_oai.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
#!/bin/bash
# Runs fetch_oai.rb for the window between the last run (or FROM_TIMESTAMP)
# and now, then runs preprocess_oai.sh and, unless --skip-indexing is given,
# index_and_deletions.sh on the resulting batch directory.
#
# Usage: fetch_and_process_oai.sh [--skip-indexing] OAI_DIR [FROM_TIMESTAMP]
#   --skip-indexing  do not run index_and_deletions.sh
#   OAI_DIR          base directory; each batch is written to
#                    OAI_DIR/<set name>/<YYYY_MM_DD_HH_MM>
#   FROM_TIMESTAMP   start of the fetch window; when omitted it is read from
#                    OAI_DIR/<set name>/LAST_RUN
#
# Exit status: 0 on success, 1 on a usage error or when any step failed.
#
# The helper scripts are invoked relative to the current directory, so the
# script must be started from the directory that contains them.
#
# script that is meant to be run as a cron job, using jenkins, or
# through some other job scheduler.
# Currently the cron runs this as:
# cd /opt/discovery && ./run_in_container.sh ./fetch_and_process_oai.sh /var/solr_input_data/alma_production/oai >> /opt/discovery/log/fetch_and_process_oai.log 2>> /opt/discovery/log/fetch_and_process_oai.log

# Fail loudly on a misspelled/unset variable instead of expanding it to "".
set -u

set_name=allTitles
skip_indexing=false

# Print an error message to stderr and abort with a failing status.
die() {
  echo "ERROR: $*" >&2
  exit 1
}

if [[ "${1:-}" == "--skip-indexing" ]]; then
  skip_indexing=true
  shift
fi

# Checked after the optional flag has been shifted away, so that
# "--skip-indexing" on its own is rejected as well.
oai_dir="${1:-}"
if [[ -z "$oai_dir" ]]; then
  echo "Usage: fetch_and_process_oai.sh [--skip-indexing] OAI_DIR [FROM_TIMESTAMP]" >&2
  exit 1
fi

set_dir="$oai_dir/$set_name"
last_run_file="$set_dir/LAST_RUN"
batch_dir="$set_dir/$(date +"%Y_%m_%d_%H_%M")"

# An explicit FROM_TIMESTAMP wins; otherwise fall back to the LAST_RUN file.
last_run="${2:-}"
if [[ -z "$last_run" && -r "$last_run_file" ]]; then
  last_run=$(<"$last_run_file")
fi
if [[ -z "$last_run" ]]; then
  die "No argument supplied and $last_run_file file not found or empty. Can't proceed."
fi

# Only create the batch directory once the inputs are known to be usable, so
# a rejected invocation does not leave an empty batch directory behind.
mkdir -p -- "$batch_dir" || die "Could not create batch directory $batch_dir."

# format date as ISO8601, as expected by OAI
now=$(date -u +"%Y-%m-%dT%H:%M:%SZ")

echo "############################################################"
echo "#### OAI fetch and process started at $(date)"

echo "Fetching from OAI"
if ! ./fetch_oai.rb "$set_name" "$last_run" "$now" "$batch_dir"; then
  die "Something went wrong running fetch_oai.rb. Exiting script."
fi

# LAST_RUN is advanced as soon as the fetch has succeeded; the fetched batch
# itself stays in $batch_dir for the later steps.
echo "Updating LAST_RUN file"
printf '%s\n' "$now" > "$last_run_file" || die "Could not write $last_run_file."

# Later steps keep running after a failure (as before), but the failure is
# remembered so the scheduler sees a non-zero exit status.
status=0

echo "Running preprocessing tasks"
if ! ./preprocess_oai.sh "$batch_dir" "$set_name"; then
  echo "ERROR: preprocess_oai.sh exited with a non-zero status." >&2
  status=1
fi

if [[ "$skip_indexing" == false ]]; then
  echo "Running index_and_deletions.sh"
  if ! ./index_and_deletions.sh "$batch_dir" "$set_name"; then
    echo "ERROR: index_and_deletions.sh exited with a non-zero status." >&2
    status=1
  fi
else
  echo "Skipping indexing"
fi

echo "#### OAI fetch and process ended at $(date)"
exit "$status"