-
Notifications
You must be signed in to change notification settings - Fork 2
/
formatlogs
executable file
·77 lines (52 loc) · 2.29 KB
/
formatlogs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
#!/bin/bash
# this script extracts and formats the facebook messages exchanged between two people (party A and party B)
# the original logs are obtained by downloading a copy of your facebook data
# the script may not output logs in perfect chronological order: it was written under the assumption that the data
# dumps contain conversations in reverse chronological order split by person, but this is not _always_ true
# party A should be the person whose last name comes LATER in the alphabet:
# usage: ./formatlogs.sh FILE "Anna" "Zed" "Joe" "Apple"
# Print the given message on stderr and abort with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

if [ $# -ne 5 ]; then
  echo "usage: $0 path/to/messages.htm AFIRST ALAST BFIRST BLAST" 1>&2
  exit 1
fi

FILE=$1      # the path to the file containing the unformatted messages
AFIRST=$2    # first name of party A (the person whose facebook data was downloaded)
ALAST="$3,"  # last name of party A, with the comma that separates the two names in a thread header
BFIRST=$4    # first name of party B
BLAST=$5     # last name of party B
PAIR="$AFIRST $ALAST $BFIRST $BLAST"  # the actual string searched for after each thread marker
OUTPUT="${AFIRST}-${BFIRST}-logs.txt" # result file, written to the current directory

[ -r "$FILE" ] || die "$0: cannot read '$FILE'"

# All scratch files live in a private directory so that nothing in the current
# directory is read by accident or deleted during cleanup; the trap removes it
# on every exit path.
workdir=$(mktemp -d) || die "$0: could not create a temporary directory"
trap 'rm -rf -- "$workdir"' EXIT

# Start a new line at every user span, so each line begins with the author's name.
echo "negotiating relineations..."
sed 's/<span class="user">/\n/g' < "$FILE" > "$workdir/temp1" \
  || die "$0: could not split '$FILE'"

# Record the line number of every thread header belonging to this pair.
# -F matches the names literally, so punctuation in a name cannot act as a regex.
echo "surveying message threading... triplicating results..."
grep -n -F -- "div class=\"thread\">$PAIR" "$workdir/temp1" \
  | cut -f1 -d: > "$workdir/linenumbers"

x=0
echo "preparing control flow..."
while IFS= read -r line; do
  echo "enacting partitioning scheme..."
  # Everything from this thread header to the end of the input...
  tail -n "+${line}" "$workdir/temp1" > "$workdir/block"
  # ...cut at the second thread marker found in it (the first is this thread's own).
  l=$(grep -n -F '<div class="thread">' "$workdir/block" | cut -f1 -d: | sed -n '2p')
  if [ -n "$l" ]; then
    # The line holding the next marker is deliberately kept (head is inclusive).
    # NOTE(review): presumably that line still carries this thread's last message;
    # likewise the first line of each block may hold the tail of the previous
    # thread. Confirm against a real dump.
    head -n "$l" "$workdir/block" > "$workdir/blocks${x}"
  else
    # No later thread marker: the thread runs to the end of the file.
    cp -- "$workdir/block" "$workdir/blocks${x}"
  fi
  x=$((x + 1))
done < "$workdir/linenumbers"

[ "$x" -gt 0 ] || die "$0: no thread found for '$PAIR' in '$FILE'"

# Reverse the lines of each block, keeping the blocks themselves in the order
# they were found (a plain glob would sort blocks10 before blocks2).
echo "temporal distillation in effect..."
for ((i = 0; i < x; i++)); do
  tac "$workdir/blocks${i}"
done > "$workdir/temp0"

# Keep only the lines that start with one of the two first names.
echo "personalizing results..."
grep -E "^(${BFIRST}|${AFIRST})" "$workdir/temp0" > "$workdir/temp1"

# The substitutions below run as separate sed passes on purpose: each pass sees
# the newlines inserted by the previous one as real line boundaries.
echo "removing meta..."
sed 's/<\/span><span class="meta">/\n/g' "$workdir/temp1" > "$workdir/temp2"
echo "antispanning... reconciliating divides..."
sed 's/<\/span><\/div><\/div><p>/\n\n/g' "$workdir/temp2" > "$workdir/temp3"
echo "stripping headers..."
sed 's/<\/p><div class="message"><div class="message_header">/\n/g' "$workdir/temp3" > "$workdir/temp4"

# HTML entity for an apostrophe -> backtick (the entity spellings accepted here
# are &#039;, &#39; and &apos;).
echo "achieving escape... \`"
sed -E 's/&(#0*39|apos);/`/g' "$workdir/temp4" > "$workdir/temp5"
# HTML entity for a double quote -> "
echo 'achieving escape... "'
sed 's/&quot;/"/g' "$workdir/temp5" > "$workdir/temp6"
# Drop the trailer that closes this thread and opens the next one.
echo "disabling safeguards..."
sed 's/<\/p><\/div><div class="thread">.*<div class="message"><div class="message_header">/\n/g' "$workdir/temp6" > "$workdir/temp7"
# Decoded last so a literal < produced here cannot be mistaken for markup above.
echo "achieving escape... <"
sed 's/&lt;/</g' "$workdir/temp7" > "$OUTPUT"

echo "destroying evidence..."
rm -rf -- "$workdir"