forked from ml-tuw/ml-tuw.github.io
-
Notifications
You must be signed in to change notification settings - Fork 0
/
update_bibliography.sh
executable file
·52 lines (42 loc) · 1.55 KB
/
update_bibliography.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
#!/usr/bin/env bash
#
# Rebuilds ./publications/index.md from the publication list served by
# reposiTUm. All paths are relative, so run this from the directory that
# contains publications/.

# Stop at the first failing command, unset variable, or failing pipeline
# stage, so a failed step does not let later steps run on incomplete data.
set -euo pipefail

# Intermediate files.
raw=./publications/repositum_extract.html    # HTML as downloaded
select=./publications/repositum_select.html  # the wanted section of $raw
clean=./publications/repositum_select.md     # $select as a markdown list
header=./publications/repositum_header.md    # front matter + section heading

# Final output: $header followed by $clean.
pubs=./publications/index.md

# Remove the intermediate files on every exit path, including early aborts.
trap 'rm -f -- "$raw" "$select" "$clean" "$header"' EXIT
# download current paper list of 194_06 from repositum
# --fail makes curl exit non-zero on an HTTP error response instead of saving
# the error page and reporting success; --silent --show-error hides the
# progress meter but still prints error messages.
if ! curl --fail --silent --show-error \
  'https://repositum.tuwien.at/rest/orgunit/publications_html/tiss_id/6233' \
  > "$raw"; then
  printf '%s: download from reposiTUm failed\n' "${0##*/}" >&2
  # The redirection has already created $raw; do not leave a partial file.
  rm -f -- "$raw"
  exit 1
fi
# Filters stdin to stdout: printing starts on the line after a line matching
# "<h3>Journal Articles</h3>" and stops at a line matching
# "<h3>Presentations</h3>"; neither heading line is printed.
extract_article_section() {
  awk '
    /<h3>Journal Articles<\/h3>/ { keep = 1; next }  # start after this heading
    /<h3>Presentations<\/h3>/    { keep = 0 }        # stop at this heading
    keep                                             # print while inside
  '
}
extract_article_section < "$raw" > "$select"
# write a custom markdown header
# Prints the front matter block and the section heading, followed by one
# empty line. The here-doc delimiter is quoted so the text is emitted
# literally, with no expansion.
write_header() {
  cat <<'EOF'
---
layout: entitled
title: Publications
---
<h3>Journal Articles</h3>

EOF
}
write_header > "$header"
# remove messy repositum markup
# replace '>https://doi.org/...<' by '>(doi)<' to make links prettier
# last grep keeps all lines containing any of the author names (e.g. Gärtner)
# so you may add new group members, but should not remove old ones
# (otherwise, old publications vanish)
#
# Filters stdin (the selected reposiTUm HTML) to stdout (one line per kept
# citation). Returns the status of the final grep, i.e. non-zero when no
# line mentions any listed name.
#
# Stages of the pipeline:
#   1. sed: turn each ' <div class="csl-entry">' opener into the bullet "- ",
#      delete the three wrapper <div> openers, and replace a run of three
#      "</div>" closers by a line break.
#   2. awk: a record ends at "</div>" followed by an empty line; assigning
#      $1 to itself rebuilds the record with its lines joined by one space.
#   3. sed: drop the space in front of "reposiTUm" and replace link text that
#      starts with https://doi.org/ by "(doi)".
#   4. grep: keep only lines containing one of the names, matched literally.
clean_citations() {
  # Add new group members here; keep former ones.
  local -a members=(
    'Gärtner' 'Welke' 'Jogl' 'Thiessen' 'Indri' 'Drucks' 'Penz' 'Malhotra'
    'Jahanjoo' 'Sandrock' 'Sepliarskaia' 'Schmidt'
  )
  local -a name_filter=()
  local member
  for member in "${members[@]}"; do
    name_filter+=(-e "$member")
  done

  sed \
    -e 's/ <div class="csl-entry">/- /g' \
    -e 's/<div class="csl-bib-body">//g' \
    -e "s/<div class='col-md-12 col-sm-12'>//g" \
    -e "s/<div class='row citation-entry'>//g" \
    -e 's-</div></div></div>-\n-g' \
    | awk '{ $1 = $1; print }' RS='</div>\n\n' FS='\n' OFS=' ' \
    | sed -E \
      -e 's- reposiTUm-reposiTUm-g' \
      -e 's-(>https://doi.org/[^<]*)->\(doi\)-g' \
    | grep -F "${name_filter[@]}"
}
clean_citations < "$select" > "$clean"
# glue stuff together
# Header first, then the cleaned publication list. Expansions are quoted so
# the paths are never word-split or globbed; -- ends option parsing.
cat -- "$header" "$clean" > "$pubs"
# clean up
# -f: an intermediate file that is already gone is not an error;
# --: the operands are never parsed as options.
rm -f -- "$raw" "$select" "$clean" "$header"