forked from okfde/dokukratie
-
Notifications
You must be signed in to change notification settings - Fork 0
/
workflow.yml.tmpl
79 lines (77 loc) · 2.33 KB
/
workflow.yml.tmpl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
# Workflow template. The scraper-name placeholder (in angle brackets) is
# presumably substituted per scraper when the template is rendered -- confirm
# against the generator that consumes this file.
name: <scraper_name>

on:
  # Manual trigger. All inputs are optional; they are forwarded to the
  # "Run crawler" step as environment variables.
  workflow_dispatch:
    inputs:
      start_date:
        description: 'Start date'
      end_date:
        description: 'End date'
      legislative_term:
        description: 'Legislative term'
      document_type:
        description: 'Document type'
  # Scheduled trigger: every day at 03:00 (GitHub evaluates cron in UTC).
  schedule:
    - cron: "0 3 * * *"

jobs:
  scrape:
    runs-on: ubuntu-latest
    services:
      # Redis service container, published on the runner at localhost:6379
      # (see REDIS_URL below). The health options let the runner wait until
      # Redis answers before the steps start.
      redis:
        image: redis:alpine
        options: >-
          --health-cmd "redis-cli ping"
          --health-interval 10s
          --health-timeout 5s
          --health-retries 5
        ports:
          # Quoted so the mapping is always read as a string.
          - "6379:6379"
    # Job-wide environment, shared by every step.
    env:
      ARCHIVE_TYPE: s3
      ARCHIVE_BUCKET: ${{ secrets.ARCHIVE_BUCKET }}
      ARCHIVE_ENDPOINT_URL: ${{ secrets.ARCHIVE_ENDPOINT_URL }}
      DATA_BUCKET: ${{ secrets.DATA_BUCKET }}
      AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
      AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
      AWS_REGION: eu-central-1
      AWS_DEFAULT_REGION: eu-central-1
      MEMORIOUS_CONFIG_PATH: dokukratie
      # Environment values are always strings; quoted so no YAML loader
      # types them as integers.
      MEMORIOUS_EXPIRE: "30"
      MEMORIOUS_HTTP_TIMEOUT: "60"
      MEMORIOUS_CONTINUE_ON_ERROR: "1"
      MEMORIOUS_DATASTORE_URI: ${{ secrets.MEMORIOUS_DATASTORE_URI }}
      REDIS_URL: redis://localhost:6379/0
    steps:
      # Always check out the main branch, regardless of the triggering ref.
      - uses: actions/checkout@v4
        with:
          ref: main
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.9"
      - name: Install dependencies
        env:
          DEBIAN_FRONTEND: noninteractive
        run: |
          # Refresh the package index first: the runner image's cached index
          # can be stale, which makes the install fail intermittently.
          sudo apt-get update -qq
          sudo apt-get install -y -qq libicu-dev libpq-dev
          pip install --no-cache-dir -q pyicu awscli
          pip install --no-cache-dir -q -e .
          pip install --no-cache-dir -q -r requirements-prod.txt
      - name: Pull mmmeta
        run: |
          make <scraper_name>.pull
      - name: Run crawler
        env:
          # Empty on scheduled runs, where no workflow_dispatch inputs exist.
          START_DATE: ${{ github.event.inputs.start_date }}
          END_DATE: ${{ github.event.inputs.end_date }}
          LEGISLATIVE_TERMS: ${{ github.event.inputs.legislative_term }}
          DOCUMENT_TYPES: ${{ github.event.inputs.document_type }}
        run: |
          make <scraper_name>.run_prod
      # The two steps below use always() so they still run when an earlier
      # step (e.g. the crawler) failed.
      - name: Generate mmmeta
        if: always()
        run: |
          make <scraper_name>.mmmeta
      - name: Upload data
        if: always()
        run: |
          make <scraper_name>.upload