Skip to content

Commit

Permalink
Fix molsa cfb scraping
Browse files (browse the repository at this point in the history)
  • Loading branch information
akariv committed Mar 13, 2019
1 parent 9855e71 commit 7b8da8e
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 11 deletions.
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from hashlib import md5
from dataflows import Flow, printer
from dataflows import Flow, printer, update_resource
from datapackage_pipelines.utilities.resources import PROP_STREAMING
import requests
from pyquery import PyQuery as pq

@@ -115,9 +116,15 @@ def flow(*args):
resolve_ordering_unit(),
fix_documents(),
calculate_publication_id(),
update_resource(
-1, name='molsa',
**{
PROP_STREAMING: True
}
),
printer()
)


if __name__ == '__main__':
flow().process()
# if __name__ == '__main__':
# flow().process()
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
scraper-exemptions:
calls-for-bids:
schedule:
crontab: "0 16 * * *"

pipeline:
- run: add_metadata
parameters:
name: calls-for-bids
# get the main HTML page of the exemptions search
- flow: molsa
runner: tzabar
- run: sample
- run: concatenate
parameters:
fields:
@@ -34,6 +34,9 @@ scraper-exemptions:
partners: []

documents: []
target:
name: calls_for_bids
path: calls_for_bids.csv
- run: set_types
parameters:
types:
@@ -45,10 +48,10 @@ scraper-exemptions:

start_date:
type: date
format: '%Y/%m/%d'
format: '%d/%m/%Y'
claim_date:
type: date
format: '%Y/%m/%d'
format: '%d/%m/%Y'

required_documents:
type: array
@@ -62,17 +65,18 @@ scraper-exemptions:
- {name: description, type: string}
- {name: update_time, type: string}


- run: set_primary_key
parameters:
calls_to_bids:
calls_for_bids:
- publication_id
- run: dump_to_path
parameters:
out-path: /var/datapackages/procurement/calls_to_bids
out-path: /var/datapackages/procurement/calls_for_bids
- run: dump_to_sql
parameters:
tables:
calls_to_bids:
resource-name: calls_to_bids
calls_for_bids:
resource-name: calls_for_bids
mode: update

0 comments on commit 7b8da8e

Please sign in to comment.