Skip to content

Commit

Permalink
Another fix for manage_revisions
Browse files Browse the repository at this point in the history
  • Loading branch information
akariv committed Oct 2, 2017
1 parent 817477a commit dca0f3b
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@ def boolean(x):


def date(x):
if x is None:
return None

try:
x = float(x)
return datetime.date(1900, 1, 1) + datetime.timedelta(days=int(x))
Expand Down
18 changes: 14 additions & 4 deletions datapackage_pipelines_budgetkey/processors/manage-revisions.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,13 +45,16 @@ def calc_hash(row, hash_fields):
return hash


def get_all_existing_ids(connection_string, db_table, key_fields, hash_fields, schema):
def get_all_existing_ids(connection_string, db_table, key_fields, hash_fields, array_fields):
ret = DB()
storage = Storage(create_engine(connection_string))

if db_table in storage.buckets:
descriptor = storage.describe(db_table, schema)
db_fields = [f['name'] for f in descriptor['fields']]
descriptor = storage.describe(db_table)
for field in descriptor['fields']:
if field['name'] in array_fields:
field['type'] = 'array'
descriptor = storage.describe(db_table, descriptor)
for rec in storage.iter(db_table):
rec = dict(zip(db_fields, rec))
existing_id = dict(
Expand Down Expand Up @@ -121,6 +124,7 @@ def main():
resource_name = parameters['resource-name']
input_key_fields = parameters['key-fields']
input_hash_fields = parameters.get('hash-fields')
array_fields = parameters.get('array-fields', [])

for res in dp['resources']:
if resource_name == res['name']:
Expand All @@ -133,12 +137,18 @@ def main():
db_key_fields = parameters.get('db-key-fields', input_key_fields)
db_hash_fields = parameters.get('db-hash-fields', input_hash_fields)

schema_array_fields = [
field['name']
for field in res['schema']['fields']
if field['type'] == 'array'
]

existing_ids = \
get_all_existing_ids(connection_string,
parameters['db-table'],
db_key_fields,
db_hash_fields,
res['schema'])
array_fields + schema_array_fields)
break

assert existing_ids is not None
Expand Down

0 comments on commit dca0f3b

Please sign in to comment.