Skip to content

Commit

Permalink
fix #28, missing attribute names and descriptions
Browse files (browse the repository at this point in the history)
  • Loading branch information
kzuberi committed Jul 17, 2015
1 parent 0eb5db5 commit d1b948e
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 5 deletions.
5 changes: 2 additions & 3 deletions builder/extract_attributes.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -43,12 +43,11 @@ def extract_attributes(input_files, groups_file, output_file, organism_id,
group_id = matched[0]

attributes = pd.read_csv(input_file, sep='\t', header=False,
names=['NAME', 'DESCRIPTION'],
names=['EXTERNAL_ID', 'NAME', 'DESCRIPTION'],
dtype='str', na_filter=False)

attributes['ATTRIBUTE_GROUP_ID'] = group_id
attributes['ORGANISM_ID'] = organism_id
attributes['EXTERNAL_ID'] = attributes['NAME']

all_attributes.append(attributes)

Expand Down Expand Up @@ -89,4 +88,4 @@ def extract_attributes(input_files, groups_file, output_file, organism_id,
extract_attributes(args.inputs, args.groups, args.output, args.organism_id,
args.key_lstrip, args.key_rstrip)
else:
raise Exception('unexpected command')
raise Exception('unexpected command')
4 changes: 2 additions & 2 deletions builder/update_attribute_descriptions.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -16,7 +16,7 @@ def main(attribute_file, description_file, output_file):
names=['GENE', 'ATTRIBUTE'])

descs = pd.read_csv(description_file, sep='\t', header=None, na_filter=False,
names=['ATTRIBUTE', 'DESCRIPTION'])
names=['ATTRIBUTE', 'NAME', 'DESCRIPTION'])

# remove duplicates in the descriptions
descs.drop_duplicates(subset=['ATTRIBUTE'], inplace=True)
Expand All @@ -27,7 +27,7 @@ def main(attribute_file, description_file, output_file):
attribs.drop_duplicates(inplace=True)

# left merge, to grab only the needed descriptions
output = pd.merge(attribs[['ATTRIBUTE']], descs[['ATTRIBUTE', 'DESCRIPTION']],
output = pd.merge(attribs[['ATTRIBUTE']], descs[['ATTRIBUTE', 'NAME', 'DESCRIPTION']],
left_on='ATTRIBUTE', right_on='ATTRIBUTE', how='left')

# empty string for missing descriptions
Expand Down

0 comments on commit d1b948e

Please sign in to comment.