From d1b948ee3b4a8b0e2fcca6e3fe415bbf6f8ed89f Mon Sep 17 00:00:00 2001 From: Khalid Zuberi Date: Fri, 17 Jul 2015 00:33:34 -0400 Subject: [PATCH] fix #28, missing attribute names and descriptions --- builder/extract_attributes.py | 5 ++--- builder/update_attribute_descriptions.py | 4 ++-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/builder/extract_attributes.py b/builder/extract_attributes.py index 5a6bae1..0ff8407 100644 --- a/builder/extract_attributes.py +++ b/builder/extract_attributes.py @@ -43,12 +43,11 @@ def extract_attributes(input_files, groups_file, output_file, organism_id, group_id = matched[0] attributes = pd.read_csv(input_file, sep='\t', header=False, - names=['NAME', 'DESCRIPTION'], + names=['EXTERNAL_ID', 'NAME', 'DESCRIPTION'], dtype='str', na_filter=False) attributes['ATTRIBUTE_GROUP_ID'] = group_id attributes['ORGANISM_ID'] = organism_id - attributes['EXTERNAL_ID'] = attributes['NAME'] all_attributes.append(attributes) @@ -89,4 +88,4 @@ def extract_attributes(input_files, groups_file, output_file, organism_id, extract_attributes(args.inputs, args.groups, args.output, args.organism_id, args.key_lstrip, args.key_rstrip) else: - raise Exception('unexpected command') \ No newline at end of file + raise Exception('unexpected command') diff --git a/builder/update_attribute_descriptions.py b/builder/update_attribute_descriptions.py index 0413342..bff71b3 100644 --- a/builder/update_attribute_descriptions.py +++ b/builder/update_attribute_descriptions.py @@ -16,7 +16,7 @@ def main(attribute_file, description_file, output_file): names=['GENE', 'ATTRIBUTE']) descs = pd.read_csv(description_file, sep='\t', header=None, na_filter=False, - names=['ATTRIBUTE', 'DESCRIPTION']) + names=['ATTRIBUTE', 'NAME', 'DESCRIPTION']) # remove duplicates in the descriptions descs.drop_duplicates(subset=['ATTRIBUTE'], inplace=True) @@ -27,7 +27,7 @@ def main(attribute_file, description_file, output_file): attribs.drop_duplicates(inplace=True) # left merge, to grab only the needed descriptions - output = pd.merge(attribs[['ATTRIBUTE']], descs[['ATTRIBUTE', 'DESCRIPTION']], + output = pd.merge(attribs[['ATTRIBUTE']], descs[['ATTRIBUTE', 'NAME', 'DESCRIPTION']], left_on='ATTRIBUTE', right_on='ATTRIBUTE', how='left') # empty string for missing descriptions