Skip to content

Commit

Permalink
Fixed the IDs of extra copies in the polish step; fixed writing the header to stdout
Browse files Browse the repository at this point in the history
  • Loading branch information
agshumate committed Feb 3, 2022
1 parent 0211f22 commit 35a4e55
Show file tree
Hide file tree
Showing 5 changed files with 44 additions and 30 deletions.
10 changes: 10 additions & 0 deletions .idea/workspace.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion liftoff/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = '1.6.2'
__version__ = '1.6.3'
2 changes: 1 addition & 1 deletion liftoff/run_liftoff.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ def parse_args(arglist):
"target and "
"reference; by default F=0.0")

parser.add_argument('-V', '--version', help='show program version', action='version', version='v1.6.2')
parser.add_argument('-V', '--version', help='show program version', action='version', version='v1.6.3')
parser.add_argument(
'-p', default=1, type=int, metavar='P', help='use p parallel processes to accelerate alignment; by default p=1'
)
Expand Down
58 changes: 31 additions & 27 deletions liftoff/write_new_gff.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,19 @@
from liftoff import liftoff_utils, __version__
import sys


def write_header(f, out_type):
f.write("# " + " ".join(sys.argv) + "\n")
f.write("# Liftoff v" + __version__ + "\n")
if out_type == 'gff3':
f.write('##gff-version 3' + "\n")
f.write("# Liftoff v" + __version__ + "\n")
f.write("# " + " ".join(sys.argv) + "\n")


def write_new_gff(lifted_features, args, feature_db):
if args.o != 'stdout':
f = open(args.o, 'w')
else:
f = "stdout"
f = sys.stdout
out_type = feature_db.dialect['fmt']
write_header(f, out_type)
parents = liftoff_utils.get_parent_list(lifted_features)
Expand Down Expand Up @@ -77,53 +78,56 @@ def write_feature(children, outfile, child_features, parent_dict, output_type):
for child in children:
write_line(child, outfile, output_type)
if child.id in parent_dict:
new_children = parent_dict[child.id]
new_children = parent_dict[child.id ]
write_feature(new_children, outfile, child_features, parent_dict, output_type)
return


def write_line(feature, out_file, output_type):
if output_type == 'gff3':
line = make_gff_line(feature)
if feature.attributes["extra_copy_number"][0]!='0':
attr_dict = edit_copy_ids(feature)
else:
line = make_gtf_line(feature)
if out_file == "stdout":
print(line)
attr_dict = feature.attributes
if output_type == 'gff3':
line = make_gff_line(attr_dict, feature)
else:
out_file.write(line)
out_file.write("\n")
line = make_gtf_line(attr_dict, feature)
out_file.write(line)
out_file.write("\n")


def make_gff_line(feature):
edit_copy_ids(feature)
attributes_str = "ID=" + feature.attributes["ID"][0] + ";" #make ID the first printed attribute
for attr in feature.attributes:
def make_gff_line(attr_dict, feature):
attributes_str = "ID=" + attr_dict["ID"][0] + ";" #make ID the first printed attribute
for attr in attr_dict:
if attr != "copy_id":
value_str = ""
for value in feature.attributes[attr]:
value_str += value + ","
for value in attr_dict[attr]:
value_str += value + ","
if attr != "ID":
attributes_str += (attr + "=" + value_str[:-1] + ";")
return feature.seqid + "\t" + feature.source + "\t" + feature.featuretype + "\t" + str(feature.start) + \
"\t" + str(feature.end) + "\t" + "." + "\t" + feature.strand + "\t" + "." + "\t" + attributes_str[:-1]

def edit_copy_ids(feature):
new_attr_dict = feature.attributes.copy()
copy_num = feature.attributes["extra_copy_number"][0]
if copy_num != '0':
feature.attributes["ID"] = [feature.attributes["ID"][0]+ "_" + copy_num]
if "Parent" in feature.attributes:
feature.attributes["Parent"] = [feature.attributes["Parent"][0] + "_" + copy_num]
if "gene_id" in feature.attributes:
feature.attributes["gene_id"] = [feature.attributes["gene_id"][0] + "_" + copy_num]
for attr in feature.attributes:
if attr[-3:] == "_id":
new_attr_dict[attr] = [feature.attributes[attr][0] + "_" + copy_num]
elif attr == 'ID':
new_attr_dict["ID"] = [feature.attributes["ID"][0]+ "_" + copy_num]
elif attr == "Parent":
new_attr_dict["Parent"] = [feature.attributes["Parent"][0] + "_" + copy_num]
return new_attr_dict


def make_gtf_line(feature):
def make_gtf_line(attr_dict, feature):
attributes_str = ""
for attr in feature.attributes:
for attr in attr_dict:
if attr != "copy_id":
if len(feature.attributes[attr]) >0:
if len(attr_dict[attr]) >0:
value_str = ""
for value in feature.attributes[attr]:
for value in attr_dict[attr]:
value_str += value + ","
attributes_str += (attr + " " + '"' + value_str[:-1] + '"' + "; ")
return feature.seqid + "\t" + feature.source + "\t" + feature.featuretype + "\t" + str(feature.start) + \
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

setuptools.setup(
name="Liftoff",
version="1.6.2",
version="1.6.3",
author="Alaina Shumate",
author_email="[email protected]",
description="A gene annotation mapping tool",
Expand Down

0 comments on commit 35a4e55

Please sign in to comment.