Skip to content

Commit

Permalink
clarify genecluster mode
Browse files: browse the repository at this point in the history
  • Loading branch information
boasvdp authored Oct 5, 2023
1 parent 9095c65 commit daad37f
Showing 1 changed file with 4 additions and 0 deletions.
4 changes: 4 additions & 0 deletions extract_genes_abricate.py
Original file line number Diff line number Diff line change
Expand Up @@ -289,6 +289,7 @@ def main_genecluster(df, args):
Returns:
None
"""
# This function needs refactoring
logging.debug(f"Calling function main_genecluster")
logging.debug(f"Parse multiple rows at once for gene cluster processing")
genome, combination, output = parse_multiple_rows(df, args.suffix, args.genomedir)
Expand All @@ -314,8 +315,11 @@ def main_genecluster(df, args):
combined_row = pd.Series({'SEQUENCE': most_common_contig, 'START': START_updated, 'END': END_updated})
logging.debug(f"Check how many hits are sense or antisense")
strand_series = df_most_common_contig['STRAND'].value_counts()
# Check whether all genes are in the same orientation (i.e. whether both sense and antisense hits occur)
if ('+' in strand_series) and ('-' in strand_series):
logging.debug(f"Sense and antisense hits in abricate output")
# If there are more antisense than sense hits, reverse-complement the sequence before writing it to file.
# By the time a result is written, the majority of genes should be in the sense orientation.
if strand_series.loc['-'] > strand_series.loc['+']:
logging.debug(f"More antisense hits are found")
decision_strand = 'antisense'
Expand Down

0 comments on commit daad37f

Please sign in to comment.