v3.3.b

masikol · Dec 4, 2024 · 648c0cd · 648c0cd
1 parent 453e18f
commit 648c0cd
Show file tree

Hide file tree

Showing 11 changed files with 117 additions and 126 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -2,7 +2,11 @@
 
 # 2024-12-04 edition
 
-Version `3.3.a`
+## Version `3.3.b`
+
+Improved progress reporting.
+
+## Version `3.3.a`
 
 1. Added option `-r / --target-seq-ids`. One can use it to avoid wasting time annotating all sequences in a target fasta file if some of them are unwanted. Or to annotate sequences in a multi-fasta file separately (e.g. with different thresholds) without splitting the fasta file.
 
@@ -12,13 +16,13 @@ Version `3.3.a`
 
 # 2023-12-18 edition
 
-Version `3.2.a`
+## Version `3.2.a`
 
 Rename “consensus-highlighter” to “con-hi” for the sake of simplicity.
 
 # 2023-11-01 edition
 
-Version `3.1.b`
+## Version `3.1.b`
 
 Now, when outputting sequence records, the program will just print any emerging generic Biopython warnings in a human-friendly way, without extra technical lines.
 
@@ -39,21 +43,21 @@ What is more important, sole catching Biopython warnings will not now cause empt
 
 # 2023-10-29 edition
 
-Version `3.1.a`
+## Version `3.1.a`
 
 1. Fix a bug that would cause the program to terminate if `-C` threshold is enabled and a high-coverage region starts at position 0 or ends at position (LENGTH-1) of the reference sequence.
 
 2. Now, the program prints its own warning if length of an output sequence name is too long for "pretty" GenBank representation, according to GenBank standard (Dec 15, 2018) 229.0. In previous versions of the program, BioPython warning used to be emitted, which is not very informative: "Increasing length of locus line to allow long name. This will result in fields that are not in usual positions.".
 
 ## 2023-05-25 edition
 
-Version `3.0.b`
+## Version `3.0.b`
 
 Fix a bug preventing `con-hi` from parsing `samtools version` output correctly if `samtools` is compiled with a flag `-ffile-prefix-map`. In that case, the `samtools version` output contains some non-utf8 characters.
 
 ## 2022-07-26 edition
 
-Version `3.0.a`
+## Version `3.0.a`
 
 1. Add option `-l/min-feature-len`. It sets minimum length of an output feature.
 
@@ -67,23 +71,23 @@ Version `3.0.a`
 
 ## 2022-04-26 edition
 
-Version `2.3.a`
+## Version `2.3.a`
 
 1. Add recommendation "samtools `1.13` or later is recommended". This is the version in which `samtools depth` [had been](https://github.com/samtools/samtools/releases/tag/1.13) completely rewritten. Since 1.13, `samtools depth` calculates coverage more accurately.
 
 2. Now con-hi does not crash if samtools version is of the following format: `1.15.1` (three dot-separated numbers). Previously, only two dot-separated numbers were permitted.
 
 ## 2022-02-23 edition
 
-Version `2.2.b`
+## Version `2.2.b`
 
 Changes:
 1. Now con-hi removes its temporary file `coverages.tsv`, where coverage value of each base is stored.
 2. A bug was fixed that would cause the program to write the error message about an unmet dependency to stdout instead of stderr.
 
 ## 2022-02-10 edition
 
-Version `2.2.a`
+## Version `2.2.a`
 
 Now con-hi adds a comment to output GenBank files. Here is the example of such a comment:
 
@@ -99,33 +103,33 @@ COMMENT     ##Coverage-Data-START##
 
 ## 2022-01-24 edition
 
-Version `2.1.a`
+## Version `2.1.a`
 
 Now con-hi is compatible with samtools 1.13+.
 
 ## 2021-12-23 edition
 
-Version `2.0.b`
+## Version `2.0.b`
 
 Now con-hi calculates and prints average coverage.
 
 ## 2021-12-09 edition
 
-Version `2.0.a`
+## Version `2.0.a`
 
 Removed options `-o/--outdir` and `--prefix`.
 
 Added option `-o/--outfile`. And now con-hi.py writes all GenBank output records to this single output GenBank file.
 
 ## 2021-06-15 edition
 
-Version `1.1.a`
+## Version `1.1.a`
 
 `con-hi` no longer piles up coverage features with identical locations. It means that you will not see both "zero coverage" and "coverage < 10" features starting at the same positions and ending at the same positions.
 
 ## 2021-06-11 edition
 
-Version `1.0.d`
+## Version `1.0.d`
 
 Added warning messages for following cases:
 
@@ -134,13 +138,13 @@ Added warning messages for following cases:
 
 ## 2021-06-10 edition
 
-Version `1.0.b`
+## Version `1.0.b`
 
 Fixed bug that would cause the program to stumble on a non-extant directory in PATH while checking dependencies.
 
 And then...
 
-Version `1.0.c`
+## Version `1.0.c`
 
 Fixed a bug that would cause the program to stumble on lowercase input sequences.
 

diff --git a/README.md b/README.md
@@ -2,7 +2,7 @@
 
 “Con-hi” means “**con**sensus-**hi**ghlighter”.
 
-Latest version is `3.3.a` (2024-12-04 edition).
+Latest version is `3.3.b` (2024-12-04 edition).
 
 ## Description
 

diff --git a/con-hi.py b/con-hi.py
@@ -2,20 +2,28 @@
 
 import sys
 
-__version__: str  = '3.3.a'
+__version__  = '3.3.b'
 # Year, month, day
-__last_update_date__: str = '2024-12-04'
-__min_python_version__: float = 3.6
+__last_update_date__ = '2024-12-04'
+__min_python_version__ = 3.6
 # __author__ = 'Maksim Sikolenko'
 
 
+import logging
+logging.basicConfig(
+    format='%(asctime)s: %(levelname)s -- %(message)s',
+    # datefmt='%Y-%m-%d %I:%M:%S %p',
+    level=logging.INFO
+)
+
+
 # === Check python interpreter version ===
 if sys.version_info.major + sys.version_info.minor*0.1 < __min_python_version__:
-    print(
+    logging.critical(
         'Your python interpreter version is ' + '%d.%d' %
         (sys.version_info.major, sys.version_info.minor)
     )
-    print('  Please, use Python %.1f+.\a' % __min_python_version__)
+    logging.critical('  Please, use Python %.1f+.\a' % __min_python_version__)
     # In python 2 'raw_input' does the same thing as 'input' in python 3.
     # Neither does 'input' in python2.
     if sys.platform.startswith('win'):
@@ -51,7 +59,7 @@
 
 # === Print name of the program and version ===
 
-print('\ncon-hi - Version {}'.format(__version__))
+print('\n== con-hi - Version {} ==\n'.format(__version__))
 
 
 # === Check dependencies ===

diff --git a/src/arguments.py b/src/arguments.py
@@ -3,9 +3,9 @@
 import re
 import sys
 import getopt
+import logging
 from typing import List, Sequence, Iterable, Set
 
-from src.printing import print_err
 from src.platform import platf_depend_exit
 
 
@@ -92,17 +92,17 @@ def parse_arguments() -> HighlighterArgs:
             ]
         )
     except getopt.GetoptError as err:
-        print_err(str(err))
+        logging.error(str(err))
         platf_depend_exit(2)
     # end try
 
     # Check positional arguments: their existance is an error signal
     if len(args) != 0:
-        print_err('Error: con-hi.py does not take any positional arguments.')
-        print_err('You passed following positional argument(s):')
+        logging.error('Error: con-hi.py does not take any positional arguments.')
+        logging.error('You passed following positional argument(s):')
         arg: str
         for arg in args:
-            print_err(f'  `{arg}`')
+            logging.error(f'`{arg}`')
         # end for
         platf_depend_exit(2)
     # end if
@@ -112,12 +112,12 @@ def parse_arguments() -> HighlighterArgs:
 
     # Check mandatory options
     if args.target_fasta_fpath is None:
-        print_err('Error: option `-f` (`--target-fasta`) is mandatory.')
+        logging.error('Error: option `-f` (`--target-fasta`) is mandatory.')
         platf_depend_exit(2)
     # end if
 
     if args.bam_fpath is None:
-        print_err('Error: option `-b` (`--bam`) is mandatory.')
+        logging.error('Error: option `-b` (`--bam`) is mandatory.')
         platf_depend_exit(2)
     # end if
 
@@ -152,35 +152,34 @@ def _parse_options(opts: List[List[str]]) -> HighlighterArgs:
         if opt in ('-f', '--target-fasta'):
 
             if not os.path.isfile(arg):
-                print_err(f'\aError: file {arg}` does not exist.')
+                logging.error(f'\aError: file {arg}` does not exist.')
                 platf_depend_exit(2)
             # end if
 
             if not _is_fasta(arg):
-                print_err('\aError: only plain fasta or gzipped fasta are supported.')
-                print_err(f'Erroneous file: `{arg}`.')
-                print_err('Allowed extentions: `.fasta`, `.fa`, `.fasta.gz`, `.fa.gz`.')
+                logging.error('\aError: only plain fasta or gzipped fasta are supported.')
+                logging.error(f'Erroneous file: `{arg}`.')
+                logging.error('Allowed extentions: `.fasta`, `.fa`, `.fasta.gz`, `.fa.gz`.')
                 platf_depend_exit(2)
             # end if
 
             args.target_fasta_fpath = arg
 
         # Target reference sequence IDs
         elif opt in ('-r', '--target-seq-ids'):
-            # TODO: check if all seqids are in target fasta
             args.target_seq_ids = set(arg.split(_ARG_SEP))
 
         # BAM file
         elif opt in ('-b', '--bam'):
 
             if not os.path.isfile(arg):
-                print_err(f'\aError: file {arg}` does not exist.')
+                logging.error(f'\aError: file {arg}` does not exist.')
                 platf_depend_exit(2)
             # end if
 
             if not _is_bam(arg):
-                print_err(f'\aError: file `{arg}` does not seem like a BAM file.')
-                print_err('The program only accepts BAM files having `.bam` extentions')
+                logging.error(f'\aError: file `{arg}` does not seem like a BAM file.')
+                logging.error('The program only accepts BAM files having `.bam` extentions')
                 platf_depend_exit(2)
             # end if
 
@@ -201,11 +200,11 @@ def _parse_options(opts: List[List[str]]) -> HighlighterArgs:
 
             if any(map(_coverage_not_parsable, cov_strings)):
                 invalid_strings: Iterable[str] = filter(_coverage_not_parsable, cov_strings)
-                print_err(f'\aError: invalid coverage thresholds in `{arg}`:')
+                logging.error(f'\aError: invalid coverage thresholds in `{arg}`:')
                 for s in invalid_strings:
-                    print_err(f'  `{s}`')
+                    logging.error(f'  `{s}`')
                 # end for
-                print_err('Coverage thesholds must be positive integer numbers.')
+                logging.error('Coverage thesholds must be positive integer numbers.')
                 platf_depend_exit(2)
             # end if
 
@@ -227,11 +226,11 @@ def _parse_options(opts: List[List[str]]) -> HighlighterArgs:
 
             if any(map(_coefficient_not_parsable, coef_strings)):
                 invalid_strings: Iterable[str] = filter(_coefficient_not_parsable, coef_strings)
-                print_err(f'\aError: invalid coverage coefficient in `{arg}`:\n')
+                logging.error(f'\aError: invalid coverage coefficient in `{arg}`:')
                 for s in invalid_strings:
-                    print_err(f'  `{s}`\n')
+                    logging.error(f'`{s}`')
                 # end for
-                print_err('Coverage coefficients must be positive numbers.\n')
+                logging.error('Coverage coefficients must be positive numbers.')
                 platf_depend_exit(2)
             # end if
 
@@ -254,8 +253,8 @@ def _parse_options(opts: List[List[str]]) -> HighlighterArgs:
                     raise ValueError
                 # end if
             except ValueError:
-                print_err(f'\aError: invalid minimum feature length: `{arg}`')
-                print_err('It must be non-negative negative number.')
+                logging.error(f'\aError: invalid minimum feature length: `{arg}`')
+                logging.error('It must be non-negative negative number.')
                 platf_depend_exit(2)
             # end try
             args.min_feature_len = min_feature_len

diff --git a/src/dedupl_features.py b/src/dedupl_features.py
@@ -14,7 +14,7 @@ def dedupl_features(
     # :param new_features: list of recently discovered features;
     # :param extant_features: list of features that existed before `new_features`
     #   was discovered;
-    
+
     # Create the tuple of coordinates in order not to extract these locations each time
     extant_coords: MutableSequence[Tuple[int, int]] = tuple(
         (