diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index e0890ac..2185e65 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -16,7 +16,7 @@ jobs:
       max-parallel: 10
       matrix:
         python-version: ['3.11', '3.10', '3.9', '3.8', '3.7']
-        os: [ubuntu, macos]
+        os: [ubuntu-latest, macos-latest]
         exclude:
           # mac os: exclude all but the last two (available) python releases
           - os: macos
diff --git a/.github/workflows/pypi.yml b/.github/workflows/pypi.yml
index 157fc73..c5cb4f5 100644
--- a/.github/workflows/pypi.yml
+++ b/.github/workflows/pypi.yml
@@ -7,7 +7,7 @@ on:
 jobs:
   build-n-publish:
     name: build and publish packages to PyPI and TestPyPI
-    runs-on: ubuntu
+    runs-on: ubuntu-latest
 
     steps:
       - name: checkout
diff --git a/pyfaidx/__init__.py b/pyfaidx/__init__.py
index c8aec44..55d85e3 100644
--- a/pyfaidx/__init__.py
+++ b/pyfaidx/__init__.py
@@ -304,6 +304,48 @@ def gc(self):
         c = self.seq.count('C')
         c += self.seq.count('c')
         return (g + c) / len(self.seq)
+
+    @property
+    def gc_strict(self):
+        """ Return the GC content of seq as a float, ignoring non ACGT characters
+
+        Matching is case-insensitive. Returns 0.0 if seq has no A, C, G or T.
+        >>> x = Sequence(name='chr1', seq='NMRATCGTA')
+        >>> y = round(x.gc_strict, 2)
+        >>> y == 0.33
+        True
+        >>> Sequence(name='chr1', seq='NNNN').gc_strict
+        0.0
+        """
+        trimSeq = re.sub(r'[^ACGT]', '', self.seq.upper())
+        if not trimSeq:
+            # no unambiguous bases left; avoid ZeroDivisionError
+            return 0.0
+        gc = sum(trimSeq.count(i) for i in ['G','C'])
+        return gc / len(trimSeq)
+
+    @property
+    def gc_iupac(self):
+        """ Return the GC content of seq as a float, accounting for IUPAC ambiguity
+
+        Weights: S, C, G = 1; B, V = 0.67; M, R, Y, K = 0.5; H, D = 0.33; N = 0.25;
+        A, T, W = 0. Other characters are dropped. Returns 0.0 if nothing remains.
+        >>> x = Sequence(name='chr1', seq='NMRATCGTA')
+        >>> y = round(x.gc_iupac, 2)
+        >>> y == 0.36
+        True
+        """
+        trimSeq = re.sub(r'[^ACGTMRWSYKVHDBN]', '', self.seq.upper())
+        if not trimSeq:
+            # no IUPAC nucleotide codes left; avoid ZeroDivisionError
+            return 0.0
+        gc = sum(trimSeq.count(i) for i in ['S','C','G'])
+        gc += sum(trimSeq.count(i) for i in ['B','V']) * 0.67
+        gc += sum(trimSeq.count(i) for i in ['M','R','Y','K']) * 0.5
+        gc += sum(trimSeq.count(i) for i in ['H','D']) * 0.33
+        # NOTE(review): N spans A/C/G/T, so a per-base weight would be 0.5; confirm 0.25 is intended
+        gc += trimSeq.count('N') * 0.25
+        return gc / len(trimSeq)
 
 
 class IndexRecord(