-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmedian string.py
31 lines (27 loc) · 908 Bytes
/
median string.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
import itertools
# Length of the pattern (k-mer) to search for.
k = 6
# Input DNA sequences, one per line of the literal.
dna = '''TACCGTACTTAACAGACGTCTGACGCCCCTAATAACGGGCCA
TGAGCGTATGGCAAACAGATCCGGTAACTGCGGACTGCCCAT
GCTCCTGCGTCCTTGGACATCATCTCATGAGTAGTGGCCCAT
CGTGCTTGTACAAACTCGCCAAACATTTGACGGCTAGCCCAT
ACATAGGTACAGGTCGGTTCACATCGCAGAGCCCGTTGTGCC
TCCGTAAAGCCCATTAGAATAGATGCCCCTCTCGTCTCACGG
TGTGGATGGTGAACTAATACTCATGCCGCCGCCCTTCGCATA
GGCATCGCCCGTTAGCGCTGTTAAGTGCAGGAATCGCGCTAA
CGTGGATCATTAAGTTCAGCCCTTGGTCGAACTGCTGGGGCG
CCCGTAATGCTCGGTTGGCTTAGACTATGGGCCCATATGGTT'''.split()
# Alphabet from which candidate k-mers are built.
bases = ['A', 'C', 'G', 'T']


def hamming_distance(left, right):
    """Return the number of positions at which *left* and *right* differ.

    ``zip`` stops at the shorter argument, so only the overlapping prefix
    is compared.
    """
    return sum(x != y for x, y in zip(left, right))


def min_window_distance(pattern, text):
    """Return the smallest Hamming distance from *pattern* to a window of *text*.

    Every contiguous substring of *text* with the same length as *pattern*
    is considered.

    Raises:
        ValueError: if *text* is shorter than *pattern*, so no window exists.
    """
    width = len(pattern)
    if len(text) < width:
        raise ValueError(
            f"sequence of length {len(text)} is shorter than "
            f"pattern length {width}"
        )
    return min(
        hamming_distance(pattern, text[start:start + width])
        for start in range(len(text) - width + 1)
    )


def median_string(dna, k, alphabet=None):
    """Return the k-mer with the smallest total distance to all of *dna*.

    Every string of length *k* over *alphabet* is scored by summing, over
    the sequences in *dna*, its minimum Hamming distance to any window of
    that sequence. The lowest-scoring k-mer is returned.

    Args:
        dna: iterable of sequences to search.
        k: length of the candidate patterns.
        alphabet: symbols used to build candidates; defaults to ``bases``.

    Returns:
        The best-scoring k-mer. On ties, the candidate generated first by
        ``itertools.product`` wins.

    Raises:
        ValueError: if any sequence in *dna* is shorter than *k*.
    """
    if alphabet is None:
        alphabet = bases
    # Materialize once: *dna* is re-scanned for every candidate k-mer.
    sequences = list(dna)

    totals = {}
    for letters in itertools.product(alphabet, repeat=k):
        kmer = ''.join(letters)
        totals[kmer] = sum(min_window_distance(kmer, seq) for seq in sequences)

    # min() keeps the first minimal key, and dicts preserve insertion order,
    # so ties resolve to the earliest generated candidate.
    return min(totals, key=totals.get)


if __name__ == "__main__":
    print(median_string(dna, k))