Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

push me #3

Open
wants to merge 23 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions Practical10/Reflection.txt.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
The practical is a bit hard for me.
Drawing the heat map was a very new experience, which was both interesting and challenging. Finding proper and simple ways to
reach a single goal demands a good understanding of the related functions. For example, I struggled a lot before finding
out how to use 'transpose' and 'nonzero', because I had tried to express the same logic with many 'while' and 'if' statements.
One important thing I learned is that defining functions can prevent many errors caused by nested loops; for instance,
a 'for' inside another 'for' makes it easy to mix up the results.
33 changes: 33 additions & 0 deletions Practical10/SIR.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
import numpy as np
import matplotlib.pyplot as plt
# Stochastic SIR model: at every time step each susceptible person is infected
# with probability beta * I / N and each infected person recovers with
# probability gamma.
N = 10000  # total population
I = 1  # initially infected people
S = N - I  # initially susceptible people
R = 0  # initially recovered people
beta = 0.3  # infection probability per contact
gamma = 0.05  # recovery probability per time step

# History of each compartment, one entry per time point.
sus = [S]
infected = [I]
recovered = [R]
time = [0]

for t in range(1, 1001):
    # Probability that one susceptible person gets infected during this step.
    contact = beta * infected[-1] / N
    # One Bernoulli trial per person is a binomial draw.  The previous
    # np.random.choice(x, 2, p) call drew two values from range(x) and used p
    # as the `replace` flag, so the histories turned into arrays.
    new_infected = np.random.binomial(sus[-1], contact)
    new_recovered = np.random.binomial(infected[-1], gamma)
    # Compute all three new values from the previous step before appending.
    sus_new = sus[-1] - new_infected
    infected_new = infected[-1] + new_infected - new_recovered
    recovered_new = recovered[-1] + new_recovered
    sus.append(sus_new)
    infected.append(infected_new)
    recovered.append(recovered_new)
    time.append(t)
# Draw the three compartment histories on one figure, write it to the file
# 'figure' in PNG format, then open the plot window.
plt.figure(figsize=(6, 4), dpi=150)
for series, series_label, series_color in (
    (sus, 'Susceptible', 'green'),
    (infected, 'Infected', 'red'),
    (recovered, 'Recovered', 'blue'),
):
    plt.plot(time, series, label=series_label, color=series_color)
plt.title('SIR Model')
plt.xlabel('Time')
plt.ylabel('Number of People')
plt.legend()
plt.savefig('figure', format='png')
plt.show()
38 changes: 38 additions & 0 deletions Practical10/SIR_vaccination.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import numpy as np
import matplotlib.pyplot as plt
N = 10000  # total population
I = 1  # initially infected people
S = N - I  # initially susceptible people when nobody is vaccinated
R = 0  # initially recovered people
beta = 0.3  # infection probability per contact
gamma = 0.05  # recovery probability per time step
# Vaccinated fractions of the population to compare.
vaccine_rate = [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]


def vaccination(v_rate):
    """Run the stochastic SIR model with a fraction of the population vaccinated.

    Vaccinated people can never be infected, so they are removed from the
    susceptible pool before the simulation starts.  The outbreak always
    starts from one infected person and runs for 1000 time steps.

    Args:
        v_rate: vaccinated fraction of the population, between 0 and 1.

    Returns:
        A ``(time, infected)`` tuple of two lists with 1001 entries each:
        the time points 0..1000 and the number of infected people at each.
    """
    # Work in whole people.  N * (1 - v_rate) is a float with rounding error
    # (999.9999999999998 for v_rate = 0.9), which then leaked into every
    # later susceptible count.
    vaccinated = int(round(N * v_rate))
    I = 1
    R = 0
    # Neither vaccinated people nor the first case are susceptible; clamp at
    # zero so full vaccination does not produce a negative count.
    S = max(N - vaccinated - I, 0)
    sus = [S]
    infected = [I]
    recovered = [R]
    time = [0]
    for t in range(1, 1001):
        # Probability that one susceptible person gets infected this step.
        contact = beta * infected[-1] / N
        new_infected = np.random.binomial(sus[-1], contact)
        new_recovered = np.random.binomial(infected[-1], gamma)
        sus_new = sus[-1] - new_infected
        infected_new = infected[-1] + new_infected - new_recovered
        recovered_new = recovered[-1] + new_recovered
        sus.append(sus_new)
        infected.append(infected_new)
        recovered.append(recovered_new)
        time.append(t)
    return time, infected
# Plot one infection curve per vaccination rate on shared axes, save the
# figure as a PNG file and show it.
for rate in vaccine_rate:
    plt.plot(*vaccination(rate), label=rate)
plt.title('SIR Model with Different Vaccination Rates')
plt.xlabel('Time')
plt.ylabel('Number of People')
plt.legend()
plt.savefig('infections_under_vaccination_rates.png', format='png')
plt.show()
26 changes: 26 additions & 0 deletions Practical10/spatial_SIR.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import numpy as np
import matplotlib.pyplot as plt
# Model parameters used by update(): per-step infection and recovery
# probabilities.
beta = 0.3
gamma = 0.05
# 100 x 100 grid of cells, all 0 (susceptible) except one randomly chosen
# cell that is set to 1 (infected).
population = np.zeros(shape=(100, 100))
outbreak = np.random.choice(range(100), size=2)
population[tuple(outbreak)] = 1
def update(population, beta, gamma):
    """Advance the spatial SIR grid by one time step.

    Cell values: 0 = susceptible, 1 = infected, 2 = recovered.  Each infected
    cell tries to infect each of its (up to eight) susceptible neighbours
    with probability ``beta``.  Afterwards every infected cell, including
    the ones infected during this step, recovers with probability ``gamma``.

    Args:
        population: 2-D numpy array of cell states; it is not modified.
        beta: infection probability per infected/susceptible neighbour pair.
        gamma: recovery probability per infected cell.

    Returns:
        A new array of the same shape holding the states after the step.
    """
    # Take the bounds from the array itself instead of assuming 100 x 100,
    # so border cells of any grid size are handled without an IndexError.
    n_rows, n_cols = population.shape
    neighbour_offsets = [(0, 1), (0, -1), (1, 0), (-1, 0),
                         (1, 1), (1, -1), (-1, 1), (-1, -1)]
    # Infection is decided from the old grid and written to a copy, so cells
    # infected in this step do not spread the infection until the next one.
    new_population = population.copy()
    infected_positions = np.transpose(np.nonzero(population == 1))
    for i, j in infected_positions:
        for di, dj in neighbour_offsets:
            ni, nj = i + di, j + dj
            inside = 0 <= ni < n_rows and 0 <= nj < n_cols
            if inside and population[ni, nj] == 0:
                if np.random.rand() < beta:
                    new_population[ni, nj] = 1
    # The recovery pass looks at the updated grid.
    infected_indices = np.transpose(np.nonzero(new_population == 1))
    for i, j in infected_indices:
        if np.random.rand() < gamma:
            new_population[i, j] = 2
    return new_population
# Apply update() over 100 loop iterations and show the grid as a heat map
# (cell values: 0 = susceptible, 1 = infected, 2 = recovered).
# NOTE(review): the indentation of this section is not visible here.
# Presumably only the update() call belongs to the loop body and the figure
# is drawn once afterwards; confirm against the real file.
for i in range(1,101):
population = update(population, beta, gamma)
plt.figure(figsize =(6, 4), dpi=150)
plt.imshow(population, cmap = 'viridis', interpolation = 'nearest')
plt.show()
12 changes: 12 additions & 0 deletions Practical13/SLC6A4_HUMAN (1).fa
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
>sp|P31645|SC6A4_HUMAN Sodium-dependent serotonin transporter OS=Homo sapiens OX=9606 GN=SLC6A4 PE=1 SV=1
METTPLNSQKQLSACEDGEDCQENGVLQKVVPTPGDKVESGQISNGYSAVPSPGAGDDTR
HSIPATTTTLVAELHQGERETWGKKVDFLLSVIGYAVDLGNVWRFPYICYQNGGGAFLLP
YTIMAIFGGIPLFYMELALGQYHRNGCISIWRKICPIFKGIGYAICIIAFYIASYYNTIM
AWALYYLISSFTDQLPWTSCKNSWNTGNCTNYFSEDNITWTLHSTSPAEEFYTRHVLQIH
RSKGLQDLGGISWQLALCIMLIFTVIYFSIWKGVKTSGKVVWVTATFPYIILSVLLVRGA
TLPGAWRGVLFYLKPNWQKLLETGVWIDAAAQIFFSLGPGFGVLLAFASYNKFNNNCYQD
ALVTSVVNCMTSFVSGFVIFTVLGYMAEMRNEDVSEVAKDAGPSLLFITYAEAIANMPAS
TFFAIIFFLMLITLGLDSTFAGLEGVITAVLDEFPHVWAKRRERFVLAVVITCFFGSLVT
LTFGGAYVVKLLEEYATGPAVLTVALIEAVAVSWFYGITQFCRDVKEMLGFSPGWFWRIC
WVAISPLFLLFIICSFLMSPPQLRLFQYNYPYWSIILGYCIGTSSFICIPTYIAYRLIIT
PGTFKERIIKSITPETPTEIPCGDIRLNAV
12 changes: 12 additions & 0 deletions Practical13/SLC6A4_MOUSE (1).fa
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
>sp|Q60857|SC6A4_MOUSE Sodium-dependent serotonin transporter OS=Mus musculus OX=10090 GN=Slc6a4 PE=1 SV=4
METTPLNSQKVLSECKDKEDCQENGVLQKGVPTPADKAGPGQISNGYSAVPSTSAGDEAP
HSTPAATTTLVAEIHQGERETWGKKMDFLLSVIGYAVDLGNIWRFPYICYQNGGGAFLLP
YTIMAIFGGIPLFYMELALGQYHRNGCISIWKKICPIFKGIGYAICIIAFYIASYYNTII
AWALYYLISSFTDQLPWTSCKNSWNTGNCTNYFAQDNITWTLHSTSPAEEFYLRHVLQIH
QSKGLQDLGTISWQLALCIMLIFTIIYFSIWKGVKTSGKVVWVTATFPYIVLSVLLVRGA
TLPGAWRGVVFYLKPNWQKLLETGVWVDAAAQIFFSLGPGFGVLLAFASYNKFNNNCYQD
ALVTSVVNCMTSFVSGFVIFTVLGYMAEMRNEDVSEVAKDAGPSLLFITYAEAIANMPAS
TFFAIIFFLMLITLGLDSTFAGLEGVITAVLDEFPHIWAKRREWFVLIVVITCILGSLLT
LTSGGAYVVTLLEEYATGPAVLTVALIEAVVVSWFYGITQFCSDVKEMLGFSPGWFWRIC
WVAISPLFLLFIICSFLMSPPQLRLFQYNYPHWSIILGYCIGTSSVICIPIYIIYRLIST
PGTLKERIIKSITPETPTEIPCGDIRMNAV
12 changes: 12 additions & 0 deletions Practical13/SLC6A4_RAT (1).fa
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
>sp|P31652|SC6A4_RAT Sodium-dependent serotonin transporter OS=Rattus norvegicus OX=10116 GN=Slc6a4 PE=1 SV=1
METTPLNSQKVLSECKDREDCQENGVLQKGVPTTADRAEPSQISNGYSAVPSTSAGDEAS
HSIPAATTTLVAEIRQGERETWGKKMDFLLSVIGYAVDLGNIWRFPYICYQNGGGAFLLP
YTIMAIFGGIPLFYMELALGQYHRNGCISIWRKICPIFKGIGYAICIIAFYIASYYNTII
AWALYYLISSLTDRLPWTSCTNSWNTGNCTNYFAQDNITWTLHSTSPAEEFYLRHVLQIH
QSKGLQDLGTISWQLTLCIVLIFTVIYFSIWKGVKTSGKVVWVTATFPYIVLSVLLVRGA
TLPGAWRGVVFYLKPNWQKLLETGVWVDAAAQIFFSLGPGFGVLLAFASYNKFNNNCYQD
ALVTSVVNCMTSFVSGFVIFTVLGYMAEMRNEDVSEVAKDAGPSLLFITYAEAIANMPAS
TFFAIIFFLMLITLGLDSTFAGLEGVITAVLDEFPHIWAKRREWFVLIVVITCVLGSLLT
LTSGGAYVVTLLEEYATGPAVLTVALIEAVAVSWFYGITQFCSDVKEMLGFSPGWFWRIC
WVAISPLFLLFIICSFLMSPPQLRLFQYNYPHWSIVLGYCIGMSSVICIPTYIIYRLIST
PGTLKERIIKSITPETPTEIPCGDIRMNAV
70 changes: 70 additions & 0 deletions Practical13/alignment.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
# BLOSUM62 substitution scores.  Each row starts with its amino-acid letter,
# followed by the scores against every amino acid in the same order as the
# rows.  After parsing, BLOSUM62 is a list of token lists (all strings).
BLOSUM62 = [row.split() for row in '''A 4 0 -2 -1 -2 0 -2 -1 -1 -1 -1 -2 -1 -1 -1 1 0 0 -3 -2
C 0 9 -3 -4 -2 -3 -3 -1 -3 -1 -1 -3 -3 -3 -3 -1 -1 -1 -2 -2
D -2 -3 6 2 -3 -1 -1 -3 -1 -4 -3 1 -1 0 -2 0 -1 -3 -4 -3
E -1 -4 2 5 -3 -2 0 -3 1 -3 -2 0 -1 2 0 0 -1 -2 -3 -2
F -2 -2 -3 -3 6 -3 -1 0 -3 0 0 -3 -4 -3 -3 -2 -2 -1 1 3
G 0 -3 -1 -2 -3 6 -2 -4 -2 -4 -3 0 -2 -2 -2 0 -2 -3 -2 -3
H -2 -3 -1 0 -1 -2 8 -3 -1 -3 -2 1 -2 0 0 -1 -2 -3 -2 2
I -1 -1 -3 -3 0 -4 -3 4 -3 2 1 -3 -3 -3 -3 -2 -1 3 -3 -1
K -1 -3 -1 1 -3 -2 -1 -3 5 -2 -1 0 -1 1 2 0 -1 -2 -3 -2
L -1 -1 -4 -3 0 -4 -3 2 -2 4 2 -3 -3 -2 -2 -2 -1 1 -2 -1
M -1 -1 -3 -2 0 -3 -2 1 -1 2 5 -2 -2 0 -1 -1 -1 1 -1 -1
N -2 -3 1 0 -3 0 1 -3 0 -3 -2 6 -2 0 0 1 0 -3 -4 -2
P -1 -3 -1 -1 -4 -2 -2 -3 -1 -3 -2 -2 7 -1 -2 -1 -1 -2 -4 -3
Q -1 -3 0 2 -3 -2 0 -3 1 -2 0 0 -1 5 1 0 -1 -2 -2 -1
R -1 -3 -2 0 -3 -2 0 -3 2 -2 -1 0 -2 1 5 -1 -1 -3 -3 -2
S 1 -1 0 0 -2 0 -1 -2 0 -2 -1 1 -1 0 -1 4 1 -2 -3 -2
T 0 -1 -1 -1 -2 -2 -2 -1 -1 -1 -1 0 -1 -1 -1 1 5 0 -2 -2
V 0 -1 -3 -2 -1 -3 -3 3 -2 1 1 -3 -2 -2 -3 -2 0 4 -3 -1
W -3 -2 -4 -3 1 -2 -2 -3 -3 -2 -1 -4 -4 -2 -3 -3 -2 -3 11 2
Y -2 -2 -3 -2 3 -3 2 -1 -2 -1 -1 -2 -3 -1 -2 -2 -2 -1 2 7'''.split('\n')]

# Lookup table: key is row letter + TAB + column letter, value is the score
# as a string (callers convert it with int()).
m = {}
for score_row in BLOSUM62:
    row_letter = score_row[0]
    # The k-th score of a row belongs to the letter that heads the k-th row.
    for column_row, score in zip(BLOSUM62, score_row[1:]):
        m[row_letter + '\t' + column_row[0]] = score
def read_sequence(filename):
    """Return the sequence stored in a FASTA file as a single string.

    Lines starting with '>' are header lines and are skipped.  Every other
    line is stripped of surrounding whitespace and the pieces are
    concatenated in file order.

    Args:
        filename: path of the FASTA file to read.

    Returns:
        The concatenated sequence; an empty string if the file has no
        sequence lines.
    """
    with open(filename, 'r') as file:
        stripped_lines = (line.strip() for line in file)
        # Join once at the end instead of growing a string line by line.
        return ''.join(
            line for line in stripped_lines if not line.startswith('>')
        )
# Read the three sequences from the FASTA files stored next to this script.
# Resolving against the script's own folder lets it run from any checkout or
# working directory, not just one user's Desktop.
from pathlib import Path

_DATA_DIR = Path(__file__).resolve().parent
seq1 = read_sequence(_DATA_DIR / 'SLC6A4_HUMAN (1).fa')
seq2 = read_sequence(_DATA_DIR / 'SLC6A4_MOUSE (1).fa')
seq3 = read_sequence(_DATA_DIR / 'SLC6A4_RAT (1).fa')
# Empty placeholder that is handed to align_score() below.
BLOSUM62_dict = {}
def align_score(seq1, seq2, BLOSUM62_dict, gap_penalty=-5):
    """Return the global alignment score of two sequences.

    Dynamic programming over all prefixes of both sequences.  Each cell is
    the best of: a gap in seq2, a gap in seq1, or pairing the two current
    residues.  Only the previous and the current row are kept.

    Args:
        seq1: first sequence.
        seq2: second sequence.
        BLOSUM62_dict: substitution scores keyed by the two residue letters
            joined with a tab character; values may be ints or numeric
            strings.  When it is empty, the module-level table ``m`` is
            used instead, which is the table this function always used
            before the argument was honoured.
        gap_penalty: score added for every gap position (default -5).

    Returns:
        The best alignment score as an int.

    Raises:
        KeyError: if a residue pair is missing from the score table.
    """
    scores = BLOSUM62_dict if BLOSUM62_dict else m
    # Row 0: a prefix of seq2 aligned against nothing is all gaps.
    previous = [col * gap_penalty for col in range(len(seq2) + 1)]
    for i, residue1 in enumerate(seq1, start=1):
        # Column 0: a prefix of seq1 aligned against nothing is all gaps.
        current = [i * gap_penalty]
        for j, residue2 in enumerate(seq2, start=1):
            pair_score = int(scores[residue1 + '\t' + residue2])
            current.append(max(
                previous[j] + gap_penalty,
                current[j - 1] + gap_penalty,
                previous[j - 1] + pair_score,
            ))
        previous = current
    return previous[-1]
def calculate_identity(seq1, seq2):
    """Return the percentage of positions at which both sequences agree.

    Residues are compared index by index, without alignment and only up to
    the length of the shorter sequence.  The number of equal positions is
    divided by the length of seq1.  An empty seq1 gives 0.
    """
    if not seq1:
        return 0
    matches = 0
    for left, right in zip(seq1, seq2):
        if left == right:
            matches += 1
    return (matches / len(seq1)) * 100
# Score and compare every pair of sequences, then print one line per pair.
score1, identity1 = align_score(seq1, seq2, BLOSUM62_dict), calculate_identity(seq1, seq2)
score2, identity2 = align_score(seq2, seq3, BLOSUM62_dict), calculate_identity(seq2, seq3)
score3, identity3 = align_score(seq1, seq3, BLOSUM62_dict), calculate_identity(seq1, seq3)
for pair_label, pair_score, pair_identity in (
    ('seq1 and seq2: Alignment Score=', score1, identity1),
    ('seq2 and seq3: Alignment Score=', score2, identity2),
    ('seq1 and seq3: Alignment Score=', score3, identity3),
):
    print(pair_label, pair_score, 'Identity=', pair_identity)
Empty file.
Empty file.
34 changes: 34 additions & 0 deletions Practical14/DOMAPI.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import xml.etree.ElementTree as ET
import datetime
import matplotlib.pyplot as plt
# Count the GO terms of each ontology with the DOM-style ElementTree API and
# report how long that took.
from pathlib import Path

start_time_dom = datetime.datetime.now()
counts = {'molecular_function': 0, 'biological_process': 0, 'cellular_component': 0}
# go_obo.xml is looked up next to this script rather than through one user's
# absolute Desktop path, so the script runs from any checkout.
go_xml_path = Path(__file__).resolve().parent / 'go_obo.xml'
tree = ET.parse(go_xml_path)
root = tree.getroot()

for term in root.findall('term'):
    # findtext() returns None instead of raising when a term has no
    # <namespace> child; such terms are simply not counted.
    namespace = term.findtext('namespace')
    if namespace in counts:
        counts[namespace] += 1
for ontology, count in counts.items():
    print(f"{ontology}: {count} terms")
end_time_dom = datetime.datetime.now()
dom_duration = end_time_dom - start_time_dom
print(f"DOM API took: {dom_duration}")

import matplotlib.pyplot as plt

# Bar chart with one bar per ontology, showing how many terms it has.
ontologies = list(counts)
frequencies = [counts[name] for name in ontologies]

plt.figure(figsize=(10, 5))
plt.title('GO Terms Distribution by Ontology (DOM)')
plt.xlabel('Ontologies')
plt.ylabel('Term Frequency')
plt.bar(ontologies, frequencies)
plt.show()

# molecular_function: 12154 terms
# biological_process: 30794 terms
# cellular_component: 4392 terms
# DOM API took: 0:00:02.574873
45 changes: 45 additions & 0 deletions Practical14/SAX.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import xml.sax
from collections import defaultdict
import matplotlib.pyplot as plt
from datetime import datetime
# Start of the timed section; the elapsed time is computed from this value
# after parsing has finished.
start_time_sax = datetime.now()
class GOHandler(xml.sax.ContentHandler):
    """SAX content handler that counts <namespace> elements per GO ontology.

    Attributes:
        counts: defaultdict mapping an ontology name to the number of
            <namespace> elements whose text equals that name.
        current_namespace_buffer: list of text chunks while the parser is
            inside a <namespace> element, otherwise None.
        current_namespace: always None between elements; kept for
            compatibility.
    """

    # Only these namespace values are counted; anything else is ignored.
    _ONTOLOGIES = ('molecular_function', 'biological_process', 'cellular_component')

    def __init__(self):
        super().__init__()
        self.counts = defaultdict(int)
        # Start outside any <namespace> element so text that appears before
        # the first one is not collected.
        self.current_namespace_buffer = None
        self.current_namespace = None

    def startElement(self, name, attrs):
        """Begin collecting text when a <namespace> element opens."""
        if name == 'namespace':
            self.current_namespace_buffer = []

    def characters(self, content):
        """Collect text, but only while inside a <namespace> element."""
        # The parser may deliver one text node in several chunks, so the
        # chunks are stored and joined when the element closes.
        if self.current_namespace_buffer is not None:
            self.current_namespace_buffer.append(content)

    def endElement(self, name):
        """Count the collected namespace text when a <namespace> closes."""
        if name != 'namespace':
            return
        namespace = ''.join(self.current_namespace_buffer or []).strip()
        if namespace in self._ONTOLOGIES:
            self.counts[namespace] += 1
        self.current_namespace_buffer = None
        self.current_namespace = None

# Stream go_obo.xml through the SAX handler and report the elapsed time.
from pathlib import Path

handler = GOHandler()
parser = xml.sax.make_parser()
parser.setContentHandler(handler)
# go_obo.xml is looked up next to this script rather than through one user's
# absolute Desktop path, so the script runs from any checkout.
parser.parse(str(Path(__file__).resolve().parent / 'go_obo.xml'))
end_time_sax = datetime.now()
sax_duration = end_time_sax - start_time_sax
print(f"SAX API took: {sax_duration}")

# Bar chart with one bar per ontology that the handler counted.
ontologies = list(handler.counts)
frequencies = [handler.counts[name] for name in ontologies]

plt.figure(figsize=(10, 5))
plt.title('GO Terms Distribution by Ontology (SAX)')
plt.xlabel('Ontologies')
plt.ylabel('Term Frequency')
plt.bar(ontologies, frequencies)
plt.show()
# SAX API took: 0:00:01.986772
# SAX API is faster.
Loading