r0bah0lic · exPAIMON · Feb 28, 2024 · Feb 28, 2024 · Feb 28, 2024 · Feb 28, 2024
diff --git a/Practical10/Reflection.txt.txt b/Practical10/Reflection.txt.txt
@@ -0,0 +1,6 @@
+The practical is a bit hard for me. 
+Drawing the heat map was a very new experience, which was both interesting and challenging. Finding proper and simple ways to
+reach a single goal demands a good understanding of the related functions. For example, I struggled a lot before finding
+out how to use 'transpose' and 'nonzero', because I had tried to express the same logic with many 'while' and 'if' statements.
+One important thing I learned is that defining functions can prevent many errors caused by nested loops; for instance,
+with one 'for' inside another 'for' it is easy to mix up the results.
diff --git a/Practical10/SIR.py b/Practical10/SIR.py
@@ -0,0 +1,33 @@
+import numpy as np 
+import matplotlib.pyplot as plt 
+# Stochastic SIR (Susceptible-Infected-Recovered) simulation of a closed population.
+N = 10000      # total population
+I = 1          # initially infected people
+S = N - I      # initially susceptible people
+R = 0          # initially recovered people
+beta = 0.3     # infection probability
+gamma = 0.05   # recovery probability
+sus = [S]      # one entry per time step in each of these lists
+infected = [I]
+recovered = [R]
+time = [0]
+for t in range(1, 1001):
+    contact = beta * infected[-1] / N       # chance that one susceptible person is infected this step
+    # Each susceptible person is infected independently with probability
+    # `contact` and each infected person recovers independently with
+    # probability `gamma`, so both counts are binomial draws.
+    # np.random.choice(n, 2, p) cannot be used here: its third positional
+    # argument is `replace`, and it returns two random labels, not a count.
+    new_infected = np.random.binomial(sus[-1], contact)
+    new_recovered = np.random.binomial(infected[-1], gamma)
+    sus_new = sus[-1] - new_infected
+    infected_new = infected[-1] + new_infected - new_recovered
+    recovered_new = recovered[-1] + new_recovered
+    sus.append(sus_new)
+    infected.append(infected_new)
+    recovered.append(recovered_new)
+    time.append(t)
+# Plot the three compartments against time.
+plt.figure(figsize =(6, 4), dpi=150) 
+plt.xlabel('Time')
+plt.ylabel('Number of People')
+plt.title('SIR Model')
+plt.plot(time, sus, label='Susceptible', color = 'green')
+plt.plot(time, infected, label='Infected', color = 'red')
+plt.plot(time, recovered, label='Recovered', color = 'blue')
+plt.legend()
+plt.savefig('figure', format = 'png')
+plt.show()
diff --git a/Practical10/SIR_vaccination.py b/Practical10/SIR_vaccination.py
@@ -0,0 +1,38 @@
+import numpy as np 
+import matplotlib.pyplot as plt 
+# Model parameters shared by every vaccination scenario.
+N = 10000  # total population
+I = 1  # initially infected people
+S = N - I  # susceptible people when nobody is vaccinated
+R = 0  # initially recovered people
+beta = 0.3  # infection probability
+gamma = 0.05  # recovery probability
+# Vaccinated fractions of the population to compare, from 0 (nobody) to 1 (everybody).
+vaccine_rate = [0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1]
+def vaccination(v_rate):
+    """Simulate a stochastic SIR epidemic with part of the population vaccinated.
+
+    v_rate is the vaccinated fraction of the population (0 to 1); vaccinated
+    people never become infected. The module-level N, beta and gamma are used.
+
+    Returns (time, infected): the time steps 0..1000 and the number of
+    infected people at each of them.
+    """
+    I = 1
+    R = 0
+    # Work in whole people. N * (1 - v_rate) is a float with rounding error
+    # (10000 * (1 - 0.9) is 999.9999999999998), and numpy truncates a float
+    # count, which would silently drop a person. The initial case is not
+    # susceptible either, and S must not fall below zero at 100% vaccination.
+    vaccinated = round(N * v_rate)
+    S = max(N - vaccinated - I, 0)
+    sus = [S]
+    infected = [I]
+    recovered = [R]
+    time = [0]
+    for t in range(1, 1001):
+        contact = beta * infected[-1] / N  # chance that one susceptible person is infected this step
+        # Binomial draws: how many of the susceptible are infected and how
+        # many of the infected recover during this step.
+        new_infected = np.random.binomial(sus[-1], contact)
+        new_recovered = np.random.binomial(infected[-1], gamma)
+        sus.append(sus[-1] - new_infected)
+        infected.append(infected[-1] + new_infected - new_recovered)
+        recovered.append(recovered[-1] + new_recovered)
+        time.append(t)
+    return time, infected  # only these two series are needed for the figure
+# Draw one infection curve per vaccination rate on a shared set of axes.
+for rate in vaccine_rate:
+    curve_time, curve_infected = vaccination(rate)
+    plt.plot(curve_time, curve_infected, label=rate)
+plt.title('SIR Model with Different Vaccination Rates')
+plt.xlabel('Time')
+plt.ylabel('Number of People')
+plt.legend()
+# Save the figure before showing it.
+plt.savefig('infections_under_vaccination_rates.png', format='png')
+plt.show()
diff --git a/Practical10/spatial_SIR.py b/Practical10/spatial_SIR.py
@@ -0,0 +1,26 @@
+import numpy as np 
+import matplotlib.pyplot as plt 
+# Infection and recovery probabilities handed to update().
+beta = 0.3
+gamma = 0.05
+# 100 x 100 grid of cells, all starting at 0.
+population = np.zeros((100, 100))
+# Pick a random row and column and mark that single cell as infected (1).
+outbreak = np.random.choice(range(100), 2)
+outbreak_row, outbreak_col = outbreak
+population[outbreak_row, outbreak_col] = 1
+def update(population, beta, gamma):
+    """Advance the spatial SIR grid by one time step and return the new grid.
+
+    population is a 2-D array whose cells are 0 (susceptible), 1 (infected)
+    or 2 (recovered); it is not modified. Every infected cell infects each
+    susceptible cell among its eight neighbours with probability beta. Then
+    every infected cell, including those infected during this step,
+    recovers with probability gamma.
+    """
+    n_rows, n_cols = population.shape  # bounds follow the array, so any grid size works
+    new_population = population.copy()
+    neighbour_offsets = [(0, 1), (0, -1), (1, 0), (-1, 0), (1, 1), (1, -1), (-1, 1), (-1, -1)]
+    infected_positions = np.transpose(np.nonzero(population == 1))
+    for i, j in infected_positions:
+        for di, dj in neighbour_offsets:
+            ni, nj = i + di, j + dj
+            # Skip positions outside the grid and cells that are not susceptible.
+            if 0 <= ni < n_rows and 0 <= nj < n_cols and population[ni, nj] == 0:
+                if np.random.rand() < beta:
+                    new_population[ni, nj] = 1
+    # Recovery is decided on the updated grid, so newly infected cells take part too.
+    infected_indices = np.transpose(np.nonzero(new_population == 1))
+    for i, j in infected_indices:
+        if np.random.rand() < gamma:
+            new_population[i, j] = 2
+    return new_population
+# Run 100 time steps, then draw the final grid as a heat map.
+for step in range(100):
+    population = update(population, beta, gamma)
+plt.figure(figsize=(6, 4), dpi=150)
+plt.imshow(population, cmap='viridis', interpolation='nearest')
+plt.show()
diff --git a/Practical13/SLC6A4_HUMAN (1).fa b/Practical13/SLC6A4_HUMAN (1).fa
@@ -0,0 +1,12 @@
+>sp|P31645|SC6A4_HUMAN Sodium-dependent serotonin transporter OS=Homo sapiens OX=9606 GN=SLC6A4 PE=1 SV=1
+METTPLNSQKQLSACEDGEDCQENGVLQKVVPTPGDKVESGQISNGYSAVPSPGAGDDTR
+HSIPATTTTLVAELHQGERETWGKKVDFLLSVIGYAVDLGNVWRFPYICYQNGGGAFLLP
+YTIMAIFGGIPLFYMELALGQYHRNGCISIWRKICPIFKGIGYAICIIAFYIASYYNTIM
+AWALYYLISSFTDQLPWTSCKNSWNTGNCTNYFSEDNITWTLHSTSPAEEFYTRHVLQIH
+RSKGLQDLGGISWQLALCIMLIFTVIYFSIWKGVKTSGKVVWVTATFPYIILSVLLVRGA
+TLPGAWRGVLFYLKPNWQKLLETGVWIDAAAQIFFSLGPGFGVLLAFASYNKFNNNCYQD
+ALVTSVVNCMTSFVSGFVIFTVLGYMAEMRNEDVSEVAKDAGPSLLFITYAEAIANMPAS
+TFFAIIFFLMLITLGLDSTFAGLEGVITAVLDEFPHVWAKRRERFVLAVVITCFFGSLVT
+LTFGGAYVVKLLEEYATGPAVLTVALIEAVAVSWFYGITQFCRDVKEMLGFSPGWFWRIC
+WVAISPLFLLFIICSFLMSPPQLRLFQYNYPYWSIILGYCIGTSSFICIPTYIAYRLIIT
+PGTFKERIIKSITPETPTEIPCGDIRLNAV
diff --git a/Practical13/SLC6A4_MOUSE (1).fa b/Practical13/SLC6A4_MOUSE (1).fa
@@ -0,0 +1,12 @@
+>sp|Q60857|SC6A4_MOUSE Sodium-dependent serotonin transporter OS=Mus musculus OX=10090 GN=Slc6a4 PE=1 SV=4
+METTPLNSQKVLSECKDKEDCQENGVLQKGVPTPADKAGPGQISNGYSAVPSTSAGDEAP
+HSTPAATTTLVAEIHQGERETWGKKMDFLLSVIGYAVDLGNIWRFPYICYQNGGGAFLLP
+YTIMAIFGGIPLFYMELALGQYHRNGCISIWKKICPIFKGIGYAICIIAFYIASYYNTII
+AWALYYLISSFTDQLPWTSCKNSWNTGNCTNYFAQDNITWTLHSTSPAEEFYLRHVLQIH
+QSKGLQDLGTISWQLALCIMLIFTIIYFSIWKGVKTSGKVVWVTATFPYIVLSVLLVRGA
+TLPGAWRGVVFYLKPNWQKLLETGVWVDAAAQIFFSLGPGFGVLLAFASYNKFNNNCYQD
+ALVTSVVNCMTSFVSGFVIFTVLGYMAEMRNEDVSEVAKDAGPSLLFITYAEAIANMPAS
+TFFAIIFFLMLITLGLDSTFAGLEGVITAVLDEFPHIWAKRREWFVLIVVITCILGSLLT
+LTSGGAYVVTLLEEYATGPAVLTVALIEAVVVSWFYGITQFCSDVKEMLGFSPGWFWRIC
+WVAISPLFLLFIICSFLMSPPQLRLFQYNYPHWSIILGYCIGTSSVICIPIYIIYRLIST
+PGTLKERIIKSITPETPTEIPCGDIRMNAV
diff --git a/Practical13/SLC6A4_RAT (1).fa b/Practical13/SLC6A4_RAT (1).fa
@@ -0,0 +1,12 @@
+>sp|P31652|SC6A4_RAT Sodium-dependent serotonin transporter OS=Rattus norvegicus OX=10116 GN=Slc6a4 PE=1 SV=1
+METTPLNSQKVLSECKDREDCQENGVLQKGVPTTADRAEPSQISNGYSAVPSTSAGDEAS
+HSIPAATTTLVAEIRQGERETWGKKMDFLLSVIGYAVDLGNIWRFPYICYQNGGGAFLLP
+YTIMAIFGGIPLFYMELALGQYHRNGCISIWRKICPIFKGIGYAICIIAFYIASYYNTII
+AWALYYLISSLTDRLPWTSCTNSWNTGNCTNYFAQDNITWTLHSTSPAEEFYLRHVLQIH
+QSKGLQDLGTISWQLTLCIVLIFTVIYFSIWKGVKTSGKVVWVTATFPYIVLSVLLVRGA
+TLPGAWRGVVFYLKPNWQKLLETGVWVDAAAQIFFSLGPGFGVLLAFASYNKFNNNCYQD
+ALVTSVVNCMTSFVSGFVIFTVLGYMAEMRNEDVSEVAKDAGPSLLFITYAEAIANMPAS
+TFFAIIFFLMLITLGLDSTFAGLEGVITAVLDEFPHIWAKRREWFVLIVVITCVLGSLLT
+LTSGGAYVVTLLEEYATGPAVLTVALIEAVAVSWFYGITQFCSDVKEMLGFSPGWFWRIC
+WVAISPLFLLFIICSFLMSPPQLRLFQYNYPHWSIVLGYCIGMSSVICIPTYIIYRLIST
+PGTLKERIIKSITPETPTEIPCGDIRMNAV
diff --git a/Practical13/alignment.py b/Practical13/alignment.py
@@ -0,0 +1,70 @@
+# BLOSUM62 substitution scores. Each row starts with an amino-acid letter,
+# followed by its 20 scores in the same residue order as the rows.
+BLOSUM62 = [row.split() for row in '''A  4  0 -2 -1 -2  0 -2 -1 -1 -1 -1 -2 -1 -1 -1  1  0  0 -3 -2
+C  0  9 -3 -4 -2 -3 -3 -1 -3 -1 -1 -3 -3 -3 -3 -1 -1 -1 -2 -2
+D -2 -3  6  2 -3 -1 -1 -3 -1 -4 -3  1 -1  0 -2  0 -1 -3 -4 -3
+E -1 -4  2  5 -3 -2  0 -3  1 -3 -2  0 -1  2  0  0 -1 -2 -3 -2
+F -2 -2 -3 -3  6 -3 -1  0 -3  0  0 -3 -4 -3 -3 -2 -2 -1  1  3
+G  0 -3 -1 -2 -3  6 -2 -4 -2 -4 -3  0 -2 -2 -2  0 -2 -3 -2 -3
+H -2 -3 -1  0 -1 -2  8 -3 -1 -3 -2  1 -2  0  0 -1 -2 -3 -2  2
+I -1 -1 -3 -3  0 -4 -3  4 -3  2  1 -3 -3 -3 -3 -2 -1  3 -3 -1
+K -1 -3 -1  1 -3 -2 -1 -3  5 -2 -1  0 -1  1  2  0 -1 -2 -3 -2
+L -1 -1 -4 -3  0 -4 -3  2 -2  4  2 -3 -3 -2 -2 -2 -1  1 -2 -1
+M -1 -1 -3 -2  0 -3 -2  1 -1  2  5 -2 -2  0 -1 -1 -1  1 -1 -1
+N -2 -3  1  0 -3  0  1 -3  0 -3 -2  6 -2  0  0  1  0 -3 -4 -2
+P -1 -3 -1 -1 -4 -2 -2 -3 -1 -3 -2 -2  7 -1 -2 -1 -1 -2 -4 -3
+Q -1 -3  0  2 -3 -2  0 -3  1 -2  0  0 -1  5  1  0 -1 -2 -2 -1
+R -1 -3 -2  0 -3 -2  0 -3  2 -2 -1  0 -2  1  5 -1 -1 -3 -3 -2
+S  1 -1  0  0 -2  0 -1 -2  0 -2 -1  1 -1  0 -1  4  1 -2 -3 -2
+T  0 -1 -1 -1 -2 -2 -2 -1 -1 -1 -1  0 -1 -1 -1  1  5  0 -2 -2
+V  0 -1 -3 -2 -1 -3 -3  3 -2  1  1 -3 -2 -2 -3 -2  0  4 -3 -1
+W -3 -2 -4 -3  1 -2 -2 -3 -3 -2 -1 -4 -4 -2 -3 -3 -2 -3 11  2
+Y -2 -2 -3 -2  3 -3  2 -1 -2 -1 -1 -2 -3 -1 -2 -2 -2 -1  2  7'''.split('\n')]
+
+# m maps "row residue + tab + column residue" to the score, kept as a string.
+m = {}
+for row in BLOSUM62:
+    for column, score in enumerate(row[1:]):
+        # The column label is the residue letter of the row with the same index.
+        m[row[0] + '\t' + BLOSUM62[column][0]] = score
+def read_sequence(filename):
+    """Return the sequence stored in a FASTA file as one string.
+
+    Lines starting with '>' are headers and are skipped. Every other line is
+    stripped of surrounding whitespace and the pieces are concatenated.
+    """
+    with open(filename, 'r') as file:
+        stripped = (line.strip() for line in file)
+        return ''.join(piece for piece in stripped if not piece.startswith('>'))
+# Read the three FASTA files from the folder that holds this script, so the
+# program does not depend on an absolute path that exists on one machine only.
+import os
+_here = os.path.dirname(os.path.abspath(__file__))
+seq1 = read_sequence(os.path.join(_here, 'SLC6A4_HUMAN (1).fa'))
+seq2 = read_sequence(os.path.join(_here, 'SLC6A4_MOUSE (1).fa'))
+seq3 = read_sequence(os.path.join(_here, 'SLC6A4_RAT (1).fa'))
+# Passed to align_score() in the calls below; it is left empty here.
+BLOSUM62_dict = {}
+def align_score(seq1, seq2, BLOSUM62_dict, gap=-5):
+    """Return the global alignment score of seq1 and seq2.
+
+    The score is found by dynamic programming with a linear gap penalty:
+    each cell is the best of a gap in either sequence or pairing the two
+    current residues.
+
+    BLOSUM62_dict maps "residue + tab + residue" to the substitution score,
+    given as an int or a numeric string. When it is empty or None the
+    module-level table m is used, so calls that pass an empty dict still work.
+    gap is the score added for every gap position (default -5).
+
+    Raises KeyError if a residue pair is missing from the table.
+    """
+    scores = BLOSUM62_dict if BLOSUM62_dict else m
+    # Only the previous row of the table is needed to fill in the next one.
+    previous = [j * gap for j in range(len(seq2) + 1)]
+    for i, a in enumerate(seq1, start=1):
+        current = [i * gap]
+        for j, b in enumerate(seq2, start=1):
+            current.append(max(
+                previous[j] + gap,  # residue of seq1 against a gap
+                current[j - 1] + gap,  # residue of seq2 against a gap
+                previous[j - 1] + int(scores[a + '\t' + b]),  # pair the two residues
+            ))
+        previous = current
+    return previous[len(seq2)]
+def calculate_identity(seq1, seq2):
+    """Return the percentage of positions where seq1 and seq2 hold the same residue.
+
+    Residues are compared position by position up to the length of the
+    shorter sequence, and the count is divided by len(seq1). An empty seq1
+    gives 0.
+    """
+    if not seq1:
+        return 0
+    identical = 0
+    for a, b in zip(seq1, seq2):
+        if a == b:
+            identical += 1
+    return (identical / len(seq1)) * 100
+# Score every pair of sequences, then compute their percentage identity.
+score1, score2, score3 = [
+    align_score(first, second, BLOSUM62_dict)
+    for first, second in ((seq1, seq2), (seq2, seq3), (seq1, seq3))
+]
+identity1, identity2, identity3 = [
+    calculate_identity(first, second)
+    for first, second in ((seq1, seq2), (seq2, seq3), (seq1, seq3))
+]
+# Print the alignment score and identity of each pair.
+print('seq1 and seq2: Alignment Score=',score1, 'Identity=',identity1)
+print('seq2 and seq3: Alignment Score=',score2, 'Identity=',identity2)
+print('seq1 and seq3: Alignment Score=',score3, 'Identity=',identity3)
diff --git a/Practical13/新建文本文档 (2).txt b/Practical13/新建文本文档 (2).txt
diff --git a/Practical13/新建文本文档 (3).txt b/Practical13/新建文本文档 (3).txt
diff --git a/Practical14/DOMAPI.py b/Practical14/DOMAPI.py
@@ -0,0 +1,34 @@
+import xml.etree.ElementTree as ET
+import datetime
+import matplotlib.pyplot as plt
+# Time the whole DOM run: parsing the file, counting the terms and printing.
+start_time_dom = datetime.datetime.now()
+counts = {'molecular_function': 0, 'biological_process': 0, 'cellular_component': 0}
+tree = ET.parse('C:/Users/20815/Desktop/IBI1_2023-24/Practical14/go_obo.xml')
+root = tree.getroot()
+
+for term in root.findall('term'):
+    # findtext() gives None for a <term> without a <namespace> child, where
+    # find('namespace').text would raise AttributeError.
+    namespace = term.findtext('namespace')
+    if namespace in counts:
+        counts[namespace] += 1
+for ontology, count in counts.items():
+    print(f"{ontology}: {count} terms")
+end_time_dom = datetime.datetime.now()
+dom_duration = end_time_dom - start_time_dom
+print(f"DOM API took: {dom_duration}")
+
+# Bar chart of the number of terms in each ontology.
+ontologies = list(counts.keys())
+frequencies = list(counts.values())
+
+plt.figure(figsize=(10, 5))
+plt.bar(ontologies, frequencies)
+plt.xlabel('Ontologies')
+plt.ylabel('Term Frequency')
+plt.title('GO Terms Distribution by Ontology (DOM)')
+plt.show()
+
+# molecular_function: 12154 terms
+# biological_process: 30794 terms
+# cellular_component: 4392 terms
+# DOM API took: 0:00:02.574873
diff --git a/Practical14/SAX.py b/Practical14/SAX.py
@@ -0,0 +1,45 @@
+import xml.sax
+from collections import defaultdict
+import matplotlib.pyplot as plt
+from datetime import datetime
+# Start timing here, before the handler class is defined and the file is parsed.
+start_time_sax = datetime.now()
+class GOHandler(xml.sax.ContentHandler):
+    """SAX handler that counts GO terms per ontology.
+
+    counts maps 'molecular_function', 'biological_process' and
+    'cellular_component' to the number of <namespace> elements whose text is
+    that name. A name that never occurs has no entry.
+    """
+
+    def __init__(self):
+        super().__init__()
+        self.counts = defaultdict(int)
+        # None means "not inside a <namespace> element"; a list collects the
+        # text chunks of the <namespace> element that is being read.
+        self.current_namespace_buffer = None
+        self.current_namespace = None
+
+    def startElement(self, name, attrs):
+        """Start collecting text when a <namespace> element opens."""
+        if name == 'namespace':
+            self.current_namespace_buffer = []
+
+    def characters(self, content):
+        """Keep text only while inside a <namespace> element."""
+        # The parser may deliver the text of one element in several chunks.
+        if self.current_namespace_buffer is not None:
+            self.current_namespace_buffer.append(content)
+
+    def endElement(self, name):
+        """Count the finished <namespace> element and stop collecting text."""
+        if name == 'namespace':
+            namespace = ''.join(self.current_namespace_buffer or []).strip()
+            if namespace in ('molecular_function', 'biological_process', 'cellular_component'):
+                self.counts[namespace] += 1
+            self.current_namespace_buffer = None
+            self.current_namespace = None
+
+# Stream the GO file through the handler, then report the elapsed time.
+handler = GOHandler()
+parser = xml.sax.make_parser()
+parser.setContentHandler(handler)
+parser.parse('C:/Users/20815/Desktop/IBI1_2023-24/Practical14/go_obo.xml')
+end_time_sax = datetime.now()
+sax_duration = end_time_sax - start_time_sax
+print(f"SAX API took: {sax_duration}")
+
+# Bar chart of the number of terms counted for each ontology.
+ontologies = [*handler.counts]
+frequencies = [*handler.counts.values()]
+
+plt.figure(figsize=(10, 5))
+plt.bar(ontologies, frequencies)
+plt.title('GO Terms Distribution by Ontology (SAX)')
+plt.xlabel('Ontologies')
+plt.ylabel('Term Frequency')
+plt.show()
+# SAX API took: 0:00:01.986772
+# SAX API is faster.