forked from JinyuanSun/PymolFold
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
c91e362
commit dbad09a
Showing
5 changed files
with
243 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,4 @@ | ||
./app | ||
cloudmol/__pycache__ | ||
./venv | ||
./build |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
# CloudMol | ||
|
||
## Fold and design your protein with CloudMol on the cloud | ||
|
||
### Installation | ||
|
||
```bash | ||
pip install .  # run from the directory that contains setup.py | ||
``` | ||
|
||
### Usage | ||
1. Protein Folding | ||
```python | ||
from cloudmol.cloudmol import PymolFold | ||
pf = PymolFold() | ||
pf.query_esmfold("MTYKLILNGKTLKGETTTEAVDAATAEKVFKQYANDNGVDGEWTYDDATKTFTVTE", '1pga') | ||
``` |
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,206 @@ | ||
import requests | ||
import re | ||
import os | ||
import json | ||
|
||
|
||
def cal_plddt(pdb_string: str):
    """Return the mean pLDDT over the C-alpha atoms of a PDB-format string.

    The per-residue confidence is read from the B-factor field (columns
    61-66) of every ATOM/HETATM record whose atom-name field (columns 13-16)
    is exactly ``" CA "``. Matching on the fixed columns, rather than on the
    substring ``" CA "`` anywhere in the line, keeps free-text records
    (REMARK, HEADER, ...) and calcium ions (atom name ``"CA  "``) out of the
    average.

    If the largest value read is <= 1.0 the values are assumed to be on a
    [0, 1] scale and are multiplied by 100. The guessed scale is printed.

    Args:
        pdb_string (str): contents of a PDB file.

    Returns:
        float: mean pLDDT on a [0, 100] scale.

    Raises:
        ValueError: if no C-alpha atom record is found, or if the B-factor
            field of a matching record cannot be parsed as a float.
    """
    plddts = [
        float(line[60:66])
        for line in pdb_string.splitlines()
        if line.startswith(("ATOM", "HETATM")) and line[12:16] == " CA "
    ]
    if not plddts:
        # Fail with an explicit message instead of the opaque
        # "max() arg is an empty sequence" raised further down.
        raise ValueError("no C-alpha atom records found in PDB string")
    if max(plddts) <= 1.0:
        plddts = [plddt * 100 for plddt in plddts]
        print("Guessing the scale is [0,1], we scale it to [0, 100]")
    else:
        print("Guessing the scale is [0,100]")
    return sum(plddts) / len(plddts)
|
||
class PymolFold:
    """Client for remote protein folding and design HTTP services.

    Folding results are written as ``.pdb`` files, and the DMS table as a
    ``.csv`` file, into the working directory ``ABS_PATH``.

    Attributes are set in ``__init__``:
        BASE_URL: root URL of the PymolFold server; endpoint names are
            appended to it directly, so it is expected to end with "/".
        ABS_PATH: directory that result files are written to.
        verbose: whether the query methods print a summary of their result.
    """

    # Seconds allowed for a single HTTP request. Without a timeout a stalled
    # server would block the caller forever; the value matches the one the
    # folding request already used.
    REQUEST_TIMEOUT = 1000

    def __init__(
        self,
        base_url: str = "http://region-8.seetacloud.com:42711/",
        abs_path: str = "PymolFold_workdir",
        verbose: bool = True,
    ):
        """Create the client and make sure the working directory exists.

        Args:
            base_url (str): root URL of the PymolFold server.
            abs_path (str): working directory, joined onto the user's home
                directory.
            verbose (bool): print result summaries when True.
        """
        self.BASE_URL = base_url
        self.ABS_PATH = os.path.join(os.path.expanduser("~"), abs_path)
        print(f"Results will be saved to {self.ABS_PATH}")
        # exist_ok avoids the check-then-create race of a separate
        # os.path.exists() test.
        os.makedirs(self.ABS_PATH, exist_ok=True)
        self.verbose = verbose

    def set_base_url(self, url):
        """Point the client at a different PymolFold server root URL."""
        self.BASE_URL = url

    def set_path(self, path):
        """Change the working directory, creating it if it does not exist.

        Unlike ``__init__``, ``path`` is used as given and is not joined onto
        the home directory.
        """
        # Create the directory here so that later writes into it cannot fail
        # merely because it is missing, mirroring what __init__ does.
        os.makedirs(path, exist_ok=True)
        self.ABS_PATH = path

    def _pdb_path(self, sequence, name):
        """Return the output .pdb path, deriving a name from the sequence ends if none is given."""
        if not name:
            name = sequence[:3] + sequence[-3:]
        return os.path.join(self.ABS_PATH, name) + ".pdb"

    def _save_structure(self, pdb_text, pdb_filename):
        """Write PDB text to disk and, when verbose, report the path and mean pLDDT."""
        with open(pdb_filename, "w") as out:
            out.write(pdb_text)
        if self.verbose:
            print(f"Results saved to {pdb_filename}")
            plddt = cal_plddt(pdb_text)
            print("=" * 20)
            print(" pLDDT: " + "{:.2f}".format(plddt))
            print("=" * 20)

    def _post_pdb(self, endpoint, path_to_pdb, params=None):
        """Upload a PDB file to BASE_URL + endpoint and return the decoded JSON reply."""
        headers = {
            'accept': 'application/json',
        }
        # The context manager closes the upload handle even when the request
        # raises.
        with open(path_to_pdb, 'rb') as pdb_file:
            response = requests.post(
                f"{self.BASE_URL}{endpoint}",
                params=params,
                headers=headers,
                files={'file': pdb_file},
                timeout=self.REQUEST_TIMEOUT,
            )
        return json.loads(response.content.decode("utf-8"))

    def query_pymolfold(self, sequence: str, num_recycle: int = 3, name: str = None):
        """Predict a structure through the PymolFold server's ``predict/`` endpoint.

        On success the structure is written to ``<ABS_PATH>/<name>.pdb``. A
        reply that does not start with "PARENT" is printed as-is and nothing
        is written.

        Args:
            sequence (str): amino acid sequence.
            num_recycle (int): sent to the server as ``num_recycles``.
            name (str, optional): output file stem. Defaults to the first and
                last three characters of ``sequence``.
        """
        data = {
            'sequence': sequence,
            'num_recycles': int(num_recycle),
        }
        response = requests.post(
            f"{self.BASE_URL}predict/", json=data, timeout=self.REQUEST_TIMEOUT
        )

        pdb_filename = self._pdb_path(sequence, name)
        pdb_string = response.json()['output'].replace('"', "")
        if not pdb_string.startswith("PARENT"):
            print(pdb_string)
            return

        pdb_string = pdb_string.replace("PARENT N/A\n", "")
        # Unescape literal "\n" sequences once and use the same text for the
        # file and for the pLDDT calculation, so the score is computed on
        # exactly what was written.
        pdb_text = pdb_string.replace('\\n', '\n')
        self._save_structure(pdb_text, pdb_filename)

    def query_esmfold(self, sequence: str, name: str = None):
        """Predict a structure with the public ESMFold API (api.esmatlas.com).

        The sequence is upper-cased, "/" is treated as ":", every character
        other than A-Z and ":" is dropped, runs of ":" are collapsed and
        leading/trailing ":" are removed before it is sent. On success the
        structure is written to ``<ABS_PATH>/<name>.pdb``. A reply that does
        not start with "HEADER" is printed as-is and nothing is written.

        Args:
            sequence (str): amino acid sequence.
            name (str, optional): output file stem. Defaults to the first and
                last three characters of the cleaned sequence.
        """
        sequence = re.sub(r"[^A-Z:]", "", sequence.replace("/", ":").upper())
        sequence = re.sub(r":+", ":", sequence)
        sequence = re.sub(r"^[:]+", "", sequence)
        sequence = re.sub(r"[:]+$", "", sequence)

        headers = {
            "Content-Type": "application/x-www-form-urlencoded",
        }
        response = requests.post(
            "https://api.esmatlas.com/foldSequence/v1/pdb/",
            headers=headers,
            data=sequence,
            timeout=self.REQUEST_TIMEOUT,
        )

        pdb_filename = self._pdb_path(sequence, name)
        pdb_string = response.content.decode("utf-8")
        if pdb_string.startswith("HEADER"):
            self._save_structure(pdb_string, pdb_filename)
        else:
            print(pdb_string)

    def query_mpnn(self, path_to_pdb: str, fix_pos=None, chain=None, rm_aa=None, inverse=False, homooligomeric=False):
        """Query the ProteinMPNN server (``mpnn`` endpoint) for de novo protein design.

        Args:
            path_to_pdb (str): path of the PDB file to upload.
            fix_pos, chain, rm_aa, inverse, homooligomeric: forwarded
                unchanged as query parameters of the same name; their
                meaning is defined by the server.

        Returns:
            str: FASTA text with one record per design, headed
            ``>des_<i>,score=<score>,seqid=<seqid>``.
        """
        params = {
            "fix_pos": fix_pos,
            "chain": chain,
            "rm_aa": rm_aa,
            "inverse": inverse,
            "homooligomeric": homooligomeric,
        }
        d = self._post_pdb('mpnn', path_to_pdb, params=params)

        designs = zip(d['seq'], d['score'], d['seqid'])
        fasta_string = "".join(
            f">des_{i},score={score},seqid={seqid}\n{seq}\n"
            for i, (seq, score, seqid) in enumerate(designs)
        )
        if self.verbose:
            print(fasta_string)
        return fasta_string

    def query_singlemut(self, path_to_pdb: str, wild, resseq, mut):
        """Query the server for the score of a single point mutation.

        Args:
            path_to_pdb (str): path of the PDB file to upload.
            wild, resseq, mut: forwarded unchanged as query parameters of the
                same name (presumably wild-type residue, residue number and
                mutant residue -- confirm against the server API).

        Returns:
            d (dict): decoded JSON reply; its 'mutation' and 'score' entries
            are printed when verbose.
        """
        params = {
            'wild': wild,
            'resseq': resseq,
            'mut': mut,
        }
        # NOTE(review): 'signlemut' looks like a misspelling of 'singlemut',
        # but it must match the route name on the server, so it is left
        # unchanged -- confirm against the server.
        d = self._post_pdb('signlemut', path_to_pdb, params=params)
        if self.verbose:
            print(f"\n\tmutation: {d['mutation']}, score: {d['score']}\n")
        return d

    def query_dms(self, path_to_pdb: str):
        """Query the server's ``dms`` endpoint and save the scores as CSV.

        The reply's 'mutation', '002', '010', '020', '030' and 'ensemble'
        lists are written row by row to ``<ABS_PATH>/dms_results.csv``.

        Args:
            path_to_pdb (str): path of the PDB file to upload.
        """
        d = self._post_pdb('dms', path_to_pdb)

        # Write into the working directory so the file really is at the path
        # reported below.
        p = os.path.join(self.ABS_PATH, 'dms_results.csv')
        columns = zip(d['mutation'], d['002'], d['010'], d['020'], d['030'], d['ensemble'])
        with open(p, 'w') as ofile:
            ofile.write('mutation,002,010,020,030,ensemble\n')
            for name, s1, s2, s3, s4, s5 in columns:
                ofile.write(f'{name},{s1},{s2},{s3},{s4},{s5}\n')
        print(f"Results save to '{p}'")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
from setuptools import setup, find_packages | ||
|
||
# Read the long description up front so the README handle is closed
# deterministically and decoded as UTF-8 regardless of the platform locale.
with open('README.md', encoding='utf-8') as readme_file:
    long_description = readme_file.read()

setup(
    name='cloudmol',
    version='0.1',
    packages=find_packages(),
    install_requires=[
        "requests",
    ],
    python_requires='>=3.6',  # Your Python compatibility
    author='Jinyuan Sun',
    author_email='[email protected]',
    description='Easily protein folding and design with cloudmol',
    long_description=long_description,
    long_description_content_type='text/markdown',  # If your README is in Markdown
    url='https://github.com/JinyuanSun/PymolFold',  # URL of your project
)