Skip to content

Commit

Permalink
create setup.py
Browse files Browse the repository at this point in the history
  • Loading branch information
JinyuanSun committed May 31, 2023
1 parent c91e362 commit dbad09a
Show file tree
Hide file tree
Showing 5 changed files with 243 additions and 0 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1 +1,4 @@
./app
cloudmol/__pycache__
./venv
./build
17 changes: 17 additions & 0 deletions cloudmol/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# CloudMol

## Fold and design your protein with CloudMol on the cloud

### Installation

```bash
pip install .  # run from the repository root, next to setup.py
```

### Usage
1. Protein Folding
```python
from cloudmol.cloudmol import PymolFold
pf = PymolFold()
pf.query_esmfold("MTYKLILNGKTLKGETTTEAVDAATAEKVFKQYANDNGVDGEWTYDDATKTFTVTE", '1pga')
```
Empty file added cloudmol/__init__.py
Empty file.
206 changes: 206 additions & 0 deletions cloudmol/cloudmol.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,206 @@
import requests
import re
import os
import json


def cal_plddt(pdb_string: str):
    """Return the mean pLDDT stored in the B-factor column of C-alpha atoms.

    Only ``ATOM`` records whose atom-name field (columns 13-16) is exactly
    ``" CA "`` are used, so calcium ions (``"CA  "``, usually ``HETATM``) and
    free-text lines that merely contain ``" CA "`` are ignored.

    Args:
        pdb_string (str): structure in PDB text format, with real newlines.

    Returns:
        float: mean of the C-alpha B-factors. If every value is <= 1.0 the
        values are assumed to be on a [0, 1] scale and are multiplied by 100
        before averaging.

    Raises:
        ValueError: if no C-alpha ``ATOM`` record is found, or a B-factor
            field cannot be parsed as a float.
    """
    plddts = []
    for line in pdb_string.splitlines():
        # Match on fixed PDB columns instead of a substring search.
        if line.startswith("ATOM") and line[12:16] == " CA ":
            # Columns 61-66 hold the temperature factor (pLDDT here).
            plddts.append(float(line[60:66]))

    if not plddts:
        # Fail with a clear message instead of an opaque max()/division error.
        raise ValueError("no C-alpha ATOM records found in PDB string")

    if max(plddts) <= 1.0:
        plddts = [plddt * 100 for plddt in plddts]
        print("Guessing the scale is [0,1], we scale it to [0, 100]")
    else:
        print("Guessing the scale is [0,100]")
    return sum(plddts) / len(plddts)

class PymolFold():
    """Client for remote protein folding and design web services.

    Requests go to ``BASE_URL`` (folding via ``predict/``, plus the ``mpnn``,
    ``signlemut`` and ``dms`` endpoints) or to the public ESM Atlas API.
    Result files are written under ``ABS_PATH``.
    """

    def __init__(self,
                 base_url: str = "http://region-8.seetacloud.com:42711/",
                 abs_path: str = "PymolFold_workdir",
                 verbose: bool = True):
        """Configure the server URL and create the working directory.

        Args:
            base_url (str): root URL of the server; endpoint names are
                appended directly, so it should end with ``/``.
            abs_path (str): working directory, joined onto the user's home
                directory (an absolute path is used as-is by ``os.path.join``).
            verbose (bool): whether to print result summaries.
        """
        self.BASE_URL = base_url
        self.ABS_PATH = os.path.join(os.path.expanduser("~"), abs_path)
        print(f"Results will be saved to {self.ABS_PATH}")
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(self.ABS_PATH, exist_ok=True)
        self.verbose = verbose

    def set_base_url(self, url):
        """Replace the server root URL used by subsequent queries."""
        self.BASE_URL = url

    def set_path(self, path):
        """Replace the output directory, creating it if necessary.

        Args:
            path: directory used as-is (no home-directory prefix is added).
        """
        self.ABS_PATH = path
        # Without this, later writes fail when the directory does not exist.
        # An empty path means "current directory" for os.path.join; skip it.
        if path:
            os.makedirs(path, exist_ok=True)

    @staticmethod
    def _clean_sequence(sequence: str) -> str:
        """Upper-case a sequence, keep only A-Z and ':', and tidy separators."""
        cleaned = re.sub("[^A-Z:]", "", sequence.replace("/", ":").upper())
        # Collapse repeated chain separators and drop leading/trailing ones.
        return re.sub(":+", ":", cleaned).strip(":")

    @staticmethod
    def _format_fasta(d) -> str:
        """Render the 'seq', 'score' and 'seqid' lists of *d* as FASTA text."""
        return "".join(
            f">des_{i},score={score},seqid={seqid}\n{seq}\n"
            for i, (seq, score, seqid)
            in enumerate(zip(d['seq'], d['score'], d['seqid']))
        )

    @staticmethod
    def _dms_csv(d) -> str:
        """Render the per-mutation score lists of *d* as CSV text."""
        rows = ['mutation,002,010,020,030,ensemble\n']
        rows.extend(
            f'{name},{s1},{s2},{s3},{s4},{s5}\n'
            for name, s1, s2, s3, s4, s5
            in zip(d['mutation'], d['002'], d['010'], d['020'], d['030'], d['ensemble'])
        )
        return "".join(rows)

    def _report_structure(self, pdb_filename, pdb_string):
        """Print the saved path and mean pLDDT when verbose is enabled."""
        # NOTE(review): the pLDDT report is treated as verbose-only output;
        # confirm this matches the intended nesting of the original code.
        if not self.verbose:
            return
        print(f"Results saved to {pdb_filename}")
        plddt = cal_plddt(pdb_string)
        print("="*20)
        print(" pLDDT: "+"{:.2f}".format(plddt))
        print("="*20)

    def query_pymolfold(self, sequence: str, num_recycle: int = 3, name: str = None):
        """Predict a structure via the ``predict/`` endpoint of ``BASE_URL``.

        Args:
            sequence (str): amino acid sequence sent to the server.
            num_recycle (int): sent as ``num_recycles``; coerced with ``int``.
            name (str, optional): output file stem. Defaults to the first
                and last three characters of ``sequence``.

        The PDB text is saved to ``<ABS_PATH>/<name>.pdb`` when the response
        starts with ``PARENT``; otherwise the response text is printed.
        """
        num_recycle = int(num_recycle)
        data = {
            'sequence': sequence,
            'num_recycles': num_recycle,
        }

        response = requests.post(f"{self.BASE_URL}predict/",
                                 json=data, timeout=1000)

        if not name:
            name = sequence[:3] + sequence[-3:]
        pdb_filename = os.path.join(self.ABS_PATH, name) + ".pdb"
        pdb_string = response.json()['output']
        pdb_string = pdb_string.replace('\"', "")
        if pdb_string.startswith("PARENT"):
            pdb_string = pdb_string.replace("PARENT N/A\n", "")
            # Un-escape literal "\n" once, so the saved file and the pLDDT
            # calculation both see the same line-separated text.
            pdb_string = pdb_string.replace('\\n', '\n')
            with open(pdb_filename, "w") as out:
                out.write(pdb_string)
            self._report_structure(pdb_filename, pdb_string)
        else:
            print(pdb_string)

    def query_esmfold(self, sequence: str, name: str = None):
        """Predict protein structure with ESMFold through the ESM Atlas API.

        Args:
            sequence (str): amino acid sequence; ``/`` or ``:`` separate
                chains. It is upper-cased and stripped of other characters.
            name (str, optional): output file stem. Defaults to the first
                and last three characters of the cleaned sequence.

        The PDB text is saved to ``<ABS_PATH>/<name>.pdb`` when the response
        starts with ``HEADER``; otherwise the response text is printed.
        """
        sequence = self._clean_sequence(sequence)

        headers = {
            "Content-Type": "application/x-www-form-urlencoded",
        }

        response = requests.post(
            "https://api.esmatlas.com/foldSequence/v1/pdb/", headers=headers, data=sequence
        )
        if not name:
            name = sequence[:3] + sequence[-3:]
        pdb_filename = os.path.join(self.ABS_PATH, name) + ".pdb"
        pdb_string = response.content.decode("utf-8")
        if pdb_string.startswith("HEADER"):
            with open(pdb_filename, "w") as out:
                out.write(pdb_string)
            self._report_structure(pdb_filename, pdb_string)
        else:
            print(pdb_string)

    def query_mpnn(self, path_to_pdb: str, fix_pos=None, chain=None, rm_aa=None, inverse=False, homooligomeric=False):
        """Query the ProteinMPNN endpoint (``mpnn``) for sequence design.

        Args:
            path_to_pdb (str): path of the PDB file to upload.
            fix_pos, chain, rm_aa, inverse, homooligomeric: forwarded
                unchanged as query parameters of the same names.

        Returns:
            str: FASTA text with one record per designed sequence, headed
            ``>des_<i>,score=<score>,seqid=<seqid>``.
        """
        headers = {
            'accept': 'application/json',
        }
        params = {
            "fix_pos": fix_pos,
            "chain": chain,
            "rm_aa": rm_aa,
            "inverse": inverse,
            "homooligomeric": homooligomeric,
        }

        # Close the upload handle as soon as the request has been sent.
        with open(path_to_pdb, 'rb') as pdb_file:
            response = requests.post(
                f"{self.BASE_URL}mpnn", headers=headers,
                files={'file': pdb_file}, params=params)

        d = json.loads(response.content.decode("utf-8"))

        fasta_string = self._format_fasta(d)
        if self.verbose:
            print(fasta_string)
        return fasta_string

    def query_singlemut(self, path_to_pdb: str, wild, resseq, mut):
        """Query the server for the score of a single point mutation.

        Args:
            path_to_pdb (str): path of the PDB file to upload.
            wild, resseq, mut: forwarded unchanged as query parameters
                (presumably wild-type residue, residue number and mutant
                residue; confirm against the server API).

        Returns:
            d (dict): {mutation: str, score: float}
        """
        headers = {
            'accept': 'application/json',
        }
        params = {
            'wild': wild,
            'resseq': resseq,
            'mut': mut,
        }

        # NOTE(review): 'signlemut' looks like a typo of 'singlemut' but is
        # kept as-is because it must match the server's route; confirm.
        with open(path_to_pdb, 'rb') as pdb_file:
            response = requests.post(f'{self.BASE_URL}signlemut',
                                     params=params, headers=headers,
                                     files={'file': pdb_file})

        d = json.loads(response.content.decode("utf-8"))
        if self.verbose:
            print(f"\n\tmutation: {d['mutation']}, score: {d['score']}\n")
        return d

    def query_dms(self, path_to_pdb: str):
        """Query the ``dms`` endpoint and save the per-mutation scores as CSV.

        Args:
            path_to_pdb (str): path of the PDB file to upload.

        The response lists ``mutation``, ``002``, ``010``, ``020``, ``030``
        and ``ensemble`` are written row-wise to
        ``<ABS_PATH>/dms_results.csv``. Nothing is returned.
        """
        headers = {
            'accept': 'application/json',
        }

        with open(path_to_pdb, 'rb') as pdb_file:
            response = requests.post(f'{self.BASE_URL}dms', headers=headers,
                                     files={'file': pdb_file})

        d = json.loads(response.content.decode("utf-8"))

        # Write to the same location that is reported to the user.
        p = os.path.join(self.ABS_PATH, 'dms_results.csv')
        with open(p, 'w') as ofile:
            ofile.write(self._dms_csv(d))
        print(f"Results save to '{p}'")
17 changes: 17 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
from setuptools import setup, find_packages

# Read the long description with an explicit encoding and close the file
# promptly; a bare open().read() leaks the handle and depends on the
# platform's default encoding.
with open('README.md', encoding='utf-8') as readme_file:
    long_description = readme_file.read()

setup(
    name='cloudmol',
    version='0.1',
    packages=find_packages(),
    install_requires=[
        "requests",
    ],
    python_requires='>=3.6',  # Your Python compatibility
    author='Jinyuan Sun',
    author_email='[email protected]',
    description='Easily protein folding and design with cloudmol',
    long_description=long_description,
    long_description_content_type='text/markdown',  # If your README is in Markdown
    url='https://github.com/JinyuanSun/PymolFold',  # URL of your project
)

0 comments on commit dbad09a

Please sign in to comment.