create setup.py

akshadapradhan · May 31, 2023 · dbad09a · dbad09a
1 parent c91e362
commit dbad09a
Show file tree

Hide file tree

Showing 5 changed files with 243 additions and 0 deletions.
diff --git a/.gitignore b/.gitignore
@@ -1 +1,4 @@
 ./app
+cloudmol/__pycache__
+./venv
+./build
diff --git a/cloudmol/README.md b/cloudmol/README.md
@@ -0,0 +1,17 @@
+# CloudMol
+
+## Fold and design your protein with CloudMol on the cloud
+
+### Installation
+
+```bash
+# From the root of a local clone of this repository:
+pip install .
+```
+
+### Usage
+1. Protein Folding
+```python
+from cloudmol.cloudmol import PymolFold
+pf = PymolFold()         
+pf.query_esmfold("MTYKLILNGKTLKGETTTEAVDAATAEKVFKQYANDNGVDGEWTYDDATKTFTVTE", '1pga')
+```
diff --git a/cloudmol/__init__.py b/cloudmol/__init__.py
diff --git a/cloudmol/cloudmol.py b/cloudmol/cloudmol.py
@@ -0,0 +1,206 @@
+import requests
+import re
+import os
+import json
+
+
+def cal_plddt(pdb_string: str) -> float:
+    """Return the mean pLDDT stored in the B-factor column of C-alpha atoms.
+
+    Only ``ATOM``/``HETATM`` records whose atom-name field (columns 13-16)
+    is exactly ``" CA "`` are used. Matching on the fixed column instead of
+    searching for the substring anywhere in the line keeps REMARK text and
+    calcium ions (atom name ``"CA  "``, residue name ``" CA"``) out of the
+    average.
+
+    Args:
+        pdb_string (str): PDB-format text whose records are separated by
+            real newline characters.
+
+    Returns:
+        float: mean of the values read from columns 61-66. When every value
+        is <= 1.0 the values are assumed to be on a [0, 1] scale and are
+        multiplied by 100 before averaging.
+
+    Raises:
+        ValueError: if no C-alpha record is found, or if a B-factor field
+            cannot be parsed as a float.
+    """
+    plddts = [
+        float(line[60:66])
+        for line in pdb_string.split("\n")
+        if line.startswith(("ATOM", "HETATM")) and line[12:16] == " CA "
+    ]
+    if not plddts:
+        # Fail with a descriptive message instead of the bare
+        # "max() arg is an empty sequence" the empty list would trigger.
+        raise ValueError("no CA atom records found in pdb_string")
+    if max(plddts) <= 1.0:
+        plddts = [plddt * 100 for plddt in plddts]
+        print("Guessing the scale is [0,1], we scale it to [0, 100]")
+    else:
+        print("Guessing the scale is [0,100]")
+    return sum(plddts) / len(plddts)
+
+class PymolFold:
+    """Client for a PymolFold server and the public ESMFold API.
+
+    Structure predictions are written as ``.pdb`` files into a working
+    directory; the ProteinMPNN-style endpoints return their results and/or
+    write them to that same directory.
+
+    Attributes:
+        BASE_URL: root URL of the PymolFold server.
+        ABS_PATH: directory in which result files are written.
+        verbose: when true, query methods print a summary of their result.
+    """
+
+    # Endpoint used by ``query_esmfold``.
+    ESMFOLD_URL = "https://api.esmatlas.com/foldSequence/v1/pdb/"
+    # Seconds to wait for a server response. 1000 is the value
+    # ``query_pymolfold`` already used; it is applied to every request so a
+    # stalled server cannot block the caller forever.
+    REQUEST_TIMEOUT = 1000
+
+    def __init__(
+        self,
+        base_url: str = "http://region-8.seetacloud.com:42711/",
+        abs_path: str = "PymolFold_workdir",
+        verbose: bool = True,
+    ):
+        """Store the server URL and create the working directory.
+
+        Args:
+            base_url (str): root URL of the PymolFold server.
+            abs_path (str): working directory, joined onto the user's home
+                directory (an absolute path is therefore used unchanged).
+            verbose (bool): print result summaries from the query methods.
+        """
+        self.BASE_URL = base_url
+        self.ABS_PATH = os.path.join(os.path.expanduser("~"), abs_path)
+        print(f"Results will be saved to {self.ABS_PATH}")
+        os.makedirs(self.ABS_PATH, exist_ok=True)
+        self.verbose = verbose
+
+    def set_base_url(self, url):
+        """Point the client at a different PymolFold server."""
+        self.BASE_URL = url
+
+    def set_path(self, path):
+        """Use ``path`` (taken as given) as the working directory.
+
+        The directory is created when missing, mirroring ``__init__``, so
+        that later writes do not fail on a fresh location.
+        """
+        self.ABS_PATH = path
+        os.makedirs(self.ABS_PATH, exist_ok=True)
+
+    def _endpoint(self, route: str) -> str:
+        """Join ``route`` onto ``BASE_URL`` with exactly one slash between."""
+        return self.BASE_URL.rstrip("/") + "/" + route
+
+    @staticmethod
+    def _default_name(sequence: str) -> str:
+        """Build a file stem from the first and last three sequence letters."""
+        return sequence[:3] + sequence[-3:]
+
+    def _save_pdb(self, pdb_string: str, name: str) -> str:
+        """Write ``pdb_string`` to ``<ABS_PATH>/<name>.pdb`` and report it.
+
+        When ``verbose`` is set, the file location and the mean pLDDT
+        computed by ``cal_plddt`` are printed.
+
+        Returns:
+            str: path of the written file.
+        """
+        pdb_filename = os.path.join(self.ABS_PATH, name) + ".pdb"
+        with open(pdb_filename, "w") as out:
+            out.write(pdb_string)
+        if self.verbose:
+            print(f"Results saved to {pdb_filename}")
+            plddt = cal_plddt(pdb_string)
+            print("=" * 20)
+            print("    pLDDT: " + "{:.2f}".format(plddt))
+            print("=" * 20)
+        return pdb_filename
+
+    def query_pymolfold(self, sequence: str, num_recycle: int = 3, name: str = None):
+        """Predict a structure with the PymolFold server's ``predict/`` route.
+
+        Args:
+            sequence (str): amino acid sequence, sent as-is.
+            num_recycle (int): sent as ``num_recycles`` after ``int()``.
+            name (str, optional): output file stem. Defaults to the first
+                and last three letters of ``sequence``.
+
+        A response whose ``output`` field starts with ``PARENT`` is treated
+        as a structure and saved; anything else is printed unchanged.
+        """
+        data = {
+            'sequence': sequence,
+            'num_recycles': int(num_recycle),
+        }
+
+        response = requests.post(self._endpoint("predict/"),
+                                 json=data, timeout=self.REQUEST_TIMEOUT)
+
+        if not name:
+            name = self._default_name(sequence)
+        pdb_string = response.json()['output'].replace('\"', "")
+        if pdb_string.startswith("PARENT"):
+            # The payload may carry newlines as the two characters "\n"
+            # (the original code un-escaped them only when writing the
+            # file). Un-escape first so that the PARENT line is really
+            # removed and cal_plddt sees one record per line.
+            # NOTE(review): confirm the server's escaping against a live
+            # response.
+            pdb_string = pdb_string.replace('\\n', '\n')
+            pdb_string = pdb_string.replace("PARENT N/A\n", "")
+            self._save_pdb(pdb_string, name)
+        else:
+            print(pdb_string)
+
+    def query_esmfold(self, sequence: str, name: str = None):
+        """Predict protein structure with ESMFold
+
+        The sequence is upper-cased, ``/`` is turned into ``:``, every
+        character other than ``A-Z`` and ``:`` is dropped, and repeated,
+        leading and trailing ``:`` are removed before it is posted.
+
+        Args:
+            sequence (str): amino acid sequence
+            name (str, optional): output file stem. Defaults to the first
+                and last three characters of the cleaned sequence.
+
+        A response starting with ``HEADER`` is treated as a structure and
+        saved; anything else is printed unchanged.
+        """
+        sequence = re.sub("[^A-Z:]", "", sequence.replace("/", ":").upper())
+        sequence = re.sub(":+", ":", sequence)
+        sequence = re.sub("^[:]+", "", sequence)
+        sequence = re.sub("[:]+$", "", sequence)
+
+        headers = {
+            "Content-Type": "application/x-www-form-urlencoded",
+        }
+
+        response = requests.post(
+            self.ESMFOLD_URL, headers=headers, data=sequence,
+            timeout=self.REQUEST_TIMEOUT,
+        )
+        if not name:
+            name = self._default_name(sequence)
+        pdb_string = response.content.decode("utf-8")
+        if pdb_string.startswith("HEADER"):
+            self._save_pdb(pdb_string, name)
+        else:
+            print(pdb_string)
+
+    def query_mpnn(self, path_to_pdb: str, fix_pos=None, chain=None, rm_aa=None, inverse=False, homooligomeric=False):
+        """query ProteinMPNN server for de novo protein design
+
+        Args:
+            path_to_pdb (str): PDB file uploaded to the ``mpnn`` route.
+            fix_pos, chain, rm_aa, inverse, homooligomeric: forwarded
+                unchanged as query parameters; their meaning is defined by
+                the server.
+
+        Returns:
+            str: FASTA text with one ``>des_<i>,score=...,seqid=...`` record
+            per entry of the response's ``seq``/``score``/``seqid`` lists.
+        """
+        headers = {
+            'accept': 'application/json',
+        }
+        params = {
+            "fix_pos": fix_pos,
+            "chain": chain,
+            "rm_aa": rm_aa,
+            "inverse": inverse,
+            "homooligomeric": homooligomeric,
+        }
+
+        # Keep the upload handle open only for the duration of the request.
+        with open(path_to_pdb, 'rb') as pdb_file:
+            response = requests.post(
+                self._endpoint("mpnn"), headers=headers,
+                files={'file': pdb_file}, params=params,
+                timeout=self.REQUEST_TIMEOUT)
+
+        d = json.loads(response.content.decode("utf-8"))
+
+        fasta_string = "".join(
+            f">des_{i},score={score},seqid={seqid}\n{seq}\n"
+            for i, (seq, score, seqid) in enumerate(zip(d['seq'], d['score'], d['seqid']))
+        )
+        if self.verbose:
+            print(fasta_string)
+        return fasta_string
+
+    def query_singlemut(self, path_to_pdb: str, wild, resseq, mut):
+        """Query the server for the score of a single mutation.
+
+        Args:
+            path_to_pdb (str): PDB file uploaded with the request.
+            wild, resseq, mut: forwarded unchanged as query parameters
+                (presumably wild-type residue, residue number and mutant
+                residue; confirm against the server).
+
+        Returns:
+            d (dict): {mutation: str, score: float}
+        """
+        headers = {
+            'accept': 'application/json',
+        }
+        params = {
+            'wild': wild,
+            'resseq': resseq,
+            'mut': mut,
+        }
+
+        # NOTE(review): the route is spelled "signlemut" here; it is kept
+        # as-is because the server's route name is not visible from this
+        # file. Confirm before correcting the spelling.
+        with open(path_to_pdb, 'rb') as pdb_file:
+            response = requests.post(
+                self._endpoint("signlemut"), params=params, headers=headers,
+                files={'file': pdb_file}, timeout=self.REQUEST_TIMEOUT)
+
+        d = json.loads(response.content.decode("utf-8"))
+        if self.verbose:
+            print(f"\n\tmutation: {d['mutation']}, score: {d['score']}\n")
+        return d
+
+    def query_dms(self, path_to_pdb: str):
+        """Query the server's ``dms`` route and save the result table as CSV.
+
+        The response's ``mutation``, ``002``, ``010``, ``020``, ``030`` and
+        ``ensemble`` lists are written row by row to
+        ``<ABS_PATH>/dms_results.csv``, which is the location reported to
+        the user.
+
+        Args:
+            path_to_pdb (str): PDB file uploaded with the request.
+        """
+        headers = {
+            'accept': 'application/json',
+        }
+
+        with open(path_to_pdb, 'rb') as pdb_file:
+            response = requests.post(
+                self._endpoint("dms"), headers=headers,
+                files={'file': pdb_file}, timeout=self.REQUEST_TIMEOUT)
+
+        d = json.loads(response.content.decode("utf-8"))
+        # Write where the message below says the file is; previously the
+        # file landed in the current working directory instead.
+        p = os.path.join(self.ABS_PATH, 'dms_results.csv')
+        with open(p, 'w') as ofile:
+            ofile.write('mutation,002,010,020,030,ensemble\n')
+            for name, s1, s2, s3, s4, s5 in zip(d['mutation'], d['002'], d['010'], d['020'], d['030'], d['ensemble']):
+                ofile.write(f'{name},{s1},{s2},{s3},{s4},{s5}\n')
+        print(f"Results save to '{p}'")
diff --git a/setup.py b/setup.py
@@ -0,0 +1,17 @@
+"""Packaging script for the ``cloudmol`` distribution."""
+from pathlib import Path
+
+from setuptools import setup, find_packages
+
+# Read the long description relative to this file, with an explicit encoding
+# and a context manager, so the build neither depends on the current working
+# directory or the platform's default encoding nor leaks a file handle.
+with open(Path(__file__).resolve().parent / 'README.md', encoding='utf-8') as readme:
+    long_description = readme.read()
+
+setup(
+    name='cloudmol',
+    version='0.1',
+    packages=find_packages(),
+    install_requires=[
+        "requests",
+    ],
+    python_requires='>=3.6',  # Your Python compatibility
+    author='Jinyuan Sun',
+    author_email='[email protected]',
+    description='Easily protein folding and design with cloudmol',
+    long_description=long_description,
+    long_description_content_type='text/markdown',  # If your README is in Markdown
+    url='https://github.com/JinyuanSun/PymolFold',  # URL of your project
+)