Create an installable python package #22

Open · wants to merge 1 commit into base: master
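This pull request renames the src/ package to bepler/ and adds packaging metadata (a new pyproject.toml plus an updated setup.py) so the project can be installed with pip. A minimal sketch of the intended workflow after merging (the package name "bepler" comes from setup.py below; using Uniprot21 and fasta as entry points is an assumption based on the renamed imports):

# Hypothetical usage after running `pip install .` in the repository root:
# the former src.* modules become importable as bepler.*.
from bepler.alphabets import Uniprot21
import bepler.fasta as fasta

alphabet = Uniprot21()  # the amino-acid alphabet used throughout the repo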
2 changes: 1 addition & 1 deletion .gitignore
@@ -7,7 +7,7 @@ pretrained_models.tar.gz
*.swp

# cython compiled .c files
-src/*.c
+bepler/*.c

# Byte-compiled / optimized / DLL files
__pycache__/
13 files renamed without changes (src/ → bepler/).
2 changes: 1 addition & 1 deletion src/scop.py → bepler/scop.py
@@ -1,7 +1,7 @@
from __future__ import print_function,division

import numpy as np
-import src.fasta as fasta
+import bepler.fasta as fasta

class NullEncoder:
def encode(self, x):
2 files renamed without changes (src/ → bepler/).
12 changes: 6 additions & 6 deletions embed_sequences.py
@@ -8,9 +8,9 @@
import torch.nn as nn
import torch.nn.functional as F

-from src.alphabets import Uniprot21
-import src.fasta as fasta
-import src.models.sequence
+from bepler.alphabets import Uniprot21
+import bepler.fasta as fasta
+import bepler.models.sequence


def unstack_lstm(lstm):
@@ -28,12 +28,12 @@ def unstack_lstm(lstm):
dest = attr + '0'
src = attr + str(i)
getattr(layer, dest).data[:] = getattr(lstm, src)
#setattr(layer, dest, getattr(lstm, src))

dest = attr + '0_reverse'
src = attr + str(i) + '_reverse'
getattr(layer, dest).data[:] = getattr(lstm, src)
#setattr(layer, dest, getattr(lstm, src))
layer.flatten_parameters()
layers.append(layer)
in_size = 2*hidden_dim
@@ -101,7 +101,7 @@ def load_model(path, use_cuda=False):
if use_cuda:
encoder.cuda()

-if type(encoder) is src.models.sequence.BiLM:
+if type(encoder) is bepler.models.sequence.BiLM:
# model is only the LM
return encoder.encode, None, None

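For context on the unstack_lstm hunks above (the same function appears in eval_transmembrane.py): it splits a stacked bidirectional LSTM into single-layer LSTMs by copying each layer's parameters, which is what the getattr(...).data[:] = getattr(...) lines do. A self-contained sketch of that pattern, assuming standard torch.nn.LSTM parameter names (weight_ih_l{i}, weight_hh_l{i}, bias_ih_l{i}, bias_hh_l{i}, and their _reverse twins); anything beyond what the diff shows is an assumption:

import torch.nn as nn

def unstack_lstm_sketch(lstm):
    """Split a stacked bidirectional LSTM into per-layer LSTMs."""
    layers = []
    in_size = lstm.input_size
    hidden_dim = lstm.hidden_size
    for i in range(lstm.num_layers):
        layer = nn.LSTM(in_size, hidden_dim, num_layers=1, bidirectional=True)
        for attr in ['weight_ih_l', 'weight_hh_l', 'bias_ih_l', 'bias_hh_l']:
            # copy layer i's forward and reverse parameters into slot 0
            getattr(layer, attr + '0').data[:] = getattr(lstm, attr + str(i))
            getattr(layer, attr + '0_reverse').data[:] = getattr(lstm, attr + str(i) + '_reverse')
        layer.flatten_parameters()
        layers.append(layer)
        in_size = 2 * hidden_dim  # bidirectional output feeds the next layer
    return layers

Splitting the layers this way lets the caller run each layer separately and read out intermediate hidden states, which embed_sequences.py presumably uses when building per-layer embeddings.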
10 changes: 5 additions & 5 deletions eval_contact_casp12.py
@@ -10,11 +10,11 @@
from torch.nn.utils.rnn import PackedSequence
import torch.utils.data

-from src.alphabets import Uniprot21
-import src.fasta as fasta
-from src.utils import pack_sequences, unpack_sequences
-from src.utils import ContactMapDataset, collate_lists
-from src.metrics import average_precision
+from bepler.alphabets import Uniprot21
+import bepler.fasta as fasta
+from bepler.utils import pack_sequences, unpack_sequences
+from bepler.utils import ContactMapDataset, collate_lists
+from bepler.metrics import average_precision


def load_data(seq_path, struct_path, alphabet, baselines=False):
10 changes: 5 additions & 5 deletions eval_contact_scop.py
@@ -10,11 +10,11 @@
from torch.nn.utils.rnn import PackedSequence
import torch.utils.data

-from src.alphabets import Uniprot21
-import src.fasta as fasta
-from src.utils import pack_sequences, unpack_sequences
-from src.utils import ContactMapDataset, collate_lists
-from src.metrics import average_precision
+from bepler.alphabets import Uniprot21
+import bepler.fasta as fasta
+from bepler.utils import pack_sequences, unpack_sequences
+from bepler.utils import ContactMapDataset, collate_lists
+from bepler.metrics import average_precision


def load_data(seq_path, struct_path, alphabet, baselines=False):
6 changes: 3 additions & 3 deletions eval_secstr.py
@@ -8,9 +8,9 @@
from torch.nn.utils.rnn import PackedSequence
import torch.utils.data

-from src.alphabets import Uniprot21, SecStr8
-from src.utils import pack_sequences, unpack_sequences
-import src.pdb as pdb
+from bepler.alphabets import Uniprot21, SecStr8
+from bepler.utils import pack_sequences, unpack_sequences
+import bepler.pdb as pdb


secstr_train_path = 'data/secstr/ss_cullpdb_pc40_res3.0_R1.0_d180412_filtered.train.fa'
8 changes: 4 additions & 4 deletions eval_similarity.py
@@ -11,10 +11,10 @@

from scipy.stats import pearsonr,spearmanr

-from src.utils import pack_sequences, unpack_sequences
-from src.alphabets import Uniprot21
-from src.alignment import nw_score
-from src.metrics import average_precision
+from bepler.utils import pack_sequences, unpack_sequences
+from bepler.alphabets import Uniprot21
+from bepler.alignment import nw_score
+from bepler.metrics import average_precision


def encode_sequence(x, alphabet):
10 changes: 5 additions & 5 deletions eval_transmembrane.py
@@ -9,9 +9,9 @@
from torch.nn.utils.rnn import PackedSequence
import torch.utils.data

-from src.alphabets import Uniprot21
-from src.parse_utils import parse_3line
-import src.transmembrane as tm
+from bepler.alphabets import Uniprot21
+from bepler.parse_utils import parse_3line
+import bepler.transmembrane as tm

def load_3line(path, alphabet):
with open(path, 'rb') as f:
@@ -63,12 +63,12 @@ def unstack_lstm(lstm):
dest = attr + '0'
src = attr + str(i)
getattr(layer, dest).data[:] = getattr(lstm, src)
#setattr(layer, dest, getattr(lstm, src))

dest = attr + '0_reverse'
src = attr + str(i) + '_reverse'
getattr(layer, dest).data[:] = getattr(lstm, src)
#setattr(layer, dest, getattr(lstm, src))
layers.append(layer)
in_size = 2*hidden_dim
return layers
3 changes: 3 additions & 0 deletions pyproject.toml
@@ -0,0 +1,3 @@
+[build-system]
+requires = ["setuptools >= 40.9.0", "wheel", "Cython", "numpy"]
+build-backend = "setuptools.build_meta"
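These PEP 517 settings let pip build the package in an isolated environment with Cython and numpy available, which is what allows the .pyx extensions to compile during installation. As an illustration only (the third-party "build" package is an assumption here, not a dependency added by this PR), the declared backend can also be driven programmatically:

# Hypothetical illustration: build a wheel through the backend declared in
# pyproject.toml, using the third-party `build` package (pip install build).
from build import ProjectBuilder

builder = ProjectBuilder(".")            # project root containing pyproject.toml
wheel = builder.build("wheel", "dist/")  # compiles metrics.pyx and alignment.pyx
print(wheel)                             # path to the built wheel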
10 changes: 7 additions & 3 deletions setup.py
@@ -1,8 +1,12 @@
from distutils.core import setup
-from Cython.Build import cythonize

import numpy as np
+from Cython.Build import cythonize
+from setuptools import find_packages

setup(
-    ext_modules = cythonize(['src/metrics.pyx', 'src/alignment.pyx']),
-    include_dirs=[np.get_include()]
+    name="bepler",
+    packages=find_packages(),
+    ext_modules=cythonize(["bepler/metrics.pyx", "bepler/alignment.pyx"]),
+    include_dirs=[np.get_include()],
)
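A quick way to sanity-check the result (a hypothetical smoke test, not part of this PR): after pip install ., the two Cython modules listed in ext_modules should resolve to compiled extensions rather than .pyx sources.

# Hypothetical smoke test after installation.
import bepler.metrics
import bepler.alignment

print(bepler.metrics.__file__)    # expect a platform-specific .so / .pyd path
print(bepler.alignment.__file__)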
10 changes: 5 additions & 5 deletions train_lm_pfam.py
@@ -11,9 +11,9 @@
import torch.utils.data
from torch.nn.utils.rnn import pack_padded_sequence

-import src.fasta as fasta
-from src.alphabets import Uniprot21
-import src.models.sequence
+import bepler.fasta as fasta
+from bepler.alphabets import Uniprot21
+import bepler.models.sequence

parser = argparse.ArgumentParser('Train sequence model')

@@ -88,8 +88,8 @@ def main():

tied = not args.untied

-model = src.models.sequence.BiLM(nin, nout, embedding_dim, hidden_dim, num_layers
-    , mask_idx=mask_idx, dropout=dropout, tied=tied)
+model = bepler.models.sequence.BiLM(nin, nout, embedding_dim, hidden_dim, num_layers
+    , mask_idx=mask_idx, dropout=dropout, tied=tied)
print('# initialized model', file=sys.stderr)

device = args.device
30 changes: 15 additions & 15 deletions train_similarity.py
@@ -13,13 +13,13 @@
from torch.nn.utils.rnn import PackedSequence, pack_padded_sequence, pad_packed_sequence
import torch.utils.data

-from src.alphabets import Uniprot21
-import src.scop as scop
-from src.utils import pack_sequences, unpack_sequences
-from src.utils import PairedDataset, AllPairsDataset, collate_paired_sequences
-from src.utils import MultinomialResample
-import src.models.embedding
-import src.models.comparison
+from bepler.alphabets import Uniprot21
+import bepler.scop as scop
+from bepler.utils import pack_sequences, unpack_sequences
+from bepler.utils import PairedDataset, AllPairsDataset, collate_paired_sequences
+from bepler.utils import MultinomialResample
+import bepler.models.embedding
+import bepler.models.comparison


def main():
@@ -197,20 +197,20 @@ def main():
print('# using LM:', args.lm, file=sys.stderr)

if num_layers > 0:
-embedding = src.models.embedding.StackedRNN(len(alphabet), input_dim, rnn_dim, embedding_size
-    , nlayers=num_layers, dropout=dropout, lm=lm)
+embedding = bepler.models.embedding.StackedRNN(len(alphabet), input_dim, rnn_dim, embedding_size
+    , nlayers=num_layers, dropout=dropout, lm=lm)
else:
-embedding = src.models.embedding.Linear(len(alphabet), input_dim, embedding_size, lm=lm)
+embedding = bepler.models.embedding.Linear(len(alphabet), input_dim, embedding_size, lm=lm)

if args.norm == 'l1':
-norm = src.models.comparison.L1()
+norm = bepler.models.comparison.L1()
print('# norm: l1', file=sys.stderr)
elif args.norm == 'l2':
-norm = src.models.comparison.L2()
+norm = bepler.models.comparison.L2()
print('# norm: l2', file=sys.stderr)
-model = src.models.comparison.OrdinalRegression(embedding, 5, align_method=compare_type
-    , compare=norm, allow_insertions=allow_insert
-    )
+model = bepler.models.comparison.OrdinalRegression(embedding, 5, align_method=compare_type
+    , compare=norm, allow_insertions=allow_insert
+    )

if use_cuda:
model.cuda()
28 changes: 14 additions & 14 deletions train_similarity_and_contact.py
@@ -15,15 +15,15 @@
from torch.nn.utils.rnn import PackedSequence, pack_padded_sequence, pad_packed_sequence
import torch.utils.data

-from src.alphabets import Uniprot21
-import src.scop as scop
-from src.utils import pack_sequences, unpack_sequences
-from src.utils import ContactMapDataset, collate_lists
-from src.utils import PairedDataset, AllPairsDataset, collate_paired_sequences
-from src.utils import MultinomialResample
-import src.models.embedding
-import src.models.multitask
-from src.metrics import average_precision
+from bepler.alphabets import Uniprot21
+import bepler.scop as scop
+from bepler.utils import pack_sequences, unpack_sequences
+from bepler.utils import ContactMapDataset, collate_lists
+from bepler.utils import PairedDataset, AllPairsDataset, collate_paired_sequences
+from bepler.utils import MultinomialResample
+import bepler.models.embedding
+import bepler.models.multitask
+from bepler.metrics import average_precision

cmap_paths = glob.glob('data/SCOPe/pdbstyle-2.06/*/*.png')
cmap_dict = {os.path.basename(path)[:7] : path for path in cmap_paths}
@@ -438,9 +438,9 @@ def main():
for param in lm.parameters():
param.requires_grad = False

-embedding = src.models.embedding.StackedRNN(len(alphabet), input_dim, rnn_dim
-    , embedding_size, nlayers=num_layers
-    , dropout=dropout, lm=lm)
+embedding = bepler.models.embedding.StackedRNN(len(alphabet), input_dim, rnn_dim
+    , embedding_size, nlayers=num_layers
+    , dropout=dropout, lm=lm)

# similarity prediction parameters
similarity_kwargs = {}
@@ -450,8 +450,8 @@
width = args.width
cmap_kwargs = {'hidden_dim': hidden_dim, 'width': width}

-model = src.models.multitask.SCOPCM(embedding, similarity_kwargs=similarity_kwargs,
-    cmap_kwargs=cmap_kwargs)
+model = bepler.models.multitask.SCOPCM(embedding, similarity_kwargs=similarity_kwargs,
+    cmap_kwargs=cmap_kwargs)
if use_cuda:
model.cuda()
