diff --git a/.gitignore b/.gitignore index 2cedff1..5683000 100644 --- a/.gitignore +++ b/.gitignore @@ -7,7 +7,7 @@ pretrained_models.tar.gz *.swp # cython compiled .c files -src/*.c +bepler/*.c # Byte-compiled / optimized / DLL files __pycache__/ diff --git a/src/__init__.py b/bepler/__init__.py similarity index 100% rename from src/__init__.py rename to bepler/__init__.py diff --git a/src/alignment.pyx b/bepler/alignment.pyx similarity index 100% rename from src/alignment.pyx rename to bepler/alignment.pyx diff --git a/src/alphabets.py b/bepler/alphabets.py similarity index 100% rename from src/alphabets.py rename to bepler/alphabets.py diff --git a/src/fasta.py b/bepler/fasta.py similarity index 100% rename from src/fasta.py rename to bepler/fasta.py diff --git a/src/metrics.pyx b/bepler/metrics.pyx similarity index 100% rename from src/metrics.pyx rename to bepler/metrics.pyx diff --git a/src/models/__init__.py b/bepler/models/__init__.py similarity index 100% rename from src/models/__init__.py rename to bepler/models/__init__.py diff --git a/src/models/comparison.py b/bepler/models/comparison.py similarity index 100% rename from src/models/comparison.py rename to bepler/models/comparison.py diff --git a/src/models/embedding.py b/bepler/models/embedding.py similarity index 100% rename from src/models/embedding.py rename to bepler/models/embedding.py diff --git a/src/models/multitask.py b/bepler/models/multitask.py similarity index 100% rename from src/models/multitask.py rename to bepler/models/multitask.py diff --git a/src/models/sequence.py b/bepler/models/sequence.py similarity index 100% rename from src/models/sequence.py rename to bepler/models/sequence.py diff --git a/src/parse_utils.py b/bepler/parse_utils.py similarity index 100% rename from src/parse_utils.py rename to bepler/parse_utils.py diff --git a/src/pdb.py b/bepler/pdb.py similarity index 100% rename from src/pdb.py rename to bepler/pdb.py diff --git a/src/pfam.py b/bepler/pfam.py similarity 
index 100% rename from src/pfam.py rename to bepler/pfam.py diff --git a/src/scop.py b/bepler/scop.py similarity index 97% rename from src/scop.py rename to bepler/scop.py index ccb9507..d0a97d3 100644 --- a/src/scop.py +++ b/bepler/scop.py @@ -1,7 +1,7 @@ from __future__ import print_function,division import numpy as np -import src.fasta as fasta +import bepler.fasta as fasta class NullEncoder: def encode(self, x): diff --git a/src/transmembrane.py b/bepler/transmembrane.py similarity index 100% rename from src/transmembrane.py rename to bepler/transmembrane.py diff --git a/src/utils.py b/bepler/utils.py similarity index 100% rename from src/utils.py rename to bepler/utils.py diff --git a/embed_sequences.py b/embed_sequences.py index 450e1ba..b071153 100644 --- a/embed_sequences.py +++ b/embed_sequences.py @@ -8,9 +8,9 @@ import torch.nn as nn import torch.nn.functional as F -from src.alphabets import Uniprot21 -import src.fasta as fasta -import src.models.sequence +from bepler.alphabets import Uniprot21 +import bepler.fasta as fasta +import bepler.models.sequence def unstack_lstm(lstm): @@ -28,12 +28,12 @@ def unstack_lstm(lstm): dest = attr + '0' src = attr + str(i) getattr(layer, dest).data[:] = getattr(lstm, src) - #setattr(layer, dest, getattr(lstm, src)) + #setattr(layer, dest, getattr(lstm, src)) dest = attr + '0_reverse' src = attr + str(i) + '_reverse' getattr(layer, dest).data[:] = getattr(lstm, src) - #setattr(layer, dest, getattr(lstm, src)) + #setattr(layer, dest, getattr(lstm, src)) layer.flatten_parameters() layers.append(layer) in_size = 2*hidden_dim @@ -101,7 +101,7 @@ def load_model(path, use_cuda=False): if use_cuda: encoder.cuda() - if type(encoder) is src.models.sequence.BiLM: + if type(encoder) is bepler.models.sequence.BiLM: # model is only the LM return encoder.encode, None, None diff --git a/eval_contact_casp12.py b/eval_contact_casp12.py index 095527c..4b197a3 100644 --- a/eval_contact_casp12.py +++ b/eval_contact_casp12.py @@ 
-10,11 +10,11 @@ from torch.nn.utils.rnn import PackedSequence import torch.utils.data -from src.alphabets import Uniprot21 -import src.fasta as fasta -from src.utils import pack_sequences, unpack_sequences -from src.utils import ContactMapDataset, collate_lists -from src.metrics import average_precision +from bepler.alphabets import Uniprot21 +import bepler.fasta as fasta +from bepler.utils import pack_sequences, unpack_sequences +from bepler.utils import ContactMapDataset, collate_lists +from bepler.metrics import average_precision def load_data(seq_path, struct_path, alphabet, baselines=False): diff --git a/eval_contact_scop.py b/eval_contact_scop.py index 64f6a39..4ad7c88 100644 --- a/eval_contact_scop.py +++ b/eval_contact_scop.py @@ -10,11 +10,11 @@ from torch.nn.utils.rnn import PackedSequence import torch.utils.data -from src.alphabets import Uniprot21 -import src.fasta as fasta -from src.utils import pack_sequences, unpack_sequences -from src.utils import ContactMapDataset, collate_lists -from src.metrics import average_precision +from bepler.alphabets import Uniprot21 +import bepler.fasta as fasta +from bepler.utils import pack_sequences, unpack_sequences +from bepler.utils import ContactMapDataset, collate_lists +from bepler.metrics import average_precision def load_data(seq_path, struct_path, alphabet, baselines=False): diff --git a/eval_secstr.py b/eval_secstr.py index 5638730..a4de293 100644 --- a/eval_secstr.py +++ b/eval_secstr.py @@ -8,9 +8,9 @@ from torch.nn.utils.rnn import PackedSequence import torch.utils.data -from src.alphabets import Uniprot21, SecStr8 -from src.utils import pack_sequences, unpack_sequences -import src.pdb as pdb +from bepler.alphabets import Uniprot21, SecStr8 +from bepler.utils import pack_sequences, unpack_sequences +import bepler.pdb as pdb secstr_train_path = 'data/secstr/ss_cullpdb_pc40_res3.0_R1.0_d180412_filtered.train.fa' diff --git a/eval_similarity.py b/eval_similarity.py index bef2728..ae7ae59 100644 --- 
a/eval_similarity.py +++ b/eval_similarity.py @@ -11,10 +11,10 @@ from scipy.stats import pearsonr,spearmanr -from src.utils import pack_sequences, unpack_sequences -from src.alphabets import Uniprot21 -from src.alignment import nw_score -from src.metrics import average_precision +from bepler.utils import pack_sequences, unpack_sequences +from bepler.alphabets import Uniprot21 +from bepler.alignment import nw_score +from bepler.metrics import average_precision def encode_sequence(x, alphabet): diff --git a/eval_transmembrane.py b/eval_transmembrane.py index 7013a46..c23321d 100644 --- a/eval_transmembrane.py +++ b/eval_transmembrane.py @@ -9,9 +9,9 @@ from torch.nn.utils.rnn import PackedSequence import torch.utils.data -from src.alphabets import Uniprot21 -from src.parse_utils import parse_3line -import src.transmembrane as tm +from bepler.alphabets import Uniprot21 +from bepler.parse_utils import parse_3line +import bepler.transmembrane as tm def load_3line(path, alphabet): with open(path, 'rb') as f: @@ -63,12 +63,12 @@ def unstack_lstm(lstm): dest = attr + '0' src = attr + str(i) getattr(layer, dest).data[:] = getattr(lstm, src) - #setattr(layer, dest, getattr(lstm, src)) + #setattr(layer, dest, getattr(lstm, src)) dest = attr + '0_reverse' src = attr + str(i) + '_reverse' getattr(layer, dest).data[:] = getattr(lstm, src) - #setattr(layer, dest, getattr(lstm, src)) + #setattr(layer, dest, getattr(lstm, src)) layers.append(layer) in_size = 2*hidden_dim return layers diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..e4ee928 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,3 @@ +[build-system] +requires = ["setuptools >= 40.9.0", "wheel", "Cython", "numpy"] +build-backend = "setuptools.build_meta" \ No newline at end of file diff --git a/setup.py b/setup.py index 69307fa..8f6edca 100644 --- a/setup.py +++ b/setup.py @@ -1,8 +1,12 @@ from distutils.core import setup -from Cython.Build import cythonize + import numpy as np +from 
Cython.Build import cythonize +from setuptools import find_packages setup( - ext_modules = cythonize(['src/metrics.pyx', 'src/alignment.pyx']), - include_dirs=[np.get_include()] + name="bepler", + packages=find_packages(), + ext_modules=cythonize(["bepler/metrics.pyx", "bepler/alignment.pyx"]), + include_dirs=[np.get_include()], ) diff --git a/train_lm_pfam.py b/train_lm_pfam.py index 9159f58..5d0011e 100644 --- a/train_lm_pfam.py +++ b/train_lm_pfam.py @@ -11,9 +11,9 @@ import torch.utils.data from torch.nn.utils.rnn import pack_padded_sequence -import src.fasta as fasta -from src.alphabets import Uniprot21 -import src.models.sequence +import bepler.fasta as fasta +from bepler.alphabets import Uniprot21 +import bepler.models.sequence parser = argparse.ArgumentParser('Train sequence model') @@ -88,8 +88,8 @@ def main(): tied = not args.untied - model = src.models.sequence.BiLM(nin, nout, embedding_dim, hidden_dim, num_layers - , mask_idx=mask_idx, dropout=dropout, tied=tied) + model = bepler.models.sequence.BiLM(nin, nout, embedding_dim, hidden_dim, num_layers + , mask_idx=mask_idx, dropout=dropout, tied=tied) print('# initialized model', file=sys.stderr) device = args.device diff --git a/train_similarity.py b/train_similarity.py index 7389cc3..66d6585 100644 --- a/train_similarity.py +++ b/train_similarity.py @@ -13,13 +13,13 @@ from torch.nn.utils.rnn import PackedSequence, pack_padded_sequence, pad_packed_sequence import torch.utils.data -from src.alphabets import Uniprot21 -import src.scop as scop -from src.utils import pack_sequences, unpack_sequences -from src.utils import PairedDataset, AllPairsDataset, collate_paired_sequences -from src.utils import MultinomialResample -import src.models.embedding -import src.models.comparison +from bepler.alphabets import Uniprot21 +import bepler.scop as scop +from bepler.utils import pack_sequences, unpack_sequences +from bepler.utils import PairedDataset, AllPairsDataset, collate_paired_sequences +from bepler.utils 
import MultinomialResample +import bepler.models.embedding +import bepler.models.comparison def main(): @@ -197,20 +197,20 @@ def main(): print('# using LM:', args.lm, file=sys.stderr) if num_layers > 0: - embedding = src.models.embedding.StackedRNN(len(alphabet), input_dim, rnn_dim, embedding_size - , nlayers=num_layers, dropout=dropout, lm=lm) + embedding = bepler.models.embedding.StackedRNN(len(alphabet), input_dim, rnn_dim, embedding_size + , nlayers=num_layers, dropout=dropout, lm=lm) else: - embedding = src.models.embedding.Linear(len(alphabet), input_dim, embedding_size, lm=lm) + embedding = bepler.models.embedding.Linear(len(alphabet), input_dim, embedding_size, lm=lm) if args.norm == 'l1': - norm = src.models.comparison.L1() + norm = bepler.models.comparison.L1() print('# norm: l1', file=sys.stderr) elif args.norm == 'l2': - norm = src.models.comparison.L2() + norm = bepler.models.comparison.L2() print('# norm: l2', file=sys.stderr) - model = src.models.comparison.OrdinalRegression(embedding, 5, align_method=compare_type - , compare=norm, allow_insertions=allow_insert - ) + model = bepler.models.comparison.OrdinalRegression(embedding, 5, align_method=compare_type + , compare=norm, allow_insertions=allow_insert + ) if use_cuda: model.cuda() diff --git a/train_similarity_and_contact.py b/train_similarity_and_contact.py index 99197c3..8467cd0 100644 --- a/train_similarity_and_contact.py +++ b/train_similarity_and_contact.py @@ -15,15 +15,15 @@ from torch.nn.utils.rnn import PackedSequence, pack_padded_sequence, pad_packed_sequence import torch.utils.data -from src.alphabets import Uniprot21 -import src.scop as scop -from src.utils import pack_sequences, unpack_sequences -from src.utils import ContactMapDataset, collate_lists -from src.utils import PairedDataset, AllPairsDataset, collate_paired_sequences -from src.utils import MultinomialResample -import src.models.embedding -import src.models.multitask -from src.metrics import average_precision +from 
bepler.alphabets import Uniprot21 +import bepler.scop as scop +from bepler.utils import pack_sequences, unpack_sequences +from bepler.utils import ContactMapDataset, collate_lists +from bepler.utils import PairedDataset, AllPairsDataset, collate_paired_sequences +from bepler.utils import MultinomialResample +import bepler.models.embedding +import bepler.models.multitask +from bepler.metrics import average_precision cmap_paths = glob.glob('data/SCOPe/pdbstyle-2.06/*/*.png') cmap_dict = {os.path.basename(path)[:7] : path for path in cmap_paths} @@ -438,9 +438,9 @@ def main(): for param in lm.parameters(): param.requires_grad = False - embedding = src.models.embedding.StackedRNN(len(alphabet), input_dim, rnn_dim - , embedding_size, nlayers=num_layers - , dropout=dropout, lm=lm) + embedding = bepler.models.embedding.StackedRNN(len(alphabet), input_dim, rnn_dim + , embedding_size, nlayers=num_layers + , dropout=dropout, lm=lm) # similarity prediction parameters similarity_kwargs = {} @@ -450,8 +450,8 @@ def main(): width = args.width cmap_kwargs = {'hidden_dim': hidden_dim, 'width': width} - model = src.models.multitask.SCOPCM(embedding, similarity_kwargs=similarity_kwargs, - cmap_kwargs=cmap_kwargs) + model = bepler.models.multitask.SCOPCM(embedding, similarity_kwargs=similarity_kwargs, + cmap_kwargs=cmap_kwargs) if use_cuda: model.cuda()