Skip to content

Commit

Permalink
pdb ftp path updated
Browse files Browse the repository at this point in the history
  • Loading branch information
VGligorijevic committed Jan 26, 2021
1 parent 601d9fd commit e1d8ffd
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 2 deletions.
11 changes: 10 additions & 1 deletion preprocessing/create_nrPDB_GO_annot.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,16 +138,25 @@ def write_output_files(fname, pdb2go, go2info, pdb2seq):
onts = ['molecular_function', 'biological_process', 'cellular_component']
selected_goterms = {ont: set() for ont in onts}
selected_proteins = set()
+ for goterm in go2info:
+ prots = go2info[goterm]['pdb_chains']
+ num = len(prots)
+ namespace = go2info[goterm]['ont']
+ if num > 49 and num <= 5000:
+ selected_goterms[namespace].add(goterm)
+ selected_proteins = selected_proteins.union(prots)
+ """
for chain in pdb2go:
goterms = set(pdb2go[chain]['goterms'])
if len(goterms) > 2 and chain in pdb2seq:
for goterm in goterms:
prots = go2info[goterm]['pdb_chains']
num = len(prots)
namespace = go2info[goterm]['ont']
- if num > 49 and num < 5000:
+ if num > 19 and num <= 5000:
selected_goterms[namespace].add(goterm)
selected_proteins = selected_proteins.union(prots)
+ """

selected_goterms_list = {ont: list(selected_goterms[ont]) for ont in onts}
selected_gonames_list = {ont: [go2info[goterm]['goname'] for goterm in selected_goterms_list[ont]] for ont in onts}
Expand Down
2 changes: 1 addition & 1 deletion preprocessing/data_collection.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ printf "\n\n DOWNLOADING PDB SEQRES SEQUENCES...\n"
wget ftp://ftp.wwpdb.org/pub/pdb/derived_data/pdb_seqres.txt.gz -O $DATA_DIR/pdb_seqres.txt.gz

printf "\n\n DOWNLOADING PDB CLUSTERS...\n"
- wget ftp://resources.rcsb.org/sequence/clusters/bc-$SEQ_SIM.out -O $DATA_DIR/bc-$SEQ_SIM.out
+ wget https://cdn.rcsb.org/resources/sequence/clusters/bc-$SEQ_SIM.out -O $DATA_DIR/bc-$SEQ_SIM.out

printf "\n\n DOWNLOADING GO HIERARCHY...\n"
wget http://purl.obolibrary.org/obo/go/go-basic.obo -O $DATA_DIR/go-basic.obo
Expand Down

0 comments on commit e1d8ffd

Please sign in to comment.