Skip to content

Commit

Permalink
added Interface coverage cutoff (#342)
Browse files Browse the repository at this point in the history
* added interface coverage cutoff parameter

* adjusted tests for interface coverage cutoff

* removed global variable
  • Loading branch information
mgiulini authored Sep 5, 2023
1 parent 0803e50 commit 489af35
Show file tree
Hide file tree
Showing 6 changed files with 57 additions and 12 deletions.
11 changes: 11 additions & 0 deletions src/arctic3d/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,14 @@
default=0,
)

# argparse %-formats help strings, so a literal percent sign must be written
# as "%%"; a bare "%" makes help rendering fail with a ValueError.
# NOTE(review): the default (0.7) looks like a fraction rather than a
# percentage -- confirm the unit advertised in the help text.
argument_parser.add_argument(
    "--int_cov_cutoff",
    help="Interface coverage cutoff (%%)",
    type=float,
    required=False,
    default=0.7,
)


def load_args(arguments):
"""
Expand Down Expand Up @@ -177,6 +185,7 @@ def main(
linkage_strategy,
threshold,
min_clust_size,
int_cov_cutoff,
log_level="DEBUG",
):
"""Main function."""
Expand Down Expand Up @@ -270,6 +279,7 @@ def main(
pdb_to_use=pdb_to_use,
chain_to_use=chain_to_use,
pdb_data=pdb_data_path,
int_cov_cutoff=int_cov_cutoff,
)

if pdb_f is None:
Expand All @@ -291,6 +301,7 @@ def main(
pdb_path=pdb_f,
linkage_strategy=linkage_strategy,
threshold=threshold,
int_cov_cutoff=int_cov_cutoff,
)

log.info(f"Clustered interfaces {cl_dict}")
Expand Down
8 changes: 6 additions & 2 deletions src/arctic3d/modules/cluster_interfaces.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@
log = logging.getLogger("arctic3d.log")


def cluster_interfaces(interface_dict, pdb_path, linkage_strategy, threshold):
def cluster_interfaces(
interface_dict, pdb_path, linkage_strategy, threshold, int_cov_cutoff=0.7
):
"""
Wrapper to call interface_matrix and clustering
Expand All @@ -20,14 +22,16 @@ def cluster_interfaces(interface_dict, pdb_path, linkage_strategy, threshold):
linkage strategy for clustering
threshold : float
threshold for clustering
int_cov_cutoff : float
interface coverage cutoff
Returns
-------
clustered_residues : dict
dictionary of the clustered interfaces
"""
filtered_interfaces, matrix_path = interface_matrix(
interface_dict, pdb_path
interface_dict, pdb_path, int_cov_cutoff
)
if len(filtered_interfaces) > 0:
clustered_residues = interface_clustering(
Expand Down
22 changes: 15 additions & 7 deletions src/arctic3d/modules/interface_matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
from scipy.spatial.distance import cdist

SIGMA = 1.9
INTERFACE_COV_CUTOFF = 0.7

log = logging.getLogger("arctic3d.log")

Expand Down Expand Up @@ -154,7 +153,7 @@ def get_unique_sorted_resids(interface_dict):
return int_resids


def filter_interfaces(interface_dict, pdb_resids):
def filter_interfaces(interface_dict, pdb_resids, int_cov_cutoff=0.7):
"""
Filters the interfaces according to the residues present in the pdb
Expand All @@ -166,24 +165,30 @@ def filter_interfaces(interface_dict, pdb_resids):
pdb_resids : np.array
residues present in the pdb
int_cov_cutoff : float
interface coverage cutoff
Returns
-------
retained_interfaces : dict
dictionary of the retained and filtered interfaces
example : interface_dict = {"a" : [1,2], "b" : [2,3,4], "c": [5,6,7]}
pdb_resids = np.array([3,4,5,6,7])
then, if INTERFACE_COV_CUTOFF < 0.66:
then, if int_cov_cutoff < 0.66:
retained_interfaces = {"b": [3,4], "c" : [5,6,7]}
else:
retained_interfaces = {"c" : [5,6,7]}
"""
log.debug("Filtering interface dictionary")
log.debug(
"Filtering interface dictionary "
f"with interface coverage cutoff = {int_cov_cutoff}"
)
retained_interfaces = {}
for key in interface_dict.keys():
coverage, filtered_interface = check_residues_coverage(
interface_dict[key], pdb_resids
)
if coverage > INTERFACE_COV_CUTOFF:
if coverage > int_cov_cutoff:
# formatting the interface name to avoid spaces
formatted_key = format_interface_name(key)
retained_interfaces[formatted_key] = filtered_interface
Expand All @@ -210,7 +215,7 @@ def format_interface_name(int_name):
return formatted_name


def interface_matrix(interface_dict, pdb_path):
def interface_matrix(interface_dict, pdb_path, int_cov_cutoff=0.7):
"""
Computes the interface matrix.
Expand All @@ -235,7 +240,10 @@ def interface_matrix(interface_dict, pdb_path):
raise Exception(f"pdb_path {pdb_path} does not exist")
mdu = mda.Universe(pdb_path)
pdb_resids = mdu.select_atoms("name CA").resids
retained_interfaces = filter_interfaces(interface_dict, pdb_resids)
retained_interfaces = filter_interfaces(
interface_dict, pdb_resids, int_cov_cutoff
)
print(f"retained_interfaces: {retained_interfaces}")
ret_keys = list(retained_interfaces.keys())
log.debug(f"Retained interfaces: {ret_keys}")
n_ret = len(ret_keys)
Expand Down
10 changes: 8 additions & 2 deletions src/arctic3d/modules/pdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -690,7 +690,7 @@ def unlink_files(suffix="pdb", to_exclude=None):
fpath.unlink()


def get_maxint_pdb(validated_pdbs, interface_residues):
def get_maxint_pdb(validated_pdbs, interface_residues, int_cov_cutoff=0.7):
"""
Get PDB ID that retains the most interfaces.
Expand All @@ -700,6 +700,8 @@ def get_maxint_pdb(validated_pdbs, interface_residues):
List of (pdb_f, hit) tuples
interface_residues : dict
Dictionary of all the interfaces (each one with its uniprot ID as key)
int_cov_cutoff : float
Interface coverage cutoff.
Returns
-------
Expand Down Expand Up @@ -730,7 +732,7 @@ def get_maxint_pdb(validated_pdbs, interface_residues):
selection_string = f"name CA and chainID {chain_id.upper()}"
pdb_resids = mdu.select_atoms(selection_string).resids
tmp_filtered_interfaces = filter_interfaces(
interface_residues, pdb_resids
interface_residues, pdb_resids, int_cov_cutoff
)
curr_nint = len(tmp_filtered_interfaces)
if curr_nint > max_nint: # update "best" hit
Expand Down Expand Up @@ -790,6 +792,7 @@ def get_best_pdb(
pdb_to_use=None,
chain_to_use=None,
pdb_data=None,
int_cov_cutoff=0.7,
):
"""
Get best PDB ID.
Expand All @@ -806,6 +809,8 @@ def get_best_pdb(
Chain id to be used.
pdb_data : Path or None
pdb json file for offline mode.
int_cov_cutoff : float
Interface coverage cutoff.
Returns
-------
Expand Down Expand Up @@ -845,6 +850,7 @@ def get_best_pdb(
pdb_f, cif_f, top_hit, filtered_interfaces = get_maxint_pdb(
validated_pdbs_and_cifs,
interface_residues,
int_cov_cutoff=int_cov_cutoff,
)

if pdb_f is None or cif_f is None:
Expand Down
8 changes: 7 additions & 1 deletion tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ def test_cli_empty():
ligand=None,
linkage_strategy=None,
threshold=None,
int_cov_cutoff=None,
min_clust_size=None,
)
# assert exit code
Expand All @@ -38,7 +39,12 @@ def test_cli_empty():


def test_cli_full():
"""Test main cli with uniprot ID with one interface."""
target_uniprot = "W5JXD7"
exp_dir = Path(f"arctic3d-{target_uniprot}")
# delete folder if exists
if exp_dir.exists():
shutil.rmtree(exp_dir)
start_cwd = os.getcwd()
exit_code = main(
input_arg=target_uniprot,
Expand All @@ -56,10 +62,10 @@ def test_cli_full():
linkage_strategy=None,
threshold=None,
min_clust_size=1,
int_cov_cutoff=0.7,
)
assert exit_code == 0
os.chdir(start_cwd)
exp_dir = Path(f"arctic3d-{target_uniprot}")
assert exp_dir.exists() is True
# Check that the log file has been created
assert Path(exp_dir, "arctic3d.log").exists()
Expand Down
10 changes: 10 additions & 0 deletions tests/test_interface_matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,16 @@ def test_filter_interfaces(example_mdu, example_interface_dict):
example_interface_dict, pdb_resids
)
assert expected_filter_dict == observed_filter_dict
# lower int_cov_cutoff
expected_filter_dict = {
"int_1": [1, 2],
"int_2": [1, 2, 4],
"int_3": [250],
}
observed_filter_dict = filter_interfaces(
example_interface_dict, pdb_resids, int_cov_cutoff=0.4
)
assert expected_filter_dict == observed_filter_dict


def test_interface_matrix(example_interface_dict, example_pdbpath):
Expand Down

0 comments on commit 489af35

Please sign in to comment.