Source code for chilife.protein_utils

import os
import pickle
import math

from pathlib import Path
from typing import Set, List, Union, Tuple

from numpy.typing import ArrayLike
from scipy.spatial.distance import cdist
from scipy.spatial import cKDTree
from dataclasses import dataclass

try:
    from rdkit import Chem
    from rdkit.Chem import AllChem
    rdkit_found = True
except:
    rdkit_found = False

import MDAnalysis
import numpy as np
from scipy.spatial.transform import Rotation

from MDAnalysis.core.topologyattrs import Atomindices, Resindices, Segindices, Segids
import MDAnalysis as mda

from .globals import SUPPORTED_RESIDUES
from .MolSys import MolecularSystemBase, MolSys, concat_molsys
from .numba_utils import get_sasa, _ic_to_cart
from .pdb_utils import get_backbone_atoms, get_bb_candidates
import chilife as xl

import chilife.scoring as scoring
import chilife.io as io
import chilife.RotamerEnsemble as re

from .base_classes import Ensemble

import igraph as ig


[docs] def get_dihedral_rotation_matrix(theta: float, v: ArrayLike) -> ArrayLike: """Build a matrix that will rotate coordinates about a vector, v, by theta in radians. Parameters ---------- theta : float Rotation angle in radians. v : (3,) ArrayLike Three dimensional vector to rotate about. Returns ------- rotation_matrix : np.ndarray Matrix that will rotate coordinates about the vector, V by angle theta. """ # Normalize input vector v = v / np.linalg.norm(v) # Compute Vx matrix Vx = np.zeros((3, 3)) Vx[[2, 0, 1], [1, 2, 0]] = v Vx -= Vx.T # Rotation matrix. See https://en.wikipedia.org/wiki/Rotation_matrix#Rotation_matrix_from_axis_and_angle rotation_matrix = ( np.identity(3) * np.cos(theta) + np.sin(theta) * Vx + (1 - np.cos(theta)) * np.outer(v, v) ) return rotation_matrix
[docs] def get_dihedral(p: ArrayLike) -> float: """Calculates dihedral of a given set of atoms, ``p`` . Returns value in radians. .. code-block:: python 3 ------> / 1-------2 / 0 Parameters ---------- p : (4, 3) ArrayLike Matrix containing coordinates to be used to calculate dihedral. Returns ------- dihedral : float Dihedral angle in radians. """ # Unpack p p0, p1, p2, p3 = p # Define vectors from coordinates b0 = -1.0 * (p1 - p0) b1 = p2 - p1 b2 = p3 - p2 # Normalize dihedral bond vector b1 /= np.linalg.norm(b1) # Calculate dihedral projections orthogonal to the bond vector v = b0 - np.dot(b0, b1) * b1 w = b2 - np.dot(b2, b1) * b1 # Calculate angle between projections x = np.dot(v, w) y = np.dot(np.cross(b1, v), w) dihedral = math.atan2(y, x) return dihedral
[docs] def get_dihedrals(p1: ArrayLike, p2: ArrayLike, p3: ArrayLike, p4: ArrayLike) -> ArrayLike: """Vectorized version of get_dihedral Parameters ---------- p1 : ArrayLike Array containing coordinates of the first point in the dihedral. p2 : ArrayLike Array containing coordinates of the second point in the dihedral p3 : ArrayLike Array containing coordinates of the third point in the dihedral p4 : ArrayLike Array containing coordinates of the fourth point in the dihedral Returns ------- dihedrals : ArrayLike Dihedral angles in radians. """ # Define vectors from coordinates b0 = p1 - p2 b1 = p3 - p2 b2 = p4 - p3 # Normalize dihedral bond vector b1 /= np.linalg.norm(b1, axis=-1)[:, None] # Calculate dihedral projections orthogonal to the bond vector v = b0 - np.einsum('ij,ij->i', b0, b1)[:, None] * b1 w = b2 - np.einsum('ij,ij->i',b2, b1)[:, None] * b1 # Calculate angle between projections x = np.einsum('ij,ij->i', v, w) y = np.einsum('ij,ij->i', np.cross(b1, v, axis=-1), w) dihedral = np.arctan2(y, x) return dihedral
[docs] def get_angle(p: ArrayLike) -> float: r"""Calculate the angle created by 3 points. .. code-block:: python p2 / θ \ p1 p3 Parameters ---------- p: ArrayLike : Array of three points to calculate the angle between. Returns ------- angle : float Angle created by the three points. """ p1, p2, p3 = p v1 = p1 - p2 v2 = p3 - p2 X = v1 @ v2 Y = np.cross(v1, v2) Y = math.sqrt(Y @ Y) angle = math.atan2(Y, X) return angle
[docs] def get_angles(p1: ArrayLike, p2: ArrayLike, p3: ArrayLike) -> ArrayLike: r"""Vectorized version of get_angle. Parameters ---------- p1: ArrayLike : Array of first points in the angles. p2: ArrayLike : Array of second points in the angles. p3: ArrayLike : Array of third points in angle. Returns ------- angles : float Array of anlges. """ v1 = p1 - p2 v2 = p3 - p2 X = np.einsum('ij,ij->i', v1, v2) Y = np.cross(v1, v2, axis=-1) Y = np.sqrt((Y * Y).sum(axis=-1)) angles = np.arctan2(Y, X) return angles
[docs] def set_dihedral(p: ArrayLike, angle: float, mobile: ArrayLike) -> ArrayLike: """Sets the dihedral angle by rotating all ``mobile`` atoms from their current position about the dihedral bond defined by the four atoms in ``p`` . Dihedral will be set to the value of ``angle`` in degrees. Parameters ---------- p : ArrayLike Coordinates of atoms that define dihedral to rotate about. angle : float New angle to set the dihedral to (degrees). mobile : np.ndarray Atom coordinates to move by setting dihedral. Returns ------- new_mobile : np.ndarray New positions for the mobile atoms """ current = get_dihedral(p) angle = np.deg2rad(angle) - current angle = angle ori = p[1] mobile -= ori v = p[2] - p[1] v /= np.linalg.norm(v) R = get_dihedral_rotation_matrix(angle, v) new_mobile = R.dot(mobile.T).T + ori return new_mobile
[docs] def guess_chain(protein, site): """ Reads chain from protein or makes an educated guess on which chain a particular site resides on for a given Protein/Universe/AtomGroup. Parameters ---------- protein : mda.Universe | mda.AtomGroup | chilife.MolSys The protein being labeled. site : int The residue being labeled. Returns ------- chain : str Best guess for the chain on which the selected residue resides. """ if protein is None: chain = "A" elif len(set(protein.segments.segids)) == 1: chain = protein.segments.segids[0] elif np.isin(protein.residues.resnums, site).sum() == 0: raise ValueError(f"Residue {site} is not present on the provided protein") elif np.isin(protein.residues.resnums, site).sum() == 1: chain = protein.select_atoms(f"resid {site}").segids[0] else: raise ValueError( f"Residue {site} is present on more than one chain. Please specify the desired chain" ) return chain
[docs] def new_chain(protein): """Get the next unique chain identifier from a molecular system. Used for creating new chain IDs for LigandEnsembles Parameters ---------- protein: Union[mda.Universe, mda.AtomGroup, chilife.MolSys] The protein/molecular system with chain IDs that should not be used. Returns ------- chain : str The next unique chain identifier. """ if protein is None: return "A" segids = list("ABCDEFGHIJKLMNOPQRSTUVWXYZ") for seg in protein.segments: segids.remove(seg.segid) chain = segids.pop(0) return chain
[docs] def guess_mobile_dihedrals(ICs, aln_atoms=None, mask=False): """ Guess the flexible, uniqe, side chain dihedrals of a MolSysIC object. Parameters ---------- ICs : MolSysIC Internal coordinates object you wish to guess dihedrals for. aln_atoms : List[str] List of atom names corresponding to the "alignment atoms" of the molecule. These are usually the core backbone atoms, e.g. N CA C for a protein. mask : bool Return a boolean array to mask the internal coordinates z-matrix. Returns ------- dihedral_defs : List[List[str]] List of unique mobile dihedral definitions """ if aln_atoms is not None: root_idx = np.argwhere(ICs.atom_names == aln_atoms[1]).flat[0] aname_lst = ICs.atom_names.tolist() neighbor_idx = [aname_lst.index(a) for a in aln_atoms[::2]] aln_idx = [neighbor_idx[0], root_idx, neighbor_idx[1]] bb_atoms = aln_idx + get_backbone_atoms(ICs.topology.graph, root_idx, neighbor_idx, sorted_args=aln_idx) bb_atoms = [ICs.atom_names[i] for i in bb_atoms] bb_atoms += [ICs.atom_names[i] for i in ICs.topology.graph.neighbors(root_idx) if i not in neighbor_idx] else: bb_atoms = get_bb_candidates(ICs.atom_names, ICs.resnames[0]) bb_atoms = [atom for atom in bb_atoms if atom in ICs.atom_names] bb_idxs = np.argwhere(np.isin(ICs.atom_names, bb_atoms)).flatten() neighbors = set(sum([ICs.topology.graph.neighbors(i) for i in bb_idxs], start=[])) neighbor_names = [ICs.atom_names[n] for n in neighbors] bb_atoms += [n for n in neighbor_names if n not in bb_atoms] sc_mask = ~np.isin(ICs.atom_names, bb_atoms) ha_mask = ~(ICs.atom_types=='H') mask = ha_mask * sc_mask idxs = np.argwhere(mask).flatten() cyverts = ICs.topology.ring_idxs rotatable_bonds = {} _idxs = [] for idx in idxs: dihedral = ICs.z_matrix_idxs[idx] bond = tuple(dihedral[1:3]) # Skip duplicate dihedral defs if bond in rotatable_bonds: continue # Skip bonds that define ic_mx elif idx < 3: continue # Skip double bonds elif ICs.bond_types[tuple(sorted(bond[::-1]))] > 1: continue # Skip ring dihedrals elif any(all(a in ring for a in bond) for ring in cyverts): continue else: rotatable_bonds[bond] = dihedral _idxs.append(idx) idxs = _idxs dihedral_defs = [ICs.z_matrix_names[idx][::-1] for idx in idxs] return dihedral_defs
[docs] @dataclass class FreeAtom: """Atom class for atoms in cartesian space that do not belong to a MolSys. Attributes ---------- name : str Atom name atype : str Atom type index : int Atom number resn : str Name of the residue that the atom belongs to resi : int The residue index/number that the atom belongs to coords : np.ndarray The cartesian coordinates of the Atom """ name: str atype: str index: int resn: str resi: int coords: np.ndarray
[docs] def save_ensemble(name: str, atoms: ArrayLike, coords: ArrayLike = None) -> None: """Save a rotamer ensemble as multiple states of the same molecule. Parameters ---------- name : str file name to save rotamer ensemble to atoms : ArrayLike list of Atom objects coords : ArrayLike Array of atom coordinates corresponding to Atom objects """ if not name.endswith(".pdb"): name += ".pdb" if coords is None and isinstance(atoms[0], list): with open(name, "w", newline="\n") as f: for i, model in enumerate(atoms): f.write(f"MODEL {i + 1}\n") for atom in model: f.write( f"ATOM {atom.index + 1:5d} {atom.name:<4s}{atom.resn:3s} {'A':1s}{atom.resi:4d} " f"{atom._coords[0]:8.3f}{atom._coords[1]:8.3f}{atom._coords[2]:8.3f}{1.0:6.2f}{1.0:6.2f} " f" {atom.atype:>2s}\n" ) f.write("ENDMDL\n") elif len(coords.shape) > 2: with open(name, "w", newline="\n") as f: for i, model in enumerate(coords): f.write(f"MODEL {i + 1}\n") for atom, coord in zip(atoms, model): f.write( f"ATOM {atom.index + 1:5d} {atom.name:<4s}{atom.resn:3s} {'A':1s}{atom.resi:4d} " f"{coord[0]:8.3f}{coord[1]:8.3f}{coord[2]:8.3f}{1.0:6.2f}{1.0:6.2f} {atom.atype:>2s}\n" ) f.write("ENDMDL\n") else: save_pdb(name, atoms, coords)
[docs] def save_pdb(name: Union[str, Path], atoms: ArrayLike, coords: ArrayLike, mode: str = "w") -> None: """Save a single state pdb structure of the provided atoms and coords. Parameters ---------- name : str, Path Name or Path object of file to save atoms : ArrayLike List of Atom objects to be saved coords : ArrayLike Array of atom coordinates corresponding to atoms mode : str File open mode. Usually used to specify append ("a") when you want to add structures to a PDB rather than overwrite that pdb. """ name = Path(name) if isinstance(name, str) else name name = name.with_suffix(".pdb") with open(name, mode, newline="\n") as f: f.write('MODEL\n') for atom, coord in zip(atoms, coords): f.write( f"ATOM {atom.index + 1:5d} {atom.name:^4s} {atom.resn:3s} {'A':1s}{atom.resi:4d} " f"{coord[0]:8.3f}{coord[1]:8.3f}{coord[2]:8.3f}{1.0:6.2f}{1.0:6.2f} {atom.atype:>2s} \n" ) f.write('ENDMDL\n')
[docs] def get_missing_residues( protein: Union[MDAnalysis.Universe, MDAnalysis.AtomGroup], ignore: Set[int] = None, use_H: bool = False, ignore_waters = True ) -> List: """Get a list of RotamerEnsemble objects corresponding to the residues of the provided protein that are missing heavy atoms. Parameters ---------- protein : MDAnalysis.Universe, MDAnalysis.AtomGroup MolSys to search for residues with missing atoms. ignore : set List of residue numbers to ignore. Usually sites you plan to label or mutate. use_H : bool Whether the new side chain should have hydrogen atoms. Returns ------- missing_residues : list A list of RotamerEnsemble objects corresponding to residues with missing heavy atoms. """ ignore = set() if ignore is None else ignore missing_residues = [] cache = {} for res in protein.residues: # Only consider supported residues because otherwise chiLife wouldn't know what's missing if ( res.resname not in SUPPORTED_RESIDUES or res.resnum in ignore or res.resname in ["ALA", "GLY"] ): continue # Check if there are any missing heavy atoms heavy_atoms = res.atoms.types[res.atoms.types != "H"] resn = res.resname a = cache.setdefault(resn, len(xl.RotamerEnsemble(resn).atom_names)) if len(heavy_atoms) != a: missing_residues.append( xl.RotamerEnsemble( res.resname, res.resnum, protein=protein, chain=res.segid, use_H=use_H, ignore_waters=ignore_waters ) ) return missing_residues
[docs] def mutate( protein: MDAnalysis.Universe, *ensembles: 'RotamerEnsemble', add_missing_atoms: bool = True, rotamer_index: Union[int, str, None] = None, use_H: bool = None, ignore_waters: bool = None ) -> MDAnalysis.Universe: """Create a new Universe where the native residue is replaced with the highest probability rotamer from a RotamerEnsemble or SpinLabel object. Parameters ---------- protein : MDAnalysis.Universe An MDA Universe object containing protein to be spin labeled ensembles : RotamerEnsemble, SpinLabel Precomputed RotamerEnsemble or SpinLabel object to use for selecting and replacing the spin native amino acid rotamer_index : int, str, None Index of the rotamer to be used for the mutation. If None, the most probable rotamer will be used. If 'all', mutate will return an ensemble with each rotamer, if random, mutate will return an ensemble with a random rotamer. add_missing_atoms : bool Model side chains missing atoms if they are not present in the provided structure. ignore_waters : bool ignore waters when selecting conforers for mutation. Returns ------- U : MDAnalysis.Universe New Universe with a copy of the spin labeled protein with the highest probability rotamer """ # Check for dRotamerEnsembles in ensembles temp_ensemble = [] for lib in ensembles: if isinstance(lib, Ensemble): temp_ensemble.append(lib) else: raise TypeError(f"mutate only accepts (d)RotamerEnsemble and (d)SpinLabel objects, not {lib}.") ensembles = temp_ensemble if add_missing_atoms: use_H = use_H if use_H is not None else param_from_rotlibs('use_H', ensembles) ignore_waters = ignore_waters if ignore_waters is not None else param_from_rotlibs('ignore_waters', ensembles) missing_residues = get_missing_residues(protein, ignore={res.site for res in ensembles}, use_H=use_H, ignore_waters=ignore_waters) ensembles = list(ensembles) + missing_residues label_sites = {} for spin_label in ensembles: if isinstance(spin_label, xl.dRotamerEnsemble): label_sites[spin_label.site1, spin_label.icode1, spin_label.chain] = spin_label label_sites[spin_label.site2, spin_label.icode2, spin_label.chain] = spin_label else: label_sites[spin_label.site, spin_label.icode, spin_label.chain] = spin_label # Remove waters if they are being ignored. if ignore_waters: protein = protein.select_atoms('(not altloc B) and (not (byres name OH2 or resname HOH))') else: protein = protein.select_atoms('(not altloc B)') label_selstr = " or ".join([f"({label.selstr})" for label in ensembles]) other_atoms = protein.select_atoms(f"not ({label_selstr})") resids = [res.resid for res in protein.residues] icodes = [res.icode for res in protein.residues] if hasattr(protein, "icodes") else None # Allocate lists for universe information atom_info = [] res_names = [] segidx = [] bonds = [] # Loop over residues in old universe for i, res in enumerate(protein.residues): resloc = (res.resid, res.icode, res.segid) # If the residue is the spin labeled residue replace it with the highest probability spin label if resloc in label_sites: rot_ens = label_sites[resloc] if isinstance(rot_ens, xl.dRotamerEnsemble): r1l = len(rot_ens.rl1mask) r2l = len(rot_ens.rl2mask) both = r1l + r2l if resloc[0] == rot_ens.site1: # Add site 1 atom_info += [ (i, name, atype) for name, atype in zip(rot_ens.atom_names[:r1l], rot_ens.atom_types[:r2l]) ] elif resloc[0] == rot_ens.site2: atom_info += [ (i, name, atype) for name, atype in zip(rot_ens.atom_names[r1l:r1l + r2l], rot_ens.atom_types[r1l:r1l + r2l]) ] # Add cap atom_info += [ (i, name, atype) for name, atype in zip(rot_ens.atom_names[both:], rot_ens.atom_types[both:]) ] else: raise RuntimeError("The residue specified is not part of the dRotamerEnsemble being constructed") else: atom_info += [ (i, name, atype) for name, atype in zip(rot_ens.atom_names, rot_ens.atom_types) ] offset = len(atom_info) - len(rot_ens.atom_names) _add_peptide_bond(atom_info, rot_ens, bonds) bonds.extend([[b1+offset, b2 + offset] for b1, b2 in rot_ens.bonds]) # Add missing Oxygen from rotamer ensemble res_names.append(rot_ens.res) segidx.append(rot_ens.segindex) # Else retain the atom information from the parent universe else: atom_info += [ (i, atom.name, atom.type) for atom in res.atoms if atom.altLoc != "B" ] res_names.append(res.resname) segidx.append(res.segindex) # Reindex segments in case any were dropped from the parent universe idxmap = {idx: i for i, idx in enumerate(np.unique(segidx))} segidx = np.fromiter((idxmap[idx] for idx in segidx), dtype=int) # Unzip atom information into individual lists residx, atom_names, atom_types = zip(*atom_info) segids = protein.segments.segids # Allocate a new universe with the appropriate information if isinstance(protein, (mda.Universe, mda.AtomGroup)): U = make_mda_uni(atom_names, atom_types, res_names, residx, resids, segidx, segids, icodes=icodes) elif isinstance(protein, MolecularSystemBase): U = MolSys.from_arrays(atom_names, atom_types, res_names, residx, resids, segidx, segids=segids, icodes=icodes) U.add_bonds(bonds) # Apply old coordinates to non-spinlabel atoms new_other_atoms = U.select_atoms(f"not ({label_selstr})") new_other_atoms.atoms.positions = other_atoms.atoms.positions if rotamer_index == 'all': max_label_len = max([len(label) for label in label_sites.values()]) coordinates = np.array([U.atoms.positions.copy() for _ in range(max_label_len)]) U.load_new(coordinates) for i, ts in enumerate(U.trajectory): for spin_label in label_sites.values(): sl_atoms = U.select_atoms(spin_label.selstr) if len(spin_label) <= i: sl_atoms.positions = spin_label.coords[-1] else: sl_atoms.positions = spin_label.coords[i] else: for spin_label in label_sites.values(): sl_atoms = U.select_atoms(spin_label.selstr) if rotamer_index == 'random': rand_idx = np.random.choice(len(spin_label.coords), p=spin_label.weights) sl_atoms.atoms.positions = spin_label.coords[rand_idx] elif isinstance(rotamer_index, int): sl_atoms.atoms.positions = spin_label.coords[rotamer_index] else: sl_atoms.atoms.positions = spin_label.coords[np.argmax(spin_label.weights)] return U
def _add_peptide_bond(atom_info, rot_ens, bonds): res_num = atom_info[-1][0] offset = len(atom_info) - len(rot_ens.atom_names) if res_num > 1 and 'N' in rot_ens.atom_names: C_idx = None wback = 1 prev_res = atom_info[offset - wback][0] while prev_res == res_num - 1: if atom_info[offset - wback][1] == 'C': C_idx = offset - wback break wback += 1 if C_idx is not None: N_idx = np.argwhere(rot_ens.atom_names == 'N').flatten()[0] + offset bonds.append([C_idx, N_idx])
[docs] def randomize_rotamers( protein: Union[mda.Universe, mda.AtomGroup], rotamer_libraries: List['RotamerEnsemble'], **kwargs, ) -> None: """Modify a protein object in place to randomize side chain conformations. Parameters ---------- protein : MDAnalysis.Universe, MDAnalysis.AtomGroup MolSys object to modify. rotamer_libraries : list RotamerEnsemble objects attached to the protein corresponding to the residues to be repacked/randomized. **kwargs : dict Additional Arguments to pass to ``sample`` method. See :mod:`sample <chiLife.RotamerEnsemble.sample>` . """ for rotamer in rotamer_libraries: coords, weight = rotamer.sample(off_rotamer=kwargs.get("off_rotamer", False)) mask = ~np.isin(protein.ix, rotamer.clash_ignore_idx) protein.atoms[~mask].positions = coords
[docs] def param_from_rotlibs(param: str, ensembles: List['RotamerEnsemble']): """ Get the value of a parameter used across a set of rotamer ensembles if that parameter is consistent. If the parameter is not the same value for all rotamer ensembles, an AttributeError will be thrown. Parameters ---------- param : str The name of the parameter. ensembles : List[RotamerEnsembles] The rotamer ensembles to search for the parameter int Returns ------- param_value: Any The value of the parameter across all the rotamer libarries. """ # If there are no residues being mutated just return defaults if len(ensembles) == 0: defaults = {} defaults.update(re.assign_defaults(defaults)) return defaults[param] # Otherise check to make sure they are all the same ensemble_params = [getattr(ensemble, param, None) for ensemble in ensembles] if all(e == ensemble_params[0] for e in ensemble_params[1:]): param_value = ensemble_params[0] return param_value # And return an error if they are not else: raise AttributeError(f"User provided ensembles with different {param} parameters. Make sure all " f"ensembles use the same value for {param}")
[docs] def get_sas_res( protein: Union[mda.Universe, mda.AtomGroup], cutoff: float = 30, forcefield = 'charmm', ) -> Set[Tuple[int, str]]: """Run FreeSASA to get solvent accessible surface residues in the provided protein Parameters ---------- protein : MDAnalysis.Universe, MDAnalysis.AtomGroup MolSys object to measure Solvent Accessible Surfaces (SAS) area of and report the SAS residues. cutoff : float Exclude residues from list with SASA below cutoff in angstroms squared. forcefield : Union[str, chilife.ljParams] Forcefiled to use defining atom radii for calculating solvent accessibility. Returns ------- SAResi : set A set of solvent accessible surface residues. """ if isinstance(forcefield, str): forcefield = scoring.ljEnergyFunc(forcefield) environment_coords = protein.atoms.positions environment_radii = forcefield.get_lj_rmin(protein.atoms.types) atom_sasa = get_sasa(environment_coords, environment_radii, by_atom=True) SASAs = {(residue.resnum, residue.segid) for residue in protein.residues if atom_sasa[0, residue.atoms.ix].sum() >= cutoff} return SASAs
[docs] def pose2mda(pose) -> MDAnalysis.Universe: """Create an MDAnalysis universe from a pyrosetta pose Parameters ---------- pose : pyrosetta.rosetta.core.Pose pyrosetta pose object. Returns ------- mda_protein : MDAnalysis.Universe Copy of the input pose as an MDAnalysis Universe object """ coords = np.array( [ res.xyz(atom) for res in pose.residues for atom in range(1, res.natoms() + 1) if res.atom_type(atom).element().strip() != "X" ] ) atypes = np.array( [ str(res.atom_type(atom).element()).strip() for res in pose.residues for atom in range(1, res.natoms() + 1) if res.atom_type(atom).element().strip() != "X" ] ) anames = np.array( [ str(res.atom_name(atom)).strip() for res in pose.residues for atom in range(1, res.natoms() + 1) if res.atom_type(atom).element().strip() != "X" ] ) resindices = np.array( [ res.seqpos() - 1 for res in pose.residues for atom in range(1, res.natoms() + 1) if res.atom_type(atom).element().strip() != "X" ] ) n_residues = len(pose) segindices = np.array([0] * n_residues) resnames = np.array([res.name() for res in pose]) resnums = np.array([res.seqpos() for res in pose]) mda_protein = make_mda_uni(anames, atypes, resnames, resindices, resnums, segindices) mda_protein.positions = coords return mda_protein
[docs] def xplor2mda(xplor_sim) -> MDAnalysis.Universe: """ Converts an Xplor-NIH xplor.simulation object to an MDAnalysis Universe object Parameters ---------- xplor_sim : xplor.simulation an Xplor-NIH simulation object. (See https://nmr.cit.nih.gov/xplor-nih/doc/current/python/ref/simulation.html) Returns ------- """ n_atoms = xplor_sim.numAtoms() a_names = np.array([xplor_sim.atomName(i) for i in range(n_atoms)]) a_types = np.array([xplor_sim.chemType(i)[0] for i in range(n_atoms)]) resnames = np.array([xplor_sim.residueName(i) for i in range(len(xplor_sim.residueNameArr()))]) resnums = np.array(xplor_sim.residueNumArr()) _, uidx = np.unique(resnums, return_index=True) _, resindices = np.unique(resnums, return_inverse=True) resnums = resnums[uidx] resnames = resnames[uidx] n_residues = len(resnames) segindices = np.array([0] * n_residues) mda_protein = make_mda_uni(a_names, a_types, resnames, resindices, resnums, segindices) mda_protein.atoms.positions = np.array(xplor_sim.atomPosArr()) return mda_protein
[docs] def make_mda_uni(anames: ArrayLike, atypes: ArrayLike, resnames: ArrayLike, resindices: ArrayLike, resnums: ArrayLike, segindices: ArrayLike, segids: ArrayLike = None, icodes: ArrayLike = None, ) -> MDAnalysis.Universe: """ Create an MDAnalysis universe from numpy arrays of atom information. Parameters ---------- anames : ArrayLike Array of atom names. Length should be equal to the number of atoms. atypes : ArrayLike Array of atom elements or types. Length should be equal to the number of atoms. resnames : ArrayLike Array of residue names. Length should be equal to the number of residues. resindices : ArrayLike Array of residue indices. Length should be equal to the number of atoms. Elements of resindices should map to resnames and resnums of the atoms they represent. resnums : ArrayLike Array of residue numbers. Length should be equal to the number of residues. segindices : ArrayLike Array of segment/chain indices. Length should be equal to the number of residues. segids : ArrayLike, None Array of segment/chain IDs. Length should be equal to the number of segs/chains. Returns ------- mda_uni : MDAnalysis.Universe The Universe created by the function. """ n_atoms = len(anames) n_residues = len(np.unique(resindices)) if segids is None: segids = np.array(["ABCDEFGHIJKLMNOPQRSTUVWXYZ"[i] for i in range(len(np.unique(segindices)))]) elif len(segids) != len(np.unique(segindices)): pass if icodes is None: icodes = np.array(["" for _ in range(n_residues)]) mda_uni = mda.Universe.empty( n_atoms, n_residues=n_residues, atom_resindex=resindices, residue_segindex=segindices, trajectory=True, ) mda_uni.add_TopologyAttr("type", atypes) mda_uni.add_TopologyAttr("resnum", resnums) mda_uni.add_TopologyAttr("resids", resnums) mda_uni.add_TopologyAttr("icodes", icodes) mda_uni.add_TopologyAttr("resname", resnames) mda_uni.add_TopologyAttr("name", anames) mda_uni.add_TopologyAttr("altLocs", [""] * len(atypes)) mda_uni.add_TopologyAttr("segid") for i, segid in enumerate(segids): if i == 0: i_segment = mda_uni.segments[0] i_segment.segid = segid else: i_segment = mda_uni.add_Segment(segid=str(segid)) mask = np.argwhere(np.asarray(segindices) == i).flatten() mda_uni.residues[mask.tolist()].segments = i_segment mda_uni.add_TopologyAttr(Segids(np.array(segids))) mda_uni.add_TopologyAttr(Atomindices()) mda_uni.add_TopologyAttr(Resindices()) mda_uni.add_TopologyAttr(Segindices()) return mda_uni
[docs] def template_ICs(template): """ Get the internal coordinate parameters (bond angle and dihedral angle) of the protein backbone atoms of the template. Parameters ---------- template : Union[str, Path, MolSys] The PDB file or MolSys object to serve as the template. Returns ------- phi, psi, omega, bond_angles : ArrayLike Arrays containing backbone dihedral and bond angles as calculated from the template structure. """ if isinstance(template, (str, Path)): template = xl.MolSys.from_pdb(template) elif isinstance(template, (mda.Universe, mda.AtomGroup)): template = xl.MolSys.from_atomsel(template) elif not isinstance(template, MolecularSystemBase): raise RuntimeError('Template must be a PDB file or a MDA.Universe, AtomGroup or chiLife.MolSys object.') Ns = template.select_atoms('name N') CAs = template.select_atoms('name CA') Cs = template.select_atoms('name C') Os = template.select_atoms('name O') phis = get_dihedrals(Cs.positions[:-1], Ns.positions[1:], CAs.positions[1:], Cs.positions[1:]) psis = get_dihedrals(Ns.positions[:-1], CAs.positions[:-1], Cs.positions[:-1], Ns.positions[1:]) omegas = get_dihedrals(CAs.positions[:-1], Cs.positions[:-1], Ns.positions[1:], CAs.positions[1:]) # Add to phis and psis for N_CA_C_O and H_N_CA_C BA1 = np.concatenate([[0.], get_angles(CAs.positions[:-1], Cs.positions[:-1], Ns.positions[1:])]) BA2 = np.concatenate([[0.], get_angles(Cs.positions[:-1], Ns.positions[1:], CAs.positions[1:])]) BA3 = get_angles(Ns.positions, CAs.positions, Cs.positions) BA4 = get_angles(CAs.positions, Cs.positions, Os.positions) bond_angles = np.array([BA1, BA2, BA3, BA4]).T return tuple(np.rad2deg(x) for x in (phis, psis, omegas, bond_angles))
[docs] def make_peptide(sequence: str, phi=None, psi=None, omega=None, bond_angles=None, template=None) -> MolSys: """ Create a peptide from a string of amino acids. chilife NCAA rotamer libraries and smiles can be inserted by using square brackets and angle brackets respectively , e.g. ``[ACE]AA<C1=CC2=C(C(=C1)F)C(=CN2)CC(C(=O)O)N>AA[NME]`` where ACE and NME are peptide caps and <C1=CC2=C(C(=C1)F)C(=CN2)CC(C(=O)O)N> is a smiles for a NCAA. Backbone torsion and bond angles can be set using the phi, psi, omega, and bond_angel keyword argument. All angles should be passed in degrees. Alternativly backbone angles can be set with a protein template that can either be a MolSys object, or a sting/Path object pointing to a PDB file. Parameters ---------- sequence : str The Amino acid sequence. phi : ArrayLike N-1 length array specifying the peptide backbone phi angles (degrees). psi : ArrayLike N-1 length array specifying the peptide backbone psi angles (degrees). omega : ArrayLike N-1 length array specifying the peptide backbone omega angles (degrees). bond_angles : ArrayLike Nx4 shaped array specifying the 4 peptide backbone bond angles (degrees). Note that, for the first residue any value for the first and second bond angles will be ignored because they are undefined. 1 - CA(i-1)-C(i-1)-N 2 - C(i-1)_N_CA 3 - N-CA-C 4 - CA-C-O template : str, Path, MolSys Returns ------- mol : MolSys A chiLife MolSys object """ C_N_LENGTH = 1.34 N_CA_LENGTH = 1.46 CA_C_LENGTH = 1.54 CA_C_N_ANGLE = 1.9897 C_N_CA_ANGLE = 2.1468 N_CA_C_ANGLE = 1.9199 base_IC = {} # Strip whitespace from multiline strings sequence = sequence.strip() # Parse sequences sequence = parse_sequence(sequence) zmat = [] zmat_idxs = [] anames = [] atypes = [] resnames = [] resindices = [] resnums = [] seqiter = iter(sequence) atom_idx = 0 residue_idx = 0 prev_N = 0 ncap = None ccap = None if template is None: pass elif all(x is None for x in (phi, psi, omega, bond_angles)): phi, psi, omega, bond_angles = template_ICs(template) else: raise RuntimeError('You can not pass both a template and explicit bond_angle or backbone dihedral values') # Alter definitions to be based off of other atoms, e.g. N-CA-C-O instead of N-CA-C-N phi = np.deg2rad(phi) if phi is not None else np.ones(len(sequence)) * -0.82030 psi = np.deg2rad(psi) if psi is not None else np.ones(len(sequence)) * -0.99484 omega = np.deg2rad(omega) if omega is not None else np.ones(len(sequence)) * -np.pi if len(phi) < len(sequence): diff = len(sequence) - len(phi) if diff > 3: raise RuntimeError('The number of phi backbone dihedrals does not match the number of residues') phi = (np.concatenate(([-0.82030], phi)) if diff == 1 else np.concatenate(([-0.82030], phi, [-0.82030])) if diff == 2 else np.concatenate(([-0.82030, -0.82030], phi, [-0.82030]))) if len(psi) < len(sequence): diff = len(sequence) - len(psi) if diff > 3: raise RuntimeError('The number of psi backbone dihedrals does not match the number of residues') psi = (np.concatenate(([-0.99484], psi)) if diff == 1 else np.concatenate(([-0.99484, -0.99484], psi)) if diff == 2 else np.concatenate(([-0.99484], psi, [-0.99484, -0.99484]))) if len(omega) < len(sequence): diff = len(sequence) - len(omega) if diff > 3: raise RuntimeError('The number of omega backbone dihedrals does not match the number of residues') omega = (np.concatenate(([-np.pi], omega)) if diff == 1 else np.concatenate(([-np.pi, -np.pi], omega, [])) if diff == 2 else np.concatenate(([-np.pi], omega, [-np.pi, -np.pi]))) for i, res in enumerate(seqiter): if res in base_IC: msysIC = base_IC[res] elif res in xl.nataa_codes or res in xl.SUPPORTED_BB_LABELS: with open(xl.RL_DIR / f"residue_internal_coords/{res.lower()}_ic.pkl", 'rb') as f: msysIC = pickle.load(f) base_IC[res] = msysIC elif res in xl.SUPPORTED_RESIDUES: RL = xl.RotamerEnsemble(res, use_H=True) idxmax = np.argmax(RL.weights) msysIC = RL.internal_coords msysIC.trajectory[idxmax] base_IC[res] = msysIC elif res in xl.ncaps: ncap = res continue elif res in xl.ccaps: ccap = res continue else: mol = smiles2residue(res) msysIC = xl.MolSysIC.from_atoms(mol) anames.append(msysIC.atom_names) atypes.append(msysIC.atom_types) resnames.append(msysIC.atom_resnames) resnums.append(msysIC.atom_resnums + residue_idx) resindices.append(msysIC.atom_resnums + residue_idx-1) # Set backbone dihedrals if i < len(sequence)-1: msysIC.set_dihedral(psi[i+1] + np.pi, 1, ['N', 'CA', 'C', 'O']) if 'H' in msysIC.atom_names: msysIC.set_dihedral(phi[i] + np.pi, 1, ['C', 'CA', 'N', 'H']) tzmat = msysIC.z_matrix.copy() tzmat_idxs = msysIC.z_matrix_idxs.copy() if atom_idx != 0: tzmat_idxs += atom_idx tzmat_idxs[0] = [atom_idx, prev_N + 2, prev_N + 1, prev_N] tzmat_idxs[1] = [atom_idx + 1, atom_idx, prev_N + 2, prev_N + 1] tzmat_idxs[2] = [atom_idx + 2, atom_idx + 1, atom_idx, prev_N + 2] tzmat[0] = [C_N_LENGTH, CA_C_N_ANGLE, psi[i]] tzmat[1] = [N_CA_LENGTH, C_N_CA_ANGLE, omega[i]] tzmat[2] = [CA_C_LENGTH, N_CA_C_ANGLE, phi[i]] prev_N = atom_idx zmat.append(tzmat) zmat_idxs.append(tzmat_idxs) atom_idx = atom_idx + len(tzmat) residue_idx += 1 anames = np.concatenate(anames) atypes = np.concatenate(atypes) resnames = np.concatenate(resnames) resnums = np.concatenate(resnums) resindices = np.concatenate(resindices) zmat = np.concatenate(zmat) zmat_idxs = np.concatenate(zmat_idxs) segindices = np.array([0] * len(anames)) segids = np.array(['A'] * len(anames)) if bond_angles is not None: assert len(bond_angles) == len(np.unique(resindices)) for i, atom_name in enumerate(('N', 'CA', 'C', 'O')): offset = 1 if atom_name in ('N', 'CA') else 0 idxs = np.argwhere(anames == atom_name).flat[offset:] zmat[idxs, 1] = np.deg2rad(bond_angles[offset:, i]) trajectory = _ic_to_cart(zmat_idxs[:, 1:], zmat) mol = MolSys.from_arrays(anames, atypes, resnames, resindices, resnums, segindices, segids=segids, trajectory=trajectory) if ncap is not None: d = phi[1] if phi[1] != -0.82030 else None mol = append_cap(mol, ncap, dihedral=d) if ccap is not None: d = psi[-1] if psi[-1] != -0.99484 else None mol = append_cap(mol, ccap, dihedral=d) return mol
[docs] def parse_sequence(sequence: str) -> List[str]: """ Input a string of amino acids with mized 1-letter codes, square brackted ``[]`` 3-letter codes or angle ``<>`` bracketed smiles and return a list of 3-letter codes and smiles. Parameters ---------- sequence : str The Amino acid sequence. Returns ------- parsed_sequence : List[str] A list of the amino acid sequences. """ parsed_sequence = [] seqiter = iter(sequence) for aa in seqiter: if aa.upper() in xl.nataa_codes: parsed_sequence.append(xl.nataa_codes[aa.upper()]) # Parse chiLife compatible NCAAs elif aa == '[': code = "" aa = next(seqiter) while aa != ']': code += aa try: aa = next(seqiter) except StopIteration: raise RuntimeError("Cannot parse sequence because there is an unbalaced square bracket ``[]``.") parsed_sequence.append(code) elif aa == "<": smiles = "" aa = next(seqiter) while aa != '>': smiles += aa try: aa = next(seqiter) except: raise RuntimeError("Cannot parse sequence because there is an unbalnced angle bracket ``<>``.") parsed_sequence.append(smiles) elif aa in ('>', ']'): raise RuntimeError(f"A terminating ``{aa}`` bracket has been detected, but no opening bracket was placed " f"ahead of it") else: raise RuntimeError(f'``{aa}`` is not a known amino acid') return parsed_sequence
[docs] def smiles2residue(smiles : str, **kwargs) -> MolSys: """ Create a protein residue from a smiles string. Smiles string must contain an N-C-C-O dihedral to identify the protein backbone. If no dihedral is found or there are too many N-C-C-O dihedrals that are indistinguishable this function will fail. Parameters ---------- smiles : str SMILES string to convert to 3d and a residue kwargs : dict Keyword arguments to pass to rdkit.AllChem.EmbedMolecule (usually randomSeed) Returns ------- msys : MolSys chiLife molecular system object containing the smiles string as a 3D molecule and single residue. """ if not rdkit_found: raise RuntimeError("Using smiles2residue or make_peptide with a smile string requires rdkit to be installed.") mol = Chem.MolFromSmiles(smiles) mol = Chem.AddHs(mol) success = AllChem.EmbedMolecule(mol, **kwargs) if success != 0: AllChem.EmbedMolecule(mol, enforceChirality=False, **kwargs) AllChem.MMFFOptimizeMolecule(mol, maxIters=200) res = MolSys.from_rdkit(mol) atoms = res.atoms res.filename = 'UNK.pdb' Natoms = atoms.select_atoms('type N') NCCOs = [] for atom in Natoms: for dih in atom.dihedrals: if np.all(dih.atoms.types == ['N', 'C', 'C', 'O']): NCCOs.append(dih.atoms) elif np.all(dih.atoms.types == ['O', 'C', 'C', 'N']): NCCOs.append(dih.atoms[::-1]) bb_candidates = [] for dih in NCCOs: assert dih[-1].type == 'O' if len(dih[-1].bonds) == 1: bb_candidates.append(dih) if len(bb_candidates) == 1: bb = bb_candidates[0] else: bb = None for can in bb_candidates: cooh = "".join(sorted(can[2].bonded_atoms.types)) if cooh != 'COO': continue else: bb = can break bb.names = ['N', 'CA', 'C', 'O'] count_dict = {} for atom in atoms: if atom in bb: pass elif 'N' in atom.bonded_atoms.names: if atom.type == 'H': atom.name = count_dict.get('X', 'H') count_dict['X'] = 'X' elif 'CA' in atom.bonded_atoms.names and atom.type == 'H': atom.name = 'HA' elif 'O' in atom.bonded_atoms.names and atom.type == 'H': atom.name = 'X' elif 'C' in atom.bonded_atoms.names and atom.type == 'O': atom.name = 'X' for oatom in atom.bonded_atoms: if oatom.type == 'H': oatom.name ='X' elif atom.name == 'X': pass else: n = count_dict.setdefault(atom.type, 1) atom.name = f'{atom.type}{n}' count_dict[atom.type] += 1 save_atoms = atoms.select_atoms('not name X') xl.save('UNK.pdb', save_atoms) Msys = MolSys.from_pdb('UNK.pdb', sort_atoms=True) os.remove('UNK.pdb') return Msys
[docs] def append_cap(mol : MolSys, cap : str, resnum = None, dihedral: float = None) -> MolSys: """ Append a peptide cap to a molecular system. Parameters ---------- mol : MolSys The molecular system to be capped. cap : str The name (3-letter identifier) of the cap to be attached. Returns ------- mol : MolSys The molecular system with the cap. """ cap_name = cap.upper() term = "N" if cap_name in xl.ncaps else "C" if cap_name in xl.ccaps else None if resnum is not None: neighbor = mol.select_atoms(f'resnum {resnum}') elif term == "N": neighbor = mol.residues[0] resnum = neighbor.resnum elif term == "C": neighbor = mol.select_atoms('protein').residues[-1] resnum = neighbor.resnum else: raise RuntimeError('`term` not found in neighboring residue. Note that `term` must be an `N` or a `C`.') nmx, nori = xl.global_mx(*neighbor.select_atoms('name N CA C').positions) cap_struct = xl.MolSys.from_pdb(xl.RL_DIR / f'cap_pdbs/{cap_name}.pdb') cap_struct.positions = cap_struct.positions @ nmx + nori cap_struct.resnum = resnum + 1 if term == "C" else resnum - 1 if dihedral is not None: # Identify nearst atom of cap as bonded atom neighbor_atom = neighbor.atoms[neighbor.names == term].position[0] bonded_atom = np.linalg.norm(cap_struct.positions - neighbor_atom, axis=1).argmin() bonded_atom = cap_struct.atoms[bonded_atom] # Set up dih if term == 'N': dihedral_points = bonded_atom.positions, *neighbor.select_atoms('name N CA C').positions else: dihedral_points = *neighbor.select_atoms('name N CA C').positions, bonded_atom.positions dihedral_points = dihedral_points[::-1] start_angle = get_dihedral(dihedral_points) theta = start_angle - dihedral v = dihedral_points[2] - dihedral_points[1] v /= np.linalg.norm(v) mx = get_dihedral_rotation_matrix(theta, v) cap_struct.positions = (cap_struct.positions - dihedral_points[2]) @ mx.T + dihedral_points[2] elif term=='C' and 'OXT' in neighbor.names: C_pos = neighbor.select_atoms('name C').positions vfrom = cap_struct[0].position - C_pos vfrom /= np.linalg.norm(vfrom) vto = neighbor.select_atoms('name OXT').positions - C_pos vto /= np.linalg.norm(vto) R, _ = Rotation.align_vectors(vfrom, vto) R = R.as_matrix() cap_struct.positions = (cap_struct.positions - C_pos) @ R + C_pos mol = mol.select_atoms(f'not (resid {resnum} and name OXT)') systems = [cap_struct, mol] if term == "N" else [mol, cap_struct] mol = concat_molsys(*systems) return mol
[docs] def store_cap(name, mol, term): """ Save the cap attached to a molecule. A pdb file will be saved in the chilife/data/rotamer_libraries/cap_pdbs folder with the provided name and the cap will be registered in either the ncaps.txt or ccaps.txt file in the chilife/data/rotamer_libraries directory. Parameters ---------- name : str Name (3-letter identifier) of the cap to be attached. mol : str, Path, Universe, AtomGroup, MolSys Path, MDA.AtomGroup or MolSys object containing the cap. The cap must be its own residue number, i.e. not merged with the reside before or after it. term: str The terminus the cap is attached to. Only two values are accepted ``N`` and ``C`` """ name = name.upper() term = term.upper() if isinstance(mol, (str, Path)): mol = xl.MolSys.from_pdb(mol) elif isinstance(mol, (mda.Universe, mda.AtomGroup)): mol = xl.MolSys.from_atomsel(mol) bonds = xl.guess_bonds(mol.positions, mol.types) top = xl.Topology(mol, bonds) mol.topology = top if term == "N": cap = mol.residues[0] neighbor = mol.residues[1] txt_file = xl.RL_DIR / 'ncaps.txt' resnum=0 elif term == "C": cap = mol.residues[-1] neighbor = mol.residues[-2] txt_file = xl.RL_DIR / 'ccaps.txt' resnum = 1 else: raise RuntimeError('`term` not found in neighboring residue. Note that `term` must be an `N` or a `C`.') ori, mx = xl.local_mx(*neighbor.select_atoms("name N CA C").positions) cap.positions = (cap.positions - ori) @ mx with open(txt_file, 'r+') as f: line = f.readline() keys = line.split() if name not in keys: f.write(f" {name}") cap.resnum = resnum xl.save(xl.RL_DIR / f'cap_pdbs/{name}.pdb', cap)
[docs] def get_grid_mx(N, CA, C): """ Get the rotation matrix and translation vector to orient a grid of lennard-jones spheres into a new coordinate frame. It is expected that the grid is defined as it is in :func:`screen_site_volume`, i.e. the xy plane of the grid is centered at the origin and the z-axis is positive. Parameters ---------- N : np.ndarray First coordinate of the site. CA : np.ndarray Second coordinate of the site (The origin). C : np.ndarray Third coordinate of the site. Returns ------- rotation matrix: np.ndarray Rotation matrix for the grid. Should be applied before translation. origin : np.ndarray Translation vector for the grid. Should be applied after rotation. """ CA_N = N - CA CA_N /= np.linalg.norm(CA_N) CA_C = C - CA CA_C /= np.linalg.norm(CA_C) dummy_axis = CA_N + CA_C dummy_axis = dummy_axis / np.linalg.norm(dummy_axis) # Define new y-axis xaxis_plane = N - C dummy_comp = xaxis_plane.dot(dummy_axis) xaxis = xaxis_plane - dummy_comp * dummy_axis xaxis = xaxis / np.linalg.norm(xaxis) theta = np.deg2rad(35.25) mx = get_dihedral_rotation_matrix(theta, xaxis) yaxis = mx @ dummy_axis zaxis = np.cross(xaxis, yaxis) rotation_matrix = np.array([xaxis, yaxis, zaxis]) origin = CA return rotation_matrix, origin
[docs] def write_grid(grid, name='grid.pdb', atype='Se'): """ Given an array of 3-dimensional points (a grid usually), write a PDB so the grid can be visualized. Parameters ---------- grid : ArrayLike The 3D points to write name: str The name of the file to write the grid to. atype : str Atom type to use for the grid. """ with open(name, 'w') as f: for i, p in enumerate(grid): f.write(io.fmt_str.format(i+1, 'SEN', 'GRD', 'A', 1, *p, 1.0, 1.0, atype))
[docs] def get_site_volume(site: int, mol: Union[MDAnalysis.Universe, MDAnalysis.AtomGroup, MolecularSystemBase], grid_size: Union[float, ArrayLike] = 10, offset: Union[float, ArrayLike] = -0.5, spacing: float = 1.0, vdw_r = 2.8, write: str = False, return_grid: bool = False ) -> Union[float, ArrayLike]: """ Calculate the (approximate) accessible volume of a single site. The volume is calculated by superimposing a grid of lennard-jones spheres at the site, eliminating those with clashes and calculating the volume from the remaining spheres. Note that this is a different "accessible volume" than the "accessible volume" method made popular by MtsslWizard. Parameters ---------- site : int Site of the `mol` to calculate the accessible volume of. mol : MDAnalysis.Universe, MDAnalysis.AtomGroup, chilife.MolecularSystemBase The molecule/set of atoms containing the site to calculate the accessible volume of. grid_size: float, ArrayLike Dimensions of grid to use to calculate the accessible volume. Can be a float, specifying a cube with equal dimensions, or an array specifying a unique value for each dimension. Uses units of Angstroms. offset: float, ArrayLike The offset of the initial grid with respect to the superimposition site. If ``offset`` is a float, the grid will be offset in the z-dimension (along the CA-CB bond). If ``offset`` is an array, the grid will be offset in each x, y, z dimension by the corresponding value of the array. Uses units of Angstroms spacing : float Spacing between grid points. Defaults to 1.0 Angstrom vdw_r : float van der Waals radius for clash detection. Defaults to 2.8 Angstroms. write : bool, str, default False Switch to make chiLife write the grid to a PDB file. If ``True`` the grid will be written to grid.pdb. If given a string chiLife will use the string as the file name. return_grid: bool, default False If ``return_grid=True``, the volume will be returned as a numpy array containing the grid points that do not clash with the neighboring atoms. Returns ------- volume : float, np.ndarray The accessible volume at the given site. If ``return_grid=True`` the volume will be returned as the set of grid points that do not clash with neighboring atoms, otherwise volume will be the estimated accessible volume of the site in cubic angstroms (Å :sup:`3`). """ if isinstance(grid_size, (int, float, complex)): x = y = z = grid_size elif hasattr(grid_size, '__len__'): x, y, z = grid_size else: raise RuntimeError("grid_size must be a float or an array like object with 3 elements.") half_x = x / 2 half_y = y / 2 if isinstance(offset, (int, float, complex)): xo, yo = 0, 0 zo = offset elif hasattr(offset, '__len__'): xo, yo, zo = offset else: raise RuntimeError("offset must be a float or an array like object with 3 elements.") xp = x / spacing yp = y / spacing zp = z / spacing # Create grid grid = np.mgrid[-half_x + xo:half_x + xo:xp * 1j, -half_y + yo:half_y + yo:yp * 1j, zo :z+zo:zp * 1j].swapaxes(0, -1) xl, yl, zl, _ = grid.shape grid = grid.reshape(-1, 3) # Superimpose grid on site bb = mol.select_atoms(f"resid {site} and name N CA C") if len(bb.atoms) != 3: raise RuntimeError(f'The provided protein does not have a canonical backbone at residue {site}.') N, CA, C = bb.positions mx, ori = get_grid_mx(N, CA, C) grid_tsf = grid @ mx + ori # Perform clash evaluation sel = mol.select_atoms(f'protein and not resid {site}') d = cdist(grid_tsf, sel.positions) mask = d < vdw_r mask2 = ~np.all(d > 5, axis=-1) # Remove clashing grid points mask = (~mask).prod(axis=-1) * mask2 idxs = np.argwhere(mask).flatten() if len(idxs) == 0: return 0 grid_tsf = grid_tsf[idxs] # check for discontinuities tree = cKDTree(grid_tsf) neighbors = tree.query_pairs(np.sqrt(2 * spacing) * 1.2) root_idx = np.argmin(np.linalg.norm(grid_tsf - CA, axis=-1)) graph = ig.Graph(neighbors) # Use only points continuous with root_idx for g in graph.connected_components(): if root_idx in g: grid_tsf = grid_tsf[g] break # process output if write: filename = write if isinstance(write, str) else 'grid.pdb' write_grid(grid_tsf, name=filename) if return_grid: return grid_tsf else: return len(grid_tsf) * spacing**3