Source code for chilife.chilife

from __future__ import annotations
import tempfile
import logging
import os
import rtoml
import re
import textwrap
import zipfile
import shutil
from copy import deepcopy
from pathlib import Path
from itertools import combinations, chain
from collections import Counter
from typing import Callable, Tuple, Union, List, Dict
from unittest import mock

import igraph as ig
from tqdm import tqdm

import numpy as np
from scipy.spatial.distance import cdist
from scipy.stats import t
import MDAnalysis as mda

import chilife.io as io
from .globals import (
    RL_DIR,
    USER_RL_DIR,
    DATA_DIR,
    SUPPORTED_RESIDUES,
    USER_LIBRARIES,
    USER_dLIBRARIES,
    rotlib_defaults,
    dihedral_defs,
)
from .alignment_methods import local_mx
from .Topology import get_min_topol, guess_bonds
from .pdb_utils import sort_pdb, get_backbone_atoms, get_bb_candidates
from .protein_utils import mutate, guess_mobile_dihedrals
from .MolSysIC import MolSysIC
from .scoring import GAS_CONST, reweight_rotamers, ljEnergyFunc
from .numba_utils import get_delta_r, normdist
from .SpinLabel import SpinLabel
from .RotamerEnsemble import RotamerEnsemble
from .LigandEnsemble import LigandEnsemble
from .SpinLabelTraj import SpinLabelTraj

logging.captureWarnings(True)


[docs] def distance_distribution( *args: "SpinLabel", r: "ArrayLike" = None, sigma: float = 1.0, use_spin_centers: bool = True, dependent: bool = False, uq: bool = False, ) -> np.ndarray: """Calculates total distribution of spin-spin distances among an arbitrary number of spin labels, using the distance range ``r`` (in angstrom). The distance distribution is obtained by summing over all label pair distance distributions. These in turn are calculated by convolving a weighted histogram of pairwise distances between labels with a normal distribution. Distance between labels are calculated either between label ``spin_center`` if ``spin_populations=False``, or between all pairs of spn-bearing atoms if ``spin_populations=True``. Parameters ---------- *args : SpinLabel Any number of spin label objects. r : ArrayLike Evenly spaced array of distances (in angstrom) to calculate the distance distribution over. sigma : float The standard deviation of the normal distribution used in convolution with the distance histogram, in angstrom. Default is 1. use_spin_centers : bool If False, distances are computed between the weighted centroids of the spin atoms. If True, distances are computed by summing over the distributed spin density on spin-bearing atoms on the labels. dependent: bool Consider the (clash) effects of spin label rotamers on each other. uq : bool Perform uncertainty analysis (experimental) Returns ------- P : np.ndarray Predicted distance distribution, in 1/angstrom """ # Allow r to be passed as last non-keyword argument if r is None and np.ndim(args[-1]) != 0: r = args[-1] args = args[:-1] if len(args) < 2: raise TypeError("At least two spin label objects are required.") if r is None: raise TypeError("Keyword argument r with distance domain vector is missing.") if any( not hasattr(arg, atr) for arg in args for atr in ["spin_coords", "spin_centers", "weights"] ): raise TypeError( "Arguments other than spin labels must be passed as a keyword arguments." ) r = np.asarray(r) if any(isinstance(SL, SpinLabelTraj) for SL in args): P = traj_dd(*args, r=r, sigma=sigma) return P elif uq: Ps = [] n_boots = uq if uq > 1 else 100 for i in range(n_boots): dummy_labels = [] for SL in args: idxs = np.random.choice(len(SL), len(SL)) dummy_SL = mock.Mock() dummy_SL.spin_coords = np.atleast_2d(SL.spin_coords[idxs]) dummy_SL.spin_centers = np.atleast_2d(SL.spin_centers[idxs]) dummy_SL.spin_weights = SL.spin_weights dummy_SL.weights = SL.weights[idxs] dummy_SL.weights /= dummy_SL.weights.sum() dummy_labels.append(dummy_SL) Ps.append( pair_dd( *dummy_labels, r=r, sigma=sigma, use_spin_centers=use_spin_centers ) ) Ps = np.array(Ps) return Ps else: P = pair_dd( *args, r=r, sigma=sigma, use_spin_centers=use_spin_centers, dependent=dependent, ) return P
def pair_dd( *args, r: "ArrayLike", sigma: float = 1.0, use_spin_centers: bool = True, dependent: bool = False, ) -> np.ndarray: """Obtain the total pairwise spin-spin distance distribution over ``r`` for a list of spin labels. The distribution is calculated by convolving the weighted histogram of pairwise spin-spin distances with a normal distribution with standard deviation ``sigma``. Parameters ---------- *args : SpinLabel SpinLabels to use when calculating the distance distribution r : ArrayLike Distance domain vector, in angstrom sigma : float Standard deviation of normal distribution used for convolution, in angstrom use_spin_centers : bool If False, distances are computed between spin centers. If True, distances are computed by summing over the distributed spin density on spin-bearing atoms on the labels. dependent: bool Consider the (clash) effects of spin label rotamers on each other. Returns ------- P : np.ndarray Predicted normalized distance distribution, in units of 1/angstrom """ # Calculate pairwise distances and weights SL_Pairs = combinations(args, 2) weights, distances = [], [] for SL1, SL2 in SL_Pairs: if use_spin_centers: coords1 = SL1.spin_centers coords2 = SL2.spin_centers weights1 = SL1.weights weights2 = SL2.weights else: coords1 = SL1.spin_coords.reshape(-1, 3) coords2 = SL2.spin_coords.reshape(-1, 3) weights1 = np.outer(SL1.weights, SL1.spin_weights).flatten() weights2 = np.outer(SL2.weights, SL2.spin_weights).flatten() distances.append(cdist(coords1, coords2).flatten()) weights.append(np.outer(weights1, weights2).flatten()) if dependent: if not isinstance(SL1.energy_func, ljEnergyFunc): raise RuntimeError( "Currently only ljEnergyFunc objects are supported when using dependent=True" ) if ( SL1.energy_func.join_rmin is not SL2.energy_func.join_rmin or SL1.energy_func.join_eps is not SL2.energy_func.join_eps ): raise RuntimeError( "At least two labels passed use different energy functions. Make sure all " "labels use the same energy functions when setting `dependent=True`. This does not" "mean that the energy functions use the same parameters. They have to be the SAME " "object and satisfy `SL1.energy_func is SL2.energy_func`. This can be achieved be " "creating an energy function object" ) nrot1, nrot2 = len(SL1), len(SL2) nat1, nat2 = len(SL1.side_chain_idx), len(SL2.side_chain_idx) join_rmin = SL1.energy_func.join_rmin join_eps = SL1.energy_func.join_eps rmin_ij = join_rmin(SL1.rmin2, SL2.rmin2) eps_ij = join_eps(SL1.eps, SL2.eps) rot_coords1 = SL1.coords[:, SL1.side_chain_idx] rot_coords2 = SL2.coords[:, SL2.side_chain_idx].reshape(-1, 3) ljs = [] for i, rots in enumerate(rot_coords1): lj = cdist(rots, rot_coords2) lj = lj.reshape(1, nat1, nrot2, nat2).transpose(0, 2, 1, 3) lj = rmin_ij[None, None, ...] / lj lj = lj * lj * lj lj = lj * lj lj = lj * lj # Cap lj[lj > 10] = 10 # Rep only lj = eps_ij * (lj * lj) lj = lj.sum(axis=(2, 3)) ljs.append(lj) ljs = np.concatenate(ljs) weights[-1], _ = reweight_rotamers(ljs.flatten(), SL1.temp, weights[-1]) distances = np.concatenate(distances) weights = np.concatenate(weights) # Calculate distance histogram hist, _ = np.histogram( distances, weights=weights, range=(min(r), max(r)), bins=len(r) ) # Convolve with normal distribution if non-zero standard deviation is given if sigma != 0: delta_r = get_delta_r(r) _, g = normdist(delta_r, 0, sigma) P = np.convolve(hist, g, mode="same") else: P = hist # Normalize distribution integral = np.trapezoid(P, r) if integral != 0: P /= integral return P def traj_dd( SL1: "SpinLabelTraj", SL2: "SpinLabelTraj", r: "ArrayLike", sigma: float, **kwargs, ) -> np.ndarray: """Calculate a distance distribution from a trajectory of spin labels by calling ``distance_distribution`` on each frame and averaging the resulting distributions. Parameters ---------- SL1, SL2: SpinLabelTrajectory Spin label to use for distance distribution calculation. r : ArrayLike Distance domain to use when calculating distance distribution. sigma : float Standard deviation of the gaussian kernel used to smooth the distance distribution **kwargs : dict, optional Additional keyword arguments to pass to ``distance_distribution`` . Returns ------- P: ndarray Distance distribution calculated from the provided SpinLabelTrajectories """ # Ensure that the SpinLabelTrajectories have the same number of frames. if len(SL1) != len(SL2): raise ValueError("SpinLabelTraj objects must have the same length") # Calculate the distance distribution for each frame and sum P = np.zeros_like(r) for _SL1, _SL2 in zip(SL1, SL2): P += distance_distribution(_SL1, _SL2, r=r, sigma=sigma, **kwargs) # Normalize distance distribution P /= np.trapezoid(P, r) return P def confidence_interval( data: "ArrayLike", cutoff: float = 0.95, non_negative: bool = True ) -> Tuple[np.array, np.array]: """ Parameters ---------- data: Array Like Array of distance distribution samples cutoff: float Confidence cutoff to calculate confidence interval over. non_negative: bool Clip negative elements of confidence intervals to 0. Returns ------- interval: Tuple[Array, Array] Lower and upper confidence intervals at the provided cutoff """ mu = np.mean(data, axis=0) std = np.std(data, axis=0) ub, lb = t.interval(cutoff, df=len(data) - 1, loc=mu, scale=std) lb[np.isnan(lb)] = 0.0 ub[np.isnan(ub)] = 0.0 if non_negative: lb = lb.clip(0) ub = ub.clip(0) return lb, ub
[docs] def create_library( libname: str, pdb: str, dihedral_atoms: List[List[str]] = None, site: int = 1, resname: str = None, dihedrals: "ArrayLike" = None, weights: "ArrayLike" = None, sigmas: "ArrayLike" = None, permanent: bool = False, default: bool = False, force: bool = False, spin_atoms: List[str] = None, aln_atoms: List[str] = None, backbone_atoms: List[str] = None, description: str = None, comment: str = None, reference: str = None, ) -> None: """Create a chiLife rotamer library from a pdb file and a user defined set of parameters. Parameters ---------- libname : str Name for the new rotamer library. pdb : str Name of (and path to) pdb file containing the user defined spin label structure. This pdb file should contain only the desired spin label and no additional residues. site : int The residue number of the side chain in the pdb file you would like to add. resname : str 3-letter residue code. dihedral_atoms : list a list of rotatable dihedrals. List should contain sub-lists of 4 atom names. Atom names must be the same as defined in the pdb file eg: .. code-block:: python [['CA', 'CB', 'SG', 'SD'], ['CB', 'SG', 'SD', 'CE']...] dihedrals : ArrayLike, optional Array of dihedral angles. If provided the new label object will be stored as a rotamer library with the dihedrals provided. Array should be n x m where n is the number of rotamers and m is the number of dihedrals. weights : ArrayLike, optional Weights associated with the dihedral angles provided by the ``dihedrals`` keyword argument. There should be one weight per rotamer and the rotamer weights should sum to 1. sigmas : ArrayLike, optional Sigma parameter for distributions of dihedral angles. Should be n x m matrix where n is the number of rotamers and m is the number of dihedrals. This feature will be used when performing off rotamer samplings. permanent: bool If set to True the library will be stored in the chilife user_rotlibs directory in addition to the current working directory. default : bool If set to true and permanent is also set to true then this rotamer library will become the default rotamer library for the given resname force: bool = False, If set to True and permanent is also set to true this library will overwrite any existing library with the same name. spin_atoms : list A list of atom names on which the spin density is localized. aln_atoms: List[str] The three atoms defining the alignment coordinate frame of the target residue. Usually the N-CA-C atoms of the protein backbone or similar for nucleotide backbones. backbone_atoms: List[str] Names of the atoms that correspond to backbone atoms. This includes aln_atoms and any other atoms that may need to be adjusted to fit the target residue backbone, e.g. the hydrogen atoms of carboxyl oxygen atoms. Any atoms not part of the backbone will be considered side chain atoms and may be subject to sampling. description : str A short (< 60 characters) description of the side chain library being created comment : str Additional information about the rotamer library that may not fit in description. reference : str Any relevant citations associated with the rotamer library. """ resname = libname[:3] if resname is None else resname struct, spin_atoms = pre_add_library(pdb, spin_atoms, aln_atoms=aln_atoms) resi_selection = struct.select_atoms(f"resnum {site}") bonds = resi_selection.intra_bonds.indices if not continuous_topol(resi_selection, bonds): raise RuntimeError( "The RotamerEnsemble does not seem to have a consistent and continuous bond topology over " "all states. Please check the input PDB and remove any states that do not have the " "expected bond topology. Alternatively, you can force certain bonds by adding CONECT " "information to the PDB file. See " "https://www.wwpdb.org/documentation/file-format-content/format33/sect10.html" ) # Convert loaded rotamer ensemble to internal coords internal_coords = MolSysIC.from_atoms( resi_selection, preferred_dihedrals=dihedral_atoms, bonds=bonds, use_chain_operators=False, ) if not dihedral_atoms: dihedral_atoms = guess_mobile_dihedrals(internal_coords, aln_atoms=aln_atoms) internal_coords.shift_resnum(-(site - 1)) if len(internal_coords.chains) > 1: raise ValueError( "The PDB of the label supplied appears to have a chain break. Please check your PDB and " "make sure there are no chain breaks in the desired label and that there are no other " "chains in the pdb file. If the error persists, check to be sure all atoms are the correct " "element as chilife uses the elements to determine if atoms are bonded." ) backbone_atoms, aln_atoms = aln_sanity( internal_coords, resname, backbone_atoms, aln_atoms ) # If multi-state pdb extract dihedrals from pdb if dihedrals is None: dihedrals = np.rad2deg( [ic.get_dihedral(1, dihedral_atoms) for ic in internal_coords] ) if dihedrals.shape == (len(dihedrals),): dihedrals.shape = (len(dihedrals), 1) if weights is None: weights = np.ones(len(dihedrals)) weights /= weights.sum() save_dict = prep_restype_savedict( libname, resname, internal_coords, weights, dihedrals, dihedral_atoms, sigmas=sigmas, spin_atoms=spin_atoms, aln_atoms=aln_atoms, backbone_atoms=backbone_atoms, description=description, comment=comment, reference=reference, ) # Check that all rotamer backbones are aligned if ( np.sum(np.abs(np.diff(save_dict["coords"][:, 0]))) / len(save_dict["coords"]) > 1e-3 ): aln_idxs = [ np.argwhere(save_dict["atom_names"] == a).flat[0] for a in save_dict["aln_atoms"] ] backbone = np.squeeze(save_dict["coords"][:, aln_idxs]) oris, mxs = [np.array(x) for x in zip(*[local_mx(*bb) for bb in backbone])] save_dict["coords"] = (save_dict["coords"] - oris[:, None, :]) @ mxs else: backbone = save_dict["coords"][0, :3] ori, mx = local_mx(*backbone) save_dict["coords"] = np.einsum("ijk,kl->ijl", save_dict["coords"] - ori, mx) # Save rotamer library np.savez(Path().cwd() / f"{libname}_rotlib.npz", **save_dict, allow_pickle=True) if permanent: add_library( f"{libname}_rotlib.npz", libname=libname, default=default, force=force )
[docs] def create_dlibrary( libname: str, pdb: str, sites: Tuple, dihedral_atoms: List[List[List[str]]], resname: str = None, dihedrals: "ArrayLike" = None, weights: "ArrayLike" = None, sort_atoms: Union[List[str], Dict, str] = True, permanent: bool = False, default: bool = False, force: bool = False, spin_atoms: List[str] = None, description: str = None, comment: str = None, reference: str = None, ) -> None: """Create a chiLife bifunctional rotamer library from a pdb file and a user defined set of parameters. Parameters ---------- libname: str, Name for the user defined label. pdb : str Name of (and path to) pdb file containing the user defined spin label structure. This pdb file should contain only the desired spin label and no additional residues. sites : Tuple[int] The residue numbers that the bifunctional label is attached to. dihedral_atoms: List[List[List[str]]] A list defining the mobile dihedral atoms of both halves of the bifunctional label. There should be two entries, one for each site that the bifunctional label attaches to. Each entry should contain a sublist of dihedral definitions. Dihedral definitions are a list of four strings that are the names of the four atoms defining the dihedral angle. .. code-block:: python [ # Side chain 1 [['CA', 'CB', 'SG', 'SD'], ['CB', 'SG', 'SD', 'CE']...], # Side chain 2 [['CA', 'CB', 'SG', 'SD'], ['CB', 'SG', 'SD', 'CE']...] ] resname : str Residue type 3-letter code. dihedrals : ArrayLike, optional Array of dihedral angles. If provided the new label object will be stored as a rotamer library with the dihedrals provided. weights : ArrayLike, optional Weights associated with the dihedral angles provided by the ``dihedrals`` keyword argument permanent: bool If set to True the library will be stored in the chilife user_rotlibs directory in addition to the current working directory. sort_atoms: bool Rotamer library atoms are sorted to achieve consistent internal coordinates by default. Sometimes this sorting is not desired or the PDB file is already sorted. Setting this option to ``False`` turns off atom sorting. permanent : bool If set to True, a copy of the rotamer library will be stored in the chiLife rotamer libraries directory default : bool If set to true and permanent is also set to true then this rotamer library will become the default rotamer library for the given resname force: bool = False, If set to True and permanent is also set to true this library will overwrite any existing library with the same name. spin_atoms : list, dict List of atom names on which the spin density is localized, e.g ['N', 'O'], or dictionary with spin atom names (key 'spin_atoms') and spin atom weights (key 'spin_weights'). description : str A short (< 60 characters) description of the side chain library being created comment : str Additional information about the rotamer library that may not fit in description. reference : str Any relevant citations associated with the rotamer library. """ site1, site2 = sites increment = site2 - site1 resname = libname[:3] if resname is None else resname if len(dihedral_atoms) != 2: dihedral_error = True elif not isinstance(dihedral_atoms[0], List): dihedral_error = True elif len(dihedral_atoms[0][0]) != 4: dihedral_error = True else: dihedral_error = False if dihedral_error: raise RuntimeError( "dihedral_atoms must be a list of lists where each sublist contains the list of dihedral atoms" "for the i and i+{increment} side chains. Sublists can contain any amount of dihedrals but " "each dihedral should be defined by exactly four unique atom names that belong to the same " "residue number" ) struct, spin_atoms = pre_add_library( pdb, spin_atoms, uniform_topology=False, sort_atoms=sort_atoms ) res1 = struct.select_atoms(f"resnum {site1}") res2 = struct.select_atoms(f"resnum {site2}") # Identify the cap based off of the user defined mobile dihedrals last_bonds = [] for i, res in enumerate((res1, res2)): site = res.resnums[0] dh_bonds = [dihedral[1:3] for dihedral in dihedral_atoms[i]] dh_bond_idxs = [] for anames in dh_bonds: asel = struct.select_atoms(f"resnum {site} and name {' '.join(anames)}") # Sometimes dihedrals are defined with the bonded atom(s) on the other residue if len(asel) < 2: missing_name = anames[0] if asel.names[0] == anames[1] else anames[1] other_site = res1.resnums[0] if i == 1 else res2.resnums[0] asel += struct.select_atoms( f"resnum {other_site} and name {missing_name}" ) dh_bond_idxs.append(asel.ix) dh_bond_idxs = np.array(dh_bond_idxs) last_bonds.append(dh_bond_idxs[np.argmax(dh_bond_idxs[:, 0])]) bond_indices = struct.bonds.indices graph = ig.Graph(n=np.max(bond_indices), edges=bond_indices) # disconnect the cap by cutting the rotatable bond of the dihedrals in each site last_bonds = [tuple(bond) for bond in last_bonds] graph.delete_edges(last_bonds) # Find all atoms in between the cut bonds starts = [node[1] for node in last_bonds] cap1_idxs, _ = graph.dfs(starts[0]) cap2_idxs, _ = graph.dfs(starts[1]) ovlp1_selection = struct.atoms[cap1_idxs] csts = ovlp1_selection.names csts = csts.astype("U4") ovlp1_selection.residues.resnums = site1 ovlp1_selection.residues.resids = site1 res1 += ovlp1_selection[~np.isin(ovlp1_selection.ix, res1.ix)] res1_bonds = res1.intra_bonds.indices error_message = ( "The dRotamerEnsemble does not seem to have a consistent and continuous bond topology over" " all states. Please check the input PDB and remove any states that do not have the " "expected bond topology. Alternatively, you can force certain bonds by adding CONECT " "information to the PDB file. See " "https://www.wwpdb.org/documentation/file-format-content/format33/sect10.html" ) if not continuous_topol(res1, res1_bonds): raise RuntimeError(error_message) IC1 = MolSysIC.from_atoms( res1, dihedral_atoms[0], res1_bonds, use_chain_operators=False ) ovlp2_selection = struct.atoms[cap2_idxs] ovlp2_selection.residues.resnums = site2 ovlp2_selection.residues.resids = site2 res2 += ovlp2_selection[~np.isin(ovlp2_selection.ix, res2.ix)] res2_bonds = res2.intra_bonds.indices if not continuous_topol(res2, res2_bonds): raise RuntimeError(error_message) IC2 = MolSysIC.from_atoms( res2, dihedral_atoms[1], res2_bonds, use_chain_operators=False ) IC1.shift_resnum(-(site1 - 1)) IC2.shift_resnum(-(site2 - 1)) if len(IC1.chains) > 1 or len(IC2.chains) > 1: raise RuntimeError( "The PDB of the label supplied appears to have a chain break. Please check your PDB and " "make sure there are no chain breaks in the desired label and that there are no other " "chains in the pdb file. If the error persists, check to be sure all atoms are the correct " "element as chilife uses the elements to determine if atoms are bonded." ) backbone1, aln1 = aln_sanity(IC1, resname) backbone2, aln2 = aln_sanity(IC2, resname) # If multi-state pdb extract rotamers from pdb if dihedrals is None: dihedrals = [] for IC, resnum, dihedral_set in zip([IC1, IC2], [site1, site2], dihedral_atoms): dihedrals.append([ICi.get_dihedral(1, dihedral_set) for ICi in IC]) if weights is None: weights = np.ones(len(IC1)) weights /= weights.sum() libname = libname + f"ip{increment}" save_dict_1 = prep_restype_savedict( libname + "A", resname, IC1, weights, dihedrals[0], dihedral_atoms[0], spin_atoms=spin_atoms, backbone_atoms=backbone1, aln_atoms=aln1, description=description, comment=comment, reference=reference, ) save_dict_2 = prep_restype_savedict( libname + "B", resname, IC2, weights, dihedrals[1], dihedral_atoms[1], backbone_atoms=backbone2, aln_atoms=aln2, resi=1 + increment, spin_atoms=spin_atoms, ) # Save individual data sets and zip cwd = Path().cwd() np.savez(cwd / f"{libname}A_rotlib.npz", **save_dict_1, allow_pickle=True) np.savez(cwd / f"{libname}B_rotlib.npz", **save_dict_2, allow_pickle=True) np.save(cwd / f"{libname}_csts.npy", csts) with zipfile.ZipFile(f"{libname}_drotlib.zip", mode="w") as archive: archive.write(f"{libname}A_rotlib.npz") archive.write(f"{libname}B_rotlib.npz") archive.write(f"{libname}_csts.npy") # Cleanup intermediate files os.remove(f"{libname}A_rotlib.npz") os.remove(f"{libname}B_rotlib.npz") os.remove(f"{libname}_csts.npy") if permanent: add_library( f"{libname}_drotlib.zip", libname=libname, default=default, force=force )
def pre_add_library( pdb: str, spin_atoms: List[str], uniform_topology: bool = True, sort_atoms: bool = True, aln_atoms: List[str] = None, ) -> Tuple[mda.Universe, Dict]: """Helper function to sort PDBs, save spin atoms, update lists, etc. when adding a SpinLabel or dSpinLabel. Parameters ---------- pdb : str Name (and path) of the pdb containing the new label. spin_atoms : List[str] Atoms of the SpinLabel where the unpaired electron is located. uniform_topology : bool Assume all rotamers of the library have the same topology (i.e. no differences in atom bonding). If false chilife will attempt to find the minimal topology shared between all rotamers for defining internal coordinates. sort_atoms : bool Switch to turn off atom sorting. Only set to true if you know what you are doing. aln_atoms : List[str] Atoms intended for rotamer ensemble alignment. This will affect sorting if using non-standard alignment atoms. Should only be used internally. Returns ------- struct : MDAnalysis.Universe MDAnalysis Universe object containing the rotamer ensemble with each rotamer as a frame. All atoms should be properly sorted for consistent construction of internal coordinates. spin_atoms : dict Dictionary of spin atoms and weights if specified. """ if sort_atoms: # Sort the PDB for optimal dihedral definitions pdb_lines, bonds = sort_pdb( pdb, uniform_topology=uniform_topology, return_bonds=True, aln_atoms=aln_atoms, ) bonds = get_min_topol(pdb_lines, forced_bonds=bonds) # Write a temporary file with the sorted atoms if isinstance(pdb_lines[0], list): with tempfile.NamedTemporaryFile( suffix=".pdb", mode="w+", delete=False ) as tmpfile: for i, model in enumerate(pdb_lines): tmpfile.write(f"MODEL {i + 1}\n") for atom in model: tmpfile.write(atom) tmpfile.write("ENDMDL\n") else: with tempfile.NamedTemporaryFile( suffix=".pdb", mode="w+", delete=False ) as tmpfile: for line in pdb_lines: tmpfile.write(line) # Load sorted atom pdb using MDAnalysis and remove tempfile struct = mda.Universe(tmpfile.name, in_memory=True) struct.add_bonds(bonds) os.remove(tmpfile.name) else: struct = mda.Universe(pdb, in_memory=True) bonds = guess_bonds(struct.atoms.positions, struct.atoms.types) struct.add_bonds(bonds) # Store spin atoms if provided if spin_atoms is not None: if isinstance(spin_atoms, str): spin_atoms = spin_atoms.split() if isinstance(spin_atoms, dict): spin_weights = list(spin_atoms.values()) spin_atoms = list(spin_atoms.keys()) else: w = 1 / len(spin_atoms) spin_weights = [w for _ in spin_atoms] spin_atoms = {"spin_atoms": spin_atoms, "spin_weights": spin_weights} return struct, spin_atoms def aln_sanity(internal_coords, resname, backbone_atoms=None, aln_atoms=None): """ Sanity check that the alignment coordinates of mono-functional rotamer ensembles and backbone indices are correct. Parameters ---------- internal_coords : chiLife.MolSysIC Internal coordinates of a rotamer ensemble. resname : str Name of the residue making up the rotamer ensemble. backbone_atoms: List[str] Names of the atoms belonging to the backbone rotamer ensemble. aln_atoms: List[str] Names of the atoms to use for alignments. Returns ------- backbone_atoms, aln_atoms """ if backbone_atoms is None and aln_atoms: root_idx = np.argwhere(internal_coords.atom_names == aln_atoms[1]).flat[0] aname_lst = internal_coords.atom_names.tolist() neighbor_idx = [aname_lst.index(a) for a in aln_atoms[::2]] backbone_atoms = get_backbone_atoms( internal_coords.topology.graph, root_idx, neighbor_idx ) backbone_atoms = [aname_lst[idx] for idx in backbone_atoms] elif backbone_atoms is None: bb_candidates = get_bb_candidates(internal_coords.atom_names, resname) backbone_atoms = [b for b in bb_candidates if b in internal_coords.atom_names] # determine alignment atoms based on where side chain atoms connect to backbone atoms bb_idxs = np.argwhere( np.isin(internal_coords.atom_names, backbone_atoms) ).flatten() root_candidates = [] aln_candidates = [] for bb in bb_idxs: neighbors = [ i for i in internal_coords.topology.graph.neighbors(bb) if internal_coords.atom_types[i] != "H" ] if not all(np.isin(neighbors, bb_idxs)) and len(neighbors) > 1: root_candidates.append(bb) aln_candidates.append([n for n in neighbors if n in bb_idxs] + [bb]) aln_candidates = [cdt for cdt in aln_candidates if len(cdt) == 3] if len(root_candidates) == 1: aln_atoms = [ internal_coords.atom_names[idx] for idx in sorted(aln_candidates[0]) ] elif len(root_candidates) > 1: raise RuntimeError( "There are more than one possible sets of alignment atoms for this residue. These " "include:\n" + "\n".join( [ f"atoms names: {[backbone_atoms[idx] for idx in idxs]}" for idxs in aln_candidates ] ) + "\nPlease select one using the `aln_atoms` keyword argument. The alignment atoms should " "be the backbone atom preceding the first side chain atom and it's two adjacent " "backbone atoms" ) else: raise RuntimeError( "The optimal set of alignment atoms to use for this residue is ambiguous. Please specify" "explicitly using the `aln_atoms` keyword argument." ) if backbone_atoms == []: raise RuntimeError( "chiLife was unable to find a suitable set of atoms representing the macromolecular " "backbone. Please ensure your rotamer ensemble uses standard protein or nucleic acid " "backbone atom names, or specify the alignment atoms manually using the `aln_atoms`" "keyword argument." ) return backbone_atoms, aln_atoms def prep_restype_savedict( libname: str, resname: str, internal_coords: "MolSysIC", weights: "ArrayLike", dihedrals: "ArrayLike", dihedral_atoms: "ArrayLike", sigmas: "ArrayLike" = None, resi: int = 1, spin_atoms: List[str] = None, aln_atoms: List[str] = None, backbone_atoms: List[str] = None, description: str = None, comment: str = None, reference: str = None, ) -> Dict: """ Helper function to add new residue types to chilife. Parameters ---------- libname : str Name of residue to be stored. resname : str Residue name (3-letter code) internal_coords : List[MolSysIC] list of internal coordinates of the new residue type. weights : ArrayLike Array of weights corresponding to each rotamer of the library dihedrals : ArrayLike Array of mobile dihedral angles for each rotamer fo the library dihedral_atoms : ArrayLike Definition of mobile dihedral angles for a single structure. Should be a list of 4 string lists where the four strings are the names of the atoms that define the dihedral. sigmas : ArrayLike Array of sigma values for each dihedral of each rotamer. resi: int The residue number to be stored. spin_atoms: List[str] A list of atom names corresponding to the atoms where the spin density resides. aln_atoms: List[str] The three atoms defining the alignment coordinate frame of the target residue. Usually the N-CA-C atoms of the protein backbone or similar for nucleotide backbones. backbone_atoms: List[str] Names of the atoms that correspond to backbone atoms. This includes aln_atoms and any other atoms that may need to be adjusted to fit the target residue backbone, e.g. the hydrogen atoms of carboxyl oxygen atoms. Any atoms not part of the backbone will be considered side chain atoms and may be subject to sampling. description : str A short (< 60 characters) description of the side chain library being created comment : str Additional information about the rotamer library that may not fit in description. reference : str Any relevant citations associated with the rotamer library. Returns ------- save_dict : dict Dictionary of all the data needed to build a RotamerEnsemble. """ # Extract coordinates and transform to the local frame if len(dihedrals) > 1 and len(internal_coords) == 1: idxs = np.zeros(len(dihedrals), dtype=int) new_z_matrix = internal_coords.batch_set_dihedrals( idxs, dihedrals, resi, dihedral_atoms ) internal_coords.load_new(new_z_matrix) atom_types = internal_coords.atom_types.copy() atom_names = internal_coords.atom_names.copy() coords = internal_coords.protein.trajectory.coordinate_array.copy() if np.any(np.isnan(coords)): idxs = np.argwhere(np.isnan(np.sum(coords, axis=(1, 2)))).T[0] adxs = np.argwhere(np.isnan(np.sum(coords, axis=(0, 2)))).T[0] adxs = atom_names[adxs] raise ValueError( f"Coords of rotamer {' '.join((str(idx) for idx in idxs))} at atoms {' '.join((str(idx) for idx in adxs))} " f"cannot be converted to internal coords because there is a chain break in this rotamer. Either enforce " f"bonds explicitly by adding CONECT information to the PDB or fix the structure(s)" ) save_dict = { "rotlib": libname, "resname": resname, "coords": coords, "internal_coords": internal_coords, "weights": weights, "atom_types": atom_types, "atom_names": atom_names, "dihedrals": dihedrals, "dihedral_atoms": dihedral_atoms, "aln_atoms": aln_atoms, "backbone_atoms": backbone_atoms, "description": description, "comment": comment, "reference": reference, } if sigmas is None: pass elif sigmas.shape == dihedrals.shape: save_dict["sigmas"] = sigmas elif sigmas.shape == (*dihedrals.shape, 3): save_dict["sigmas"] = sigmas[..., 2] save_dict["locs"] = sigmas[..., 1] save_dict["skews"] = sigmas[..., 0] if spin_atoms: save_dict.update(spin_atoms) save_dict["type"] = "chilife rotamer library" save_dict["format_version"] = max(io.rotlib_formats.keys()) return save_dict
[docs] def add_rotlib_dir(directory: Union[Path, str]) -> None: """ Add a directory to search for rotlibs when none are found in te working directory. This directory will be searched before the chiLife default rotlib directory. Parameters ---------- directory: Path, str String or Path object of the desired directory. """ directory = Path(directory) with open(RL_DIR / "additional_rotlib_dirs.txt", "a+") as f: f.write(str(directory)) f.write("\n") USER_RL_DIR.append(Path(directory))
[docs] def remove_rotlib_dir(directory: Union[Path, str]) -> None: """ Remove a user added directory to search for rotlibs. Parameters ---------- directory: Path, str String or Path object of the desired directory. """ directory = Path(directory) if directory in USER_RL_DIR: USER_RL_DIR.remove(directory) with open(RL_DIR / "additional_rotlib_dirs.txt", "r") as f: TMP = [ Path(line.strip()) for line in f.readlines() if Path(line.strip()) != directory ] with open(RL_DIR / "additional_rotlib_dirs.txt", "w") as f: for p in TMP: f.write(str(p)) f.write("\n")
[docs] def add_library( filename: Union[str, Path], libname: str = None, default: bool = False, force: bool = False, ): """ Add the provided rotamer library to the chilife rotamer library directory so that it does not need to be in the working directory when utilizing. .. note:: It is generally preferred to keep a custom directory of rotlibs and use :func:`~chilife.chilife.add_rotlib_dir` to force chiLife to search that directory for rotlibs since this function adds rotlibs to the chilife install directory that can be overwritten with updates and is not preserved when updating python. Parameters ---------- filename : str, Path Name or Path object oof the rotamer library npz file. libname : str Unique name of the rotamer library default : bool If True, chilife will make this the default rotamer library for this residue type. force : bool If True, chilife will overwrite any existing rotamer library with the same name if it exists. """ store_loc = (RL_DIR / "user_rotlibs/") / filename filename = Path(filename) if libname is None: libname = re.sub("_d{0,1}rotlib.(npz|zip)", "", filename.name) library = io.read_library(Path(filename), None, None) drotlib = False if isinstance(library, tuple): drotlib = True library, _, _ = library resname = str(library["resname"]) add_to_defaults(resname, libname, default) if force or not store_loc.exists(): shutil.copy(filename, str(store_loc)) if drotlib: global USER_dLIBRARIES USER_dLIBRARIES.add(libname) else: global USER_LIBRARIES USER_LIBRARIES.add(libname) add_dihedral_def(libname, library["dihedral_atoms"].tolist(), force=force) else: raise NameError( "A rotamer library with this name already exists! Please choose a different name or do" "not store as a permanent rotamer library" )
def add_dihedral_def(name: str, dihedrals: "ArrayLike", force: bool = False) -> None: """Helper function to add the dihedral definitions of user defined labels and libraries to the chilife knowledge base. Parameters ---------- name : str Name of the residue. dihedrals : ArrayLike List of lists of atom names defining the dihedrals. force : bool Overwrite any dihedral definition with the same name if it exists. """ # Reload in case there were other changes if not add_to_toml( DATA_DIR / "dihedral_defs.toml", key=name, value=dihedrals, force=force ): raise ValueError( f"There is already a dihedral definition for {name}. Please choose a different name." ) # Add to active dihedral def dict dihedral_defs[name] = dihedrals
[docs] def remove_library(name: str, prompt: bool = True): """ Removes a library from the chilife rotamer library directory and from the chilife dihedral definitions Parameters ---------- name : str Name of the rotamer library prompt : bool If set to False al warnings will be ignored. """ global USER_LIBRARIES, USER_dLIBRARIES if (name not in USER_LIBRARIES) and (name not in USER_dLIBRARIES) and prompt: raise ValueError( f"{name} is not in the set of user labels or user dLables. Check to make sure you have the " f"right label. Note that only user labels can be removed." ) if prompt: ans = input( f"WARNING: You have requested the permanent removal of the {name} label/rotamer library. \n" f"Are you sure you want to do this? (y/n)" ) if ans.lower().startswith("y"): pass elif ans.lower().startswith("n"): print("Canceling label removal") return None else: print(f'"{ans}" is not an intelligible answer. Canceling label removal') return None # Remove files files = ( list((RL_DIR / "user_rotlibs").glob(f"{name}*")) + list((RL_DIR / "residue_internal_coords").glob(f"{name}*")) + list((RL_DIR / "residue_pdbs").glob(f"{name}*")) ) for file in files: if file.exists(): os.remove(str(file)) if name in USER_dLIBRARIES: USER_dLIBRARIES.remove(name) if name in USER_LIBRARIES: USER_LIBRARIES.remove(name) if name in dihedral_defs: del dihedral_defs[name] remove_from_toml(DATA_DIR / "dihedral_defs.toml", name) remove_from_defaults(name)
def add_to_toml( file: Union[str, Path], key: str, value: Union[str, List, dict], force: bool = False ): """Helper function to add new key:value pairs to toml files like dihedral_defs.toml Parameters ---------- file : str, Path Name or path to the toml file to be edited. key : str toml entry to be added. value : str, List, dict Item to add to the toml file under the key. force : bool If True, any existing data under the `key` entry will be overwritten. """ with open(file, "r") as f: local = rtoml.load(f) if key in local and not force: return False if key is not None: local[key] = value with open(file, "w") as f: rtoml.dump(local, f) return True def remove_from_toml(file: Union[str, Path], entry: str): """ Remove an entry for a chilife toml file Parameters ---------- file : str, Path Name or Path of the toml file to be edited. entry : str Entry or key of the toml file to be removed. """ with open(file, "r") as f: local = rtoml.load(f) if entry in local: del local[entry] with open(file, "w") as f: rtoml.dump(local, f) return True def add_to_defaults(resname: str, rotlib_name: str, default: bool = False): """ Helper function to add a rotamer library to the defaults stack. Parameters ---------- resname : str 3-letter code name of the residue. rotlib_name : str Name of the rotamer library. default : bool If True the rotamer library will be added to the top of the defaults stacking making it the new default rotamer library for the residue type. If False it will be added to the bottom of the stack and will only be the default if there are no other rotamer libraries for the residue type. """ file = RL_DIR / "defaults.toml" with open(file, "r") as f: local = rtoml.load(f) backup = deepcopy(local) if resname not in local: local[resname] = [] if resname not in rotlib_defaults: rotlib_defaults[resname] = [] pos = 0 if default else len(local[resname]) if rotlib_name not in rotlib_defaults[resname]: rotlib_defaults[resname].insert(pos, rotlib_name) if rotlib_name not in local[resname]: local[resname].insert(pos, rotlib_name) safe_save(file, local, backup) def remove_from_defaults(rotlib_name: str): """ Helper function to remove a rotamer library from the defaults stack. Parameters ---------- rotlib_name : str Name of the rotamer library to be removed. """ file = RL_DIR / "defaults.toml" with open(file, "r") as f: local = rtoml.load(f) backup = deepcopy(local) keys = [key for key, val in local.items() if rotlib_name in val] for key in keys: rotlib_defaults[key].remove(rotlib_name) local[key].remove(rotlib_name) if local[key] == []: del local[key] safe_save(file, local, backup) def safe_save(file: Union[str, Path], data: dict, backup: dict): """ Helper function to save toml files. Will preserve original toml file if there is an error. Parameters ---------- file : str, Path Name of the toml file to be saved. data : dict Data to save in the toml file. backup : dict Original data from the toml file to save if there is an error. """ try: with open(file, "w") as f: rtoml.dump(data, f) except rtoml._rtoml.TomlSerializationError: with open(file, "w") as f: rtoml.dump(backup, f) raise def _print_monofunc(files: List[Path]): """ Print information of a list of mono-functional rotamer libraries. Parameters ---------- files : List[Path] List of Path objects corresponding to the rotamer library files. """ maxw = max([len(file.stem) for file in files]) - 6 wrapper = textwrap.TextWrapper( width=80, subsequent_indent=" " * (maxw + 3), replace_whitespace=False, drop_whitespace=False, ) print(f"{'monofunctional' + ':':^80}") print("-" * 80) for file in files: with np.load(file, allow_pickle=True) as f: for key in f.keys(): resname = file.stem if resname.endswith("_rotlib"): resname = resname[: -len("_rotlib")] if "description" in f: descr = f["description"] else: descr = "No description" print("\n".join(wrapper.wrap(f"{resname:<{maxw}} : {descr}"))) print("-" * 80) def _print_bifunc(files): """ Print information of a list of bifunctional rotamer libraries. Parameters ---------- files : List[Path] List of Path objects corresponding to the rotamer library files. """ maxw = max([len(file.stem) for file in files]) - 7 wrapper = textwrap.TextWrapper( width=80, subsequent_indent=" " * (maxw + 3), replace_whitespace=False, drop_whitespace=False, ) print(f"{'bifunctional' + ':':^80}") print("-" * 80) for file in files: with zipfile.ZipFile(file, "r") as archive: name = archive.namelist()[0] resname = file.stem context = name[3:6] if resname.endswith("_drotlib"): resname = resname[: -len("_drotlib")] with archive.open(name) as of: with np.load(of, allow_pickle=True) as f: if "description" in f: descr = f["description"] else: descr = "No description" print( "\n".join( wrapper.wrap(f"{resname:<{maxw}} : {descr} in the {context} context") ) ) print("-" * 80)
[docs] def list_available_rotlibs(): """ Lists residue types and rotamer libraries that are currently available. More information on any individual rotamer library by using the rotlib_info function. """ print() dirs = {"current working directory": Path.cwd()} dirs.update( {f"user rotlib directory {n + 1}": u_dir for n, u_dir in enumerate(USER_RL_DIR)} ) dirs.update({"chilife rotlib directory": RL_DIR / "user_rotlibs"}) for dname, dpath in dirs.items(): monofunctional = tuple(dpath.glob("*_rotlib.npz")) bifunctional = tuple(dpath.glob("*_drotlib.zip")) if len(monofunctional) + len(bifunctional) > 0: print("*" * 80) print(f"*{f'Rotlibs in {dname}':^78}*") if "user" in dname: print(f"*{dname.name:^78}*") print("*" * 80) if len(monofunctional) > 0: _print_monofunc(monofunctional) if len(bifunctional) > 0: _print_bifunc(bifunctional) print() print("*" * 80) print(f"*{'DUNBRACK ROTLIBS':^78}*") print( f"*{'ARG, ASN, ASP, CSY, GLN, GLU, HIS, ILE, LEU, LYS, MET, PHE, PRO, SER,':^78}*" ) print(f"*{'THR, TRP, TYR, VAL':^78}*") print(f"*{'(no ALA, GLY)':^78}*") print("*" * 80)
def rotlib_info(rotlib: Union[str, Path]): """ Display detailed information about the rotamer library. Parameters ---------- rotlib : str, Path Name of the rotamer library to print the information of. """ mono_rotlib = io.get_possible_rotlibs( rotlib, suffix="rotlib", extension=".npz", was_none=False, return_all=True ) bifunc_rotlib = io.get_possible_rotlibs( rotlib, suffix="drotlib", extension=".zip", was_none=False, return_all=True ) if (mono_rotlib is None) and (bifunc_rotlib is None): print( "No rotlib has been found with this name. Please make sure it is spelled correctly and that " / "it includes the path to the file if it is not a directory specified by `chilife.add_rotlib_dir`" ) if mono_rotlib is not None: if len(mono_rotlib) > 1: print( "chiLife has found several mono-functional rotlibs that match the provided name:" ) for lib_file in mono_rotlib: _print_rotlib_info(lib_file) if bifunc_rotlib is not None: if len(bifunc_rotlib) > 1: print( "chiLife has found several bi-functional rotlibs that match the provided name:" ) for lib_file in bifunc_rotlib: _print_drotlib_info(lib_file) def _print_rotlib_info(lib_file: Union[str, Path]): """ Print information about a single rotamer library file. Parameters ---------- lib_file : Union[str, Path] string designating the path or a Path object to the rotamer library .npz file. """ lib = io.read_rotlib(lib_file) wrapper = textwrap.TextWrapper( width=80, subsequent_indent=" ", replace_whitespace=False, drop_whitespace=False, ) print() print("*" * 80) print(f"* Rotamer Library Name: {lib['rotlib']:>52} *") print("*" * 80) atom_counts = ", ".join( f"{key}: {val}" for key, val in Counter(lib["atom_types"]).items() ) myl = [ wrapper.wrap(i) for i in ( f"Rotamer Library Name: {lib['rotlib']}", f"File: {lib_file}", f"Description: {lib['description']}", f"Comment: {lib['comment']}\n", f"Length of library: {len(lib['weights'])}", "Dihedral definitions: ", *[f" {d}" for d in lib["dihedral_atoms"]], f"Spin atoms: {lib.get('spin_atoms')}", f"Number of atoms: {atom_counts}", f"Number of heavy atoms: {np.sum(lib['atom_types'] != 'H')}", f"Reference: {lib['reference']}", f"chiLife rotlib format: {lib['format_version']}", "*" * 80, ) ] print("\n".join(list(chain.from_iterable(myl)))) def _print_drotlib_info(lib_file: Union[str, Path]): """ Print information about a single bifunctional rotamer library file. Parameters ---------- lib_file : Union[str, Path] String designating the path or a Path object to the bifunctional rotamer library .zip file. """ lib = io.read_drotlib(lib_file) wrapper = textwrap.TextWrapper( width=80, subsequent_indent=" ", replace_whitespace=False, drop_whitespace=False, ) libA, libB, csts = lib libBmask = ~np.isin(libB["atom_names"], csts) print() print("*" * 80) print(f"* Rotamer Library Name: {libA['rotlib']:>52} *") print("*" * 80) atypes = np.concatenate((libA["atom_types"], libB["atom_types"][libBmask])) atom_counts = ", ".join(f"{key}: {val}" for key, val in Counter(atypes).items()) myl = [ wrapper.wrap(i) for i in ( f"Rotamer Library Name: {libA['rotlib']}", f"File: {lib_file}", f"Description: {libA['description']}", f"Comment: {libA['comment']}\n", f"Length of library: {len(libA['weights'])}", "Dihedral definitions: ", " site 1:", *[f" {d}" for d in libA["dihedral_atoms"]], " site 2:", *[f" {d}" for d in libB["dihedral_atoms"]], f"Spin atoms: {libA['spin_atoms']}", f"Number of atoms: {atom_counts}\n", f"Reference: {libA['reference']}", f"chiLife rotlib format: {libA['format_version']}", "*" * 80, ) ] print("\n".join(list(chain.from_iterable(myl))))
[docs] def repack( protein: Union["mda.Universe", "mda.AtomGroup"], *ensembles: Union["RotamerEnsemble", "LigandEnsemble"], repetitions: int = 200, temp: float = 1, energy_func: Callable = None, off_rotamer=False, **kwargs, ) -> Tuple["mda.Universe", "ArrayLike"]: """Markov chain Monte Carlo repack a protein around any number of SpinLabel or RotamerEnsemble objects. Parameters ---------- protein : MDAnalysis.Universe, MDAnalysis.AtomGroup MolSys to be repacked ensembles : RotamerEnsemble, SpinLabel RotamerEnsemble or SpinLabel object placed at site of interest repetitions : int Number of successful MC samples to perform before terminating the MC sampling loop temp : float, ArrayLike Temperature (Kelvin) for both clash evaluation and metropolis-hastings acceptance criteria. Accepts a list or array like object of temperatures if a temperature schedule is desired energy_func : Callable Energy function to be used for clash evaluation. Must accept a protein and RotamerEnsemble object and return an array of potentials in kcal/mol, with one energy per rotamer in the rotamer ensemble off_rotamer : bool Boolean argument that decides whether off rotamer sampling is used when repacking the provided residues. kwargs : dict Additional keyword arguments to be passed to ``mutate`` . Returns ------- protein: MDAnalysis.Universe MCMC trajectory of local repack deltaEs: np.ndarray: Change in energy_func score at each acceptance of MCMC trajectory """ residue_ensembles = [ens for ens in ensembles if isinstance(ens, RotamerEnsemble)] ligand_ensembles = [ens for ens in ensembles if isinstance(ens, LigandEnsemble)] temp = np.atleast_1d(temp) KT = {t: GAS_CONST * t for t in temp} energy_func = ljEnergyFunc() if energy_func is None else energy_func repack_radius = ( kwargs.pop("repack_radius") if "repack_radius" in kwargs else None ) # Angstroms if repack_radius is None: repack_radius = max([SL.clash_radius for SL in ensembles]) # Construct a new spin labeled protein and preallocate variables to retain monte carlo trajectory sites_str = " or ".join(f"( {ens.selstr} )" for ens in ensembles) protein = mutate(protein, *ensembles, **kwargs).atoms # Determine the residues near the spin label that will be repacked repack_residues = protein.select_atoms( f"around {repack_radius} {sites_str}" ).residues # Get construction information for rotamers and ligands repack_res_kwargs = ( residue_ensembles[0].input_kwargs if len(residue_ensembles) > 0 else {} ) repack_lig_kwargs = ( ligand_ensembles[0].input_kwargs if len(ligand_ensembles) > 0 else {} ) # If only a ligand is passed use ligand params for use_H if repack_res_kwargs == {} and repack_lig_kwargs != {}: repack_res_kwargs["use_H"] = ligand_ensembles[0].use_H repack_res_kwargs["eval_clash"] = False repack_residue_libraries = [ RotamerEnsemble.from_mda(res, **repack_res_kwargs) for res in repack_residues if res.resname not in ["GLY", "ALA"] and res.resname in SUPPORTED_RESIDUES ] repack_residue_libraries += list(ensembles) # Create new labeled protein construct to fill in any missing atoms of repack residues protein = mutate(protein, *repack_residue_libraries, **kwargs).atoms repack_residues = protein.select_atoms( f"around {repack_radius} {sites_str}" ).residues repack_residue_libraries = [ RotamerEnsemble.from_mda(res, **repack_res_kwargs) for res in repack_residues if res.resname not in ["GLY", "ALA"] and res.resname in SUPPORTED_RESIDUES ] repack_residue_libraries += [ RotamerEnsemble( RL.res, RL.site, protein, chain=RL.chain, rotlib=RL._rotlib, **repack_res_kwargs, ) for RL in residue_ensembles ] repack_residue_libraries += [ LigandEnsemble( LE.res, LE.site, protein, chain=LE.chain, rotlib=LE._rotlib, **repack_lig_kwargs, ) for LE in ligand_ensembles ] traj = np.empty((repetitions, *protein.positions.shape)) deltaEs = [] sample_freq = np.array( [len(res.dihedral_atoms) for res in repack_residue_libraries], dtype=np.float64 ) mean = sample_freq[sample_freq > 1].mean() sample_freq[sample_freq == 1] = mean sample_freq /= sample_freq.sum() count, acount, bcount, bidx = 0, 0, 0, 0 schedule = repetitions / (len(temp) + 1) with tqdm(total=repetitions) as pbar: while count < repetitions: # Randomly select a residue from the repacked residues SiteLibrary = repack_residue_libraries[ np.random.choice(len(repack_residue_libraries), p=sample_freq) ] if not hasattr(SiteLibrary, "dummy_label"): SiteLibrary.dummy_label = SiteLibrary.copy() SiteLibrary.dummy_label._protein = protein SiteLibrary.dummy_label.mask = np.isin( protein.ix, SiteLibrary.clash_ignore_idx ) SiteLibrary.dummy_label._coords = np.atleast_3d( [protein.atoms[SiteLibrary.dummy_label.mask].positions] ) with np.errstate(divide="ignore"): SiteLibrary.dummy_label.E0 = energy_func( SiteLibrary.dummy_label ) - KT[temp[bidx]] * np.log(SiteLibrary.current_weight) DummyLabel = SiteLibrary.dummy_label coords, weight = SiteLibrary.sample( off_rotamer=off_rotamer, remove_clashing=False ) with np.errstate(divide="ignore"): DummyLabel._coords = np.atleast_3d([coords]) E1 = energy_func(DummyLabel) - KT[temp[bidx]] * np.log(weight) deltaE = E1 - DummyLabel.E0 deltaE = np.maximum(deltaE, -10.0) acount += 1 # Metropolis-Hastings criteria if ( E1 < DummyLabel.E0 or np.exp(-deltaE / KT[temp[bidx]]) > np.random.rand() ): deltaEs.append(deltaE) protein.atoms[DummyLabel.mask].positions = coords DummyLabel.E0 = E1 traj[count] = protein.atoms.positions SiteLibrary.update_weight(weight) count += 1 bcount += 1 pbar.update(1) if bcount > schedule: bcount = 0 bidx = np.minimum(bidx + 1, len(temp) - 1) logging.info(f"Total counts: {acount}") # Load MCMC trajectory into universe. protein.universe.load_new(traj) return protein, np.squeeze(deltaEs)
def continuous_topol(atoms, bonds): """ Determines if there is a chain break in the set of atoms and bonds provided. Parameters ---------- atoms: Universe, AtomGroup, MolSys Set of atoms to be checked for chain breaks. bonds: ArrayLike[Tuple[int]] Array of tuples of integers defining bonds by their atom indices. Returns ------- G.is_connected: bool The boolean value of whether the set of atoms are connected. """ ixmap = {ix: i for i, ix in enumerate(atoms.ix)} ibonds = np.vectorize(ixmap.get)(bonds) G = ig.Graph(n=atoms.n_atoms, edges=ibonds) return G.is_connected()