import igraph as ig
from copy import deepcopy
from itertools import combinations
import logging
import warnings
import inspect
from functools import partial
from pathlib import Path
import numpy as np
from scipy.spatial import cKDTree
import scipy.optimize as opt
import MDAnalysis as mda
import chilife.io as io
import chilife.RotamerEnsemble as re
import chilife.scoring as scoring
from chilife.protein_utils import make_mda_uni
from .MolSysIC import MolSysIC
from .base_classes import Ensemble
[docs]
class dRotamerEnsemble(Ensemble):
"""Create new dRotamerEnsemble object.
Parameters
----------
res : string
3-character name of desired residue, e.g. RXA.
site : tuple[int, int]
MolSys residue numbers to attach the bifunctional library to.
protein : MDAnalysis.Universe, MDAnalysis.AtomGroup
Object containing all protein information (coords, atom types, etc.)
chain : str
MolSys chain identifier to attach the bifunctional ensemble to.
rotlib : str
Rotamer library to use for constructing the RotamerEnsemble
**kwargs : dict
restraint_weight: float
Force constant (kcal/mol/A^2) for calculating energetic penalty of restraint satisfaction, i.e. the
alignment of the overlapping atoms of the two mono-functional subunits of the bifunctional label.
torsion_weight: float
Force constant (kcal/mol/radian^2) for calculating energetic penalty of the deviation from rotamer
starting dihedral angles.
minimize: bool
Switch to turn on/off minimization. During minimization each rotamer is optimized in dihedral space
with respect to alignment of the "cap" atoms of the two mono-functional subunits, internal clashes
and deviation from the starting conformation in dihedral space.
min_method: str
Name of the minimization algorithm to use. All ``scipy.optimize.minimize`` algorithms are available
and include: ‘Nelder-Mead’, ‘Powell’, ‘CG’, ‘BFGS’, ‘Newton-CG’, ‘L-BFGS-B’, ‘TNC’, ‘COBYLA’, ‘SLSQP’,
‘trust-constr’, ‘dogleg’, ‘trust-ncg’, ‘trust-exact’, ‘trust-krylov’, and custom.
exclude_nb_interactions: int:
When calculating internal clashes, ignore 1-``exclude_nb_interactions`` interactions and below. Defaults
to ignore 1-3 interactions, i.e. atoms that are connected by 2 bonds or fewer will not have a steric
effect on each other.
eval_clash : bool
Switch to turn clash evaluation on (True) and off (False).
forcefield: str
Name of the forcefield you wish to use to parameterize atoms for the energy function. Currently,
supports `charmm` and `uff`
energy_func : callable
Python function or callable object that takes a protein or a RotamerEnsemble object as input and
returns an energy value (kcal/mol) for each rotamer in the ensemble. See also
:mod:`Scoring <chiLife.scoring>`. Defaults to a capped lennard-jones potentail.
temp : float
Temperature to use when running ``energy_func``. Only used if ``energy_func`` accepts a temperature
argument Defaults to 298 K.
clash_radius : float
Cutoff distance (angstroms) for inclusion of atoms in clash evaluations. This distance is measured from
``clash_ori`` Defaults to the longest distance between any two atoms in the rotamer ensemble plus 5
angstroms.
clash_ori : str
Atom selection to use as the origin when finding atoms within the ``clash_radius``. Defaults to 'cen',
the centroid of the rotamer ensemble heavy atoms.
protein_tree : Scipy.spatial.cKDTree
KDTree of atom positions for fast distance calculations and neighbor detection. Defaults to None
trim: bool
When true, the lowest `trim_tol` fraction of rotamers in the ensemble will be removed.
trim_tol: float
Tolerance for trimming rotamers from the ensemble. trim_tol=0.005 means the bottom 0.5% of rotamers
will be removed.
alignment_method : str
Method to use when attaching or aligning the rotamer ensemble backbone with the protein backbone.
Defaults to ``'bisect'`` which aligns the CA atom, the vectors bisecting the N-CA-C angle and the
N-CA-C plane.
dihedral_sigmas : float, numpy.ndarray
Standard deviations of dihedral angles (degrees) for off rotamer sampling. Can be a single number for
isotropic sampling, a vector to define each dihedral individually or a matrix to define a value for
each rotamer and each dihedral. Setting this value to np.inf will force uniform (accessible volume)
sampling. Defaults to 35 degrees.
weighted_sampling : bool
Determines whether the rotamer ensemble is sampled uniformly or based off of their intrinsic weights.
Defaults to False.
use_H : bool
Determines if hydrogen atoms are used or not. Defaults to False.
Attributes
----------
res : string
3-character name of desired residue, e.g. RXA.
site1 : int
MolSys residue number of the first attachment site.
site2 : int
MolSys residue number of the second attachment site.
increment : int
Number of residues between two attachment sites.
protein : MDAnalysis.Universe, MDAnalysis.AtomGroup, :class:`~MolSys`
Object containing all protein information (coords, atom types, etc.)
chain : str
Chain identifier of the site the bifunctional ensemble is attached to.
name : str
Name of the ensemble. Usually include the native amino acid, the site number and the label that was attached
changing the name will change the object name when saving in a PDB.
RE1 : RotamerEnsemble
Monofunctional ensemble subunit attached to the first site.
RE2 : RotamerEnsemble
Monofunctional ensemble subunit attached to the second site.
"""
backbone_atoms = ["H", "N", "CA", "HA", "C", "O"]
def __init__(self, res, sites, protein=None, chain=None, rotlib=None, **kwargs):
self.res = res
self.site1, self.icode1, self.site2, self.icode2 = proc_sites(sites)
self.site = self.site1
self.increment = self.site2 - self.site1
self.kwargs = kwargs
self.protein = protein
self.chain = chain if chain is not None else self.guess_chain()
self.input_kwargs = kwargs
self.__dict__.update(dassign_defaults(kwargs))
self.get_lib(rotlib)
self.create_ensembles()
self.RE1.backbone_to_site()
self.RE2.backbone_to_site()
self.cst_idx1 = np.where(self.RE1.atom_names[None, :] == self.csts[:, None])[1]
self.cst_idx2 = np.where(self.RE2.atom_names[None, :] == self.csts[:, None])[1]
_, idx1 = np.unique(self.cst_idx1, return_index=True)
_, idx2 = np.unique(self.cst_idx2, return_index=True)
self.cst_idx1 = self.cst_idx1[np.sort(idx1)]
self.cst_idx2 = self.cst_idx2[np.sort(idx2)]
for i in range(1, len(self.cst_idx2)):
if self.RE2.atom_names[self.cst_idx2[i]] == self.RE2.atom_names[self.cst_idx2[i-1]]:
self.cst_idx2[i - 1], self.cst_idx2[i] = self.cst_idx2[i], self.cst_idx2[i - 1]
self.rl1mask = np.argwhere(~np.isin(self.RE1.atom_names, self.csts)).flatten()
self.rl2mask = np.argwhere(~np.isin(self.RE2.atom_names, self.csts)).flatten()
self.name = self.res
if self.site1 is not None:
self.name = f"{self.RE1.nataa}{self.site1}-{self.RE2.nataa}{self.site2}{self.res}"
if self.chain is not None:
self.name += f"_{self.chain}"
self.selstr = f"resid {self.site1} {self.site2} and segid {self.chain} and not altloc B"
self._graph = ig.Graph(edges=self.bonds)
if self.clash_radius is None:
self.clash_radius = np.linalg.norm(self.clash_ori - self.coords, axis=-1).max() + 5
self.protein_setup()
self.sub_labels = (self.RE1, self.RE2)
@property
def weights(self):
"""Array of the fractions of rotamer populations for each rotamer in the library."""
return self.RE1.weights
@weights.setter
def weights(self, value):
self.RE1.weights = value
self.RE2.weights = value
@property
def coords(self):
"""The 3D cartesian coordinates of each atom of each rotamer in the library."""
ovlp = (self.RE1.coords[:, self.cst_idx1] + self.RE2.coords[:, self.cst_idx2]) / 2
return np.concatenate([self.RE1._coords[:, self.rl1mask], self.RE2._coords[:, self.rl2mask], ovlp], axis=1)
@coords.setter
def coords(self, value):
if value.shape[1] != len(self.atom_names):
raise ValueError(
f"The provided coordinates do not match the number of atoms of this ensemble ({self.res})"
)
self.RE1._coords[:, self.rl1mask] = value[:, :len(self.rl1mask)]
self.RE2._coords[:, self.rl2mask] = value[:, len(self.rl1mask):len(self.rl1mask) + len(self.rl2mask)]
self.RE1._coords[:, self.cst_idx1] = value[:, len(self.rl1mask) + len(self.rl2mask):]
self.RE2._coords[:, self.cst_idx2] = value[:, len(self.rl1mask) + len(self.rl2mask):]
@property
def _lib_coords(self):
ovlp = (self.RE1._lib_coords[:, self.cst_idx1] + self.RE2._lib_coords[:, self.cst_idx2]) / 2
return np.concatenate([self.RE1._lib_coords[:, self.rl1mask],
self.RE2._lib_coords[:, self.rl2mask], ovlp], axis=1)
@property
def atom_names(self):
"""The names of each atom in the rotamer"""
return np.concatenate((self.RE1.atom_names[self.rl1mask],
self.RE2.atom_names[self.rl2mask],
self.RE1.atom_names[self.cst_idx1]))
@property
def atom_types(self):
"""The element or atom type of each atom in the rotamer."""
return np.concatenate((self.RE1.atom_types[self.rl1mask],
self.RE2.atom_types[self.rl2mask],
self.RE1.atom_types[self.cst_idx1]))
@property
def dihedral_atoms(self):
"""Four atom sets defining each flexible dihedral of the side chain"""
return np.concatenate([self.RE1.dihedral_atoms, self.RE2.dihedral_atoms])
@property
def dihedrals(self):
"""Dihedral angle values of each dihedral defined in :py:attr::`~dihedral_atoms` for each rotamer in the
library"""
return np.concatenate([self.RE1.dihedrals, self.RE2.dihedrals], axis=-1)
@property
def centroid(self):
"""The geometric center of all atoms of all rotamers in the rotamer library"""
return self.coords.mean(axis=(0, 1))
@property
def clash_ori(self):
"""The origin used to determine if an external atom will be considered for clashes using the
``clash_radius`` property of the ensemble"""
if isinstance(self._clash_ori_inp, (np.ndarray, list)):
if len(self._clash_ori_inp) == 3:
return self._clash_ori_inp
elif isinstance(self._clash_ori_inp, str):
if self._clash_ori_inp in ["cen", "centroid"]:
return self.centroid
elif (ori_name := self._clash_ori_inp.upper()) in self.atom_names:
return np.squeeze(self.coords[0][ori_name == self.atom_names])
else:
raise ValueError(
f"Unrecognized clash_ori option {self._clash_ori_inp}. Please specify a 3D vector, an "
f"atom name or `centroid`"
)
return self._clash_ori
@clash_ori.setter
def clash_ori(self, inp):
self._clash_ori_inp = inp
@property
def side_chain_idx(self):
"""Indices of the atoms that correspond to the side chain atoms (e.g. CB, CG, etc. and not N, CA, C)"""
if not hasattr(self, '_side_chain_idx'):
self._side_chain_idx = np.argwhere(
np.isin(self.atom_names, dRotamerEnsemble.backbone_atoms, invert=True)
).flatten()
return self._side_chain_idx
@property
def bonds(self):
"""Array of intra-label atom pairs indices that are covalently bonded."""
if not hasattr(self, "_bonds"):
bonds = []
for bond in self.RE1.bonds:
bndin = np.isin(bond, self.rl1mask)
if np.all(bndin):
bonds.append(bond)
elif np.any(bndin):
bonds.append([bond[0], np.argwhere(self.atom_names == self.RE1.atom_names[bond[1]]).flat[0]])
else:
bonds.append([np.argwhere(self.atom_names == self.RE1.atom_names[bond[0]]).flat[0],
np.argwhere(self.atom_names == self.RE1.atom_names[bond[1]]).flat[0]])
for bond in self.RE2.bonds:
bndin = np.isin(bond, self.rl2mask)
if np.all(bndin):
bonds.append([b + len(self.rl1mask) for b in bond])
elif not bndin[1]:
bonds.append([bond[0] + len(self.rl1mask),
np.argwhere(self.atom_names == self.RE2.atom_names[bond[1]]).flat[0]])
self._bonds = np.array(sorted(set(map(tuple, bonds))), dtype=int)
return self._bonds
@bonds.setter
def bonds(self, inp):
"""
Set of intra-label bonded pairs.
Parameters
----------
inp : ArrayLike
List of atom ID pairs that are bonded
"""
self._bonds = set(tuple(i) for i in inp)
idxs = np.arange(len(self.atom_names))
all_pairs = set(combinations(idxs, 2))
self._non_bonded = all_pairs - self._bonds
@property
def non_bonded(self):
""" Array of indices of intra-label non-bonded atom pairs. Primarily used for internal clash evaluation when
sampling the dihedral space"""
if not hasattr(self, "_non_bonded"):
pairs = {v.index: [path for path in self._graph.get_all_shortest_paths(v) if
len(path) <= (self._exclude_nb_interactions)] for v in self._graph.vs}
pairs = {(a, c) for a in pairs for b in pairs[a] for c in b if a < c}
all_pairs = set(combinations(range(len(self.atom_names)), 2))
self._non_bonded = all_pairs - pairs
return sorted(list(self._non_bonded))
@non_bonded.setter
def non_bonded(self, inp):
"""
Create a set of non-bonded atom pair indices within a single side chain
Parameters
----------
inp : ArrayLike
List of atom ID pairs that are not bonded
"""
self._non_bonded = set(tuple(i) for i in inp)
idxs = np.arange(len(self.atom_names))
all_pairs = set(combinations(idxs, 2))
self._bonds = all_pairs - self._non_bonded
def protein_setup(self):
if isinstance(self.protein, (mda.AtomGroup, mda.Universe)):
if not hasattr(self.protein.universe._topology, "altLocs"):
self.protein.universe.add_TopologyAttr('altLocs', np.full(len(self.protein.universe.atoms), ""))
if self.ignore_waters:
self.protein = self.protein.select_atoms("not (byres name OH2 or resname HOH)")
else:
self.protein = self.protein.atoms
clash_ignore_idx = self.protein.select_atoms(f"resid {self.site1} {self.site2} and segid {self.chain}").ix
self.clash_ignore_idx = np.argwhere(np.isin(self.protein.ix, clash_ignore_idx)).flatten()
self.resindex = self.protein.select_atoms(self.selstr).residues[0].resindex
self.segindex = self.protein.select_atoms(self.selstr).residues[0].segindex
if self.protein_tree is None:
self.protein_tree = cKDTree(self.protein.atoms.positions)
protein_clash_idx = self.protein_tree.query_ball_point(
self.clash_ori, self.clash_radius
)
self.protein_clash_idx = [
idx for idx in protein_clash_idx if idx not in self.clash_ignore_idx
]
self.aidx, self.bidx = [list(x) for x in zip(*self.non_bonded)]
if hasattr(self.energy_func, 'prepare_system'):
self.energy_func.prepare_system(self)
if self._minimize:
self.minimize()
if self.eval_clash:
self.evaluate()
[docs]
def guess_chain(self):
"""
Function to guess the chain based off of the attached protein and residue number.
Returns
-------
chain : str
The chain name
"""
if self.protein is None:
chain = "A"
elif len(set(self.protein.segments.segids)) == 1:
chain = self.protein.segments.segids[0]
elif np.isin(self.protein.residues.resnums, self.site1).sum() == 0:
raise ValueError(
f"Residue {self.site1} is not present on the provided protein"
)
elif np.isin(self.protein.residues.resnums, self.site1).sum() == 1:
chain = self.protein.select_atoms(f"resid {self.site1}").segids[0]
else:
raise ValueError(
f"Residue {self.site1} is present on more than one chain. Please specify the desired chain"
)
return chain
[docs]
def get_lib(self, rotlib):
"""
Specil function to get a rotamer library for a dRotamerEnsemble and apply all attributes to the object
instance. ``rotlib`` can be a residue name, a file name, or a dictionary containing all the standard entries
of a rotamer library.
Parameters
----------
rotlib : Union[Path, str, dict]
The name of the residue, the Path to the rotamer library file or a dictionary containing all entries of a
rotamer library
"""
# If given a dictionary use that as the rotlib
if isinstance(rotlib, dict):
if tuple(sorted(rotlib.keys())) != ('csts', 'libA', 'libB'):
raise RuntimeError('Non-file dRotamerEnsemble rotlibs must be dictionaries consisting of exactle 3 '
'entries: `csts`, `libA` and `libB`')
self.csts = rotlib['csts']
self.libA, self.libB = rotlib['libA'], rotlib['libB']
self.kwargs["eval_clash"] = False
return None
rotlib = self.res if rotlib is None else rotlib
if 'ip' not in rotlib:
rotlib += f'ip{self.increment}'
# Check if any exist
rotlib_path = io.get_possible_rotlibs(rotlib, suffix='drotlib', extension='.zip')
if rotlib_path is None:
# Check if libraries exist but for different i+n
rotlib_path = io.get_possible_rotlibs(rotlib.replace(f'ip{self.increment}', ''),
suffix='drotlib',
extension='.zip',
return_all=True)
if rotlib_path is None:
raise NameError(f'There is no rotamer library called {rotlib} in this directory or in chilife')
else:
warnings.warn(f'No rotamer library found for the given increment (ip{self.increment}) but rotlibs '
f'were found for other increments. chiLife will combine these rotlib to model '
f'this site1 pair and but they may not be accurate! Because there is no information '
f'about the relative weighting of different rotamer libraries all weights will be '
f'set to 1/len(rotlib)')
if isinstance(rotlib_path, Path):
libA, libB, csts = io.read_library(rotlib_path)
elif isinstance(rotlib_path, list):
cctA, cctB, ccsts = {}, {}, {}
libA, libB, csts = io.read_library(rotlib_path[0])
unis = []
for lib in (libA, libB):
names, types = lib['atom_names'], libA['atom_types']
residxs = np.zeros(len(names), dtype=int)
resnames, resids = np.array([self.res]), np.array([1])
segidx = np.array([0])
uni = make_mda_uni(names, types, resnames, residxs, resids, segidx)
unis.append(uni)
for p in rotlib_path:
tlibA, tlibB, tcsts = io.read_library(p)
for lib, tlib, cct, uni in zip((libA, libB), (tlibA, tlibB), (cctA, cctB), unis):
# Libraries must have the same atom order
if not np.all(np.isin(tlib['atom_names'], lib['atom_names'])) and \
np.all(tlib['dihedral_atoms'] == lib['dihedral_atoms']):
raise ValueError(f'Rotlibs {rotlib_path[0].stem} and {p.stem} are not compatable. You may'
f'need to rename one of them.')
# Map coordinates
ixmap = [np.argwhere(tlib['atom_names'] == aname).flat[0] for aname in lib['atom_names']]
cct.setdefault('coords', []).append(tlib['coords'][:, ixmap])
# Create new internal coords if they are defined differently
lib_ic, tlib_ic = lib['internal_coords'], tlib['internal_coords']
if np.any(lib_ic.atom_names != tlib_ic.atom_names):
uni.load_new(cct['coords'][-1])
tlib_ic = MolSysIC.from_atoms(uni, lib['dihedral_atoms'], lib_ic.bonds)
tlib['zmats'] = tlib_ic.trajectory.coordinate_array
cct.setdefault('dihedrals', []).append(tlib['dihedrals'])
cct.setdefault('zmats', []).append(tlib['zmats'])
for field in ('dihedrals', 'coords', 'zmats'):
libA[field] = np.concatenate(cctA[field])
libB[field] = np.concatenate(cctB[field])
libA['internal_coords'].load_new(libA.pop('zmats'))
libB['internal_coords'].load_new(libB.pop('zmats'))
libA['weights'] = libB['weights'] = np.ones(len(libA['coords'])) / len(libA['coords'])
self.csts = csts
self.libA, self.libB = libA, libB
self.kwargs["eval_clash"] = False
[docs]
def create_ensembles(self):
"""Creates monofunctional components of the bifunctional rotamer ensemble."""
self.RE1 = re.RotamerEnsemble(self.res,
self.site1,
self.protein,
self.chain,
self.libA,
**self.kwargs)
self.RE2 = re.RotamerEnsemble(self.res,
self.site2,
self.protein,
self.chain,
self.libB,
**self.kwargs)
[docs]
def save_pdb(self, name: str = None):
"""
Save a PDB file of the ensemble
Parameters
----------
name: str
Name of the PDB file
"""
if name is None:
name = self.name + ".pdb"
if not name.endswith(".pdb"):
name += ".pdb"
io.save(name, self.RE1, self.RE2)
[docs]
def minimize(self, callback=None):
"""
Minimize rotamers in dihedral space in the current context. Performed by default unless the ``minimize=False``
keyword argument is used during construction. Note that the minimization method is controlled by the
Parameters
----------
callback: Callable
A callable function to be passed as the ``scipy.optimize.minimize`` function. See the
`scipy documentation <https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.minimize.html>`_
for details.
"""
scores = [self._min_one(i, ic1, ic2, callback=callback) for i, (ic1, ic2) in
enumerate(zip(self.RE1.internal_coords, self.RE2.internal_coords))]
scores = np.asarray(scores)
SSEs = np.linalg.norm(self.RE1.coords[:, self.cst_idx1] - self.RE2.coords[:, self.cst_idx2], axis=2).sum(axis=1)
MSD = SSEs / len(self.csts)
MSDmin = MSD.min()
if MSDmin > 0.1:
warnings.warn(f'The minimum MSD of the cap is {MSD.min()}, this may result in distorted label. '
f'Check that the structures make sense.')
if MSDmin > 0.25:
raise RuntimeError(f'chiLife was unable to connect residues {self.site1} and {self.site2} with {self.res}. '
f'Please double check that this is the intended labeling site. It is likely that these '
f'sites are too far apart.')
self.cap_MSDs = MSD
self.RE1.backbone_to_site()
self.RE2.backbone_to_site()
self.score_base = scores.min()
scores -= scores.min()
self.rotamer_scores = scores + self.score_base
self.weights *= np.exp(-scores / (scoring.GAS_CONST * self.temp) / np.exp(-scores).sum())
self.weights /= self.weights.sum()
def _objective(self, dihedrals, ic1, ic2):
"""
Objective function to optimize for each rotamer in the ensemble.
Parameters
----------
dihedrals: ArrayLike
Dihedral values
ic1, ic2: chiLife.MolSysIC
Internal coordinates object for the two mono functional subunits of the bifunctional label.
Returns
-------
score: float
Rotamer energy score for the current conformation
"""
ic1.set_dihedral(dihedrals[: len(self.RE1.dihedral_atoms)], 1, self.RE1.dihedral_atoms)
coords1 = ic1.to_cartesian()[self.RE1.ic_mask]
ic2.set_dihedral(dihedrals[-len(self.RE2.dihedral_atoms):], 1, self.RE2.dihedral_atoms)
coords2 = ic2.to_cartesian()[self.RE2.ic_mask]
diff = np.linalg.norm(coords1[self.cst_idx1] - coords2[self.cst_idx2], axis=1)
ovlp = (coords1[self.cst_idx1] + coords2[self.cst_idx2]) / 2
coords = np.concatenate([coords1[self.rl1mask], coords2[self.rl2mask], ovlp], axis=0)
r = np.linalg.norm(coords[self.aidx] - coords[self.bidx], axis=1)
# Faster to compute lj here
lj = self.irmin_ij / r
lj = lj * lj * lj
lj = lj * lj
# attractive forces are needed, otherwise this term will perpetually push atoms apart
internal_energy = self.ieps_ij * (lj * lj - 2 * lj)
score = (diff @ diff) * self.restraint_weight / len(diff) + internal_energy.sum()
return score
def _min_one(self, i, ic1, ic2, callback=None):
"""
Helper function to use when dispatching minimization jobs or each rotamer.
Parameters
----------
i: int
rotamer index
ic1, ic2: chiLife.MolSysIC
Internal coordinates object for the two mono functional subunits of the bifunctional label.
callback: Callable
A callable function to be passed as the ``scipy.optimize.minimize`` function. See the
`scipy documentation <https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.minimize.html>`_
for details.
Returns
-------
score: float
Energy score of the minimized rotamer.
"""
if callback is not None:
if 'i' in inspect.signature(callback).parameters:
callback = partial(callback, i=i)
d0 = np.concatenate([ic1.get_dihedral(1, self.RE1.dihedral_atoms),
ic2.get_dihedral(1, self.RE2.dihedral_atoms)])
lb = d0 - np.pi # np.deg2rad(40)
ub = d0 + np.pi # np.deg2rad(40) #
bounds = np.c_[lb, ub]
xopt = opt.minimize(self._objective, x0=d0, args=(ic1, ic2),
bounds=bounds, method=self.min_method,
callback=callback)
self.RE1._coords[i] = ic1.coords[self.RE1.H_mask]
self.RE2._coords[i] = ic2.coords[self.RE2.H_mask]
tors = d0 - xopt.x
tors = np.arctan2(np.sin(tors), np.cos(tors))
tors = np.sqrt(tors @ tors)
return xopt.fun + tors * self.torsion_weight
[docs]
def trim_rotamers(self):
"""Remove low probability rotamers from the ensemble. All rotamers accounting for the population less than
``self.trim_tol`` will be removed."""
arg_sort_weights = np.argsort(self.weights)[::-1]
sorted_weights = self.weights[arg_sort_weights]
cumulative_weights = np.cumsum(sorted_weights)
cutoff = np.maximum(1, len(cumulative_weights[cumulative_weights < 1 - self.trim_tol]))
keep_idx = arg_sort_weights[:cutoff]
if hasattr(self, 'cap_MSDs'):
self.cap_MSDs = self.cap_MSDs[keep_idx]
self.rotamer_scores = self.rotamer_scores[keep_idx]
if hasattr(self, 'rot_clash_energy'):
self.rot_clash_energy = self.rot_clash_energy[keep_idx]
self.RE1.trim_rotamers(keep_idx=keep_idx)
self.RE2.trim_rotamers(keep_idx=keep_idx)
[docs]
def evaluate(self):
"""Place rotamer ensemble on protein site and recalculate rotamer weights."""
# Calculate external energies
energies = self.energy_func(self)
self.rot_clash_energy = energies
# Calculate total weights (combining internal and external)
self.weights, self.partition = scoring.reweight_rotamers(energies, self.temp, self.weights)
logging.info(f"Relative partition function: {self.partition:.3}")
# Remove low-weight rotamers from ensemble
if self._do_trim:
self.trim_rotamers()
def __len__(self):
"""Return the length of the rotamer library (number of rotamers)"""
return len(self.RE1.coords)
[docs]
def copy(self):
"""Create a deep copy of the dRotamerEnsemble object"""
new_copy = dRotamerEnsemble(self.res, (self.site1, self.site2), chain=self.chain,
protein=self.protein,
rotlib={'csts': self.csts, 'libA': self.libA, 'libB': self.libB},
minimize=False,
eval_clash=False)
for item in self.__dict__:
if isinstance(self.dict[item], np.ndarray):
new_copy.__dict__[item] = self.__dict__[item].copy()
elif item == 'RE1':
new_copy.__dict__[item] == self.__dict__[item].copy(rotlib=self.libA)
elif item == 'RE2':
new_copy.__dict__[item] == self.__dict__[item].copy(rotlib=self.libB)
elif item == 'protein':
pass
else:
new_copy.__dict__[item] = deepcopy(self.__dict__[item])
return new_copy
def proc_sites(sites):
"""
Processes site information provided by user into a format compatible with the dRotamerEnsemble object.
Parameters
----------
sites: ArrayLike
Sites provided by user
Returns
-------
new_sites: ArrayLike
Sites compatible with dRotamerEnsemble.
"""
sites = sorted(sites)
new_sites = []
for site in sites:
if isinstance(site, str):
site_split = re.split('(\D+)',site)
site = site_split[0]
icode = site_split[1] if len(site_split) > 1 else ""
else:
icode = ""
new_sites.append(site)
new_sites.append(icode)
return new_sites
def dassign_defaults(kwargs):
"""
Helper function to assign default values to kwargs that have not been explicitly assigned by the user. Also
checks to make sure that the user provided kwargs are real kwargs.
Parameters
----------
kwargs : dict
Dictionary of user supplied keyword arguments.
Returns
-------
kwargs : dict
Dictionary of user supplied keyword arguments augmented with defaults for all kwargs the user did not supply.
"""
# Make all string arguments lowercase
for key, value in kwargs.items():
if isinstance(value, str):
kwargs[key] = value.lower()
# Default parameters
defaults = {
"eval_clash": True,
"temp": 298,
"clash_radius": None,
"protein_tree": None,
"_clash_ori_inp": kwargs.pop("clash_ori", "cen"),
"alignment_method": "bisect",
"dihedral_sigmas": 25,
"use_H": False,
"_exclude_nb_interactions": kwargs.pop('exclude_nb_interactions', 3),
"energy_func": scoring.ljEnergyFunc(scoring.get_lj_energy, 'charmm', forgive=0.95)
,
"_minimize": kwargs.pop('minimize', True),
"min_method": 'L-BFGS-B',
"_do_trim": kwargs.pop('trim', True),
"trim_tol": 0.005,
"restraint_weight": kwargs.pop('restraint_weight', 222),
"torsion_weight": 5,
"ignore_waters": True,
}
# Overwrite defaults
kwargs_filled = {**defaults, **kwargs}
# Make sure there are no unused parameters
if len(kwargs_filled) != len(defaults):
raise TypeError(
f"Got unexpected keyword argument(s): "
f'{", ".join(key for key in kwargs if key not in defaults)}'
)
return kwargs_filled.items()