import re
from pathlib import Path
from copy import deepcopy
import warnings
from itertools import combinations
import numpy as np
from numpy.typing import ArrayLike
from scipy.spatial import cKDTree
import igraph as ig
import MDAnalysis as mda
import chilife.io as io
import chilife.scoring as scoring
from .protein_utils import new_chain, FreeAtom, guess_chain
from .ligand_utils import assign_atom_names, remap_sdf
from .alignment_methods import ligand_alignment_methods, linear_alignment
from .MolSys import MolSys, MolecularSystemBase
from .MolSysIC import MolSysIC
from .base_classes import Ensemble
from .math_utils import simple_cycle_vertices
default_energy_func = scoring.ljEnergyFunc()
[docs]
class LigandEnsemble(Ensemble):
"""Create new ``LigandEnsemble`` object.
Parameters
----------
res : string
3-character name of desired residue, e.g. SAM.
site : int
:class:`~MolSys` residue number to attach library to.
protein : MDAnalysis.Universe, MDAnalysis.AtomGroup, MolSys
Object containing all protein information (coords, atom types, etc.)
chain : str
:class:`~MolSys` chain identifier to attach spin label to.
rotlib : str
Rotamer library to use for constructing the ``LigandEnsemble``
**kwargs : dict
exclude_nb_interactions: int:
When calculating internal clashes, ignore 1-``exclude_nb_interactions`` interactions and below. Defaults
to ignore 1-3 interactions, i.e. atoms that are connected by 2 bonds or fewer will not have a steric
effect on each other.
eval_clash : bool
Switch to turn clash evaluation on (True) and off (False).
forcefield: str
Name of the forcefield you wish to use to parameterize atoms for the energy function. Currently, supports
`charmm` and `uff`. NOTE: chilife forcefields currently only parameterize clashes and not electrostatics
or bonded interactions.
energy_func : callable
Python function or callable object that takes a protein or an :class:`Ensemble` object as input and
returns an energy value (kcal/mol) for each atom of each rotamer in the ensemble. See also
:mod:`Scoring <chiLife.scoring>` . Defaults to a capped lennard-jones repulsive energy function.
temp : float
Temperature to use when running ``energy_func``. Only used if ``energy_func`` accepts a temperature
argument Defaults to 298 K.
clash_radius : float
Cutoff distance (angstroms) for inclusion of atoms in clash evaluations. This distance is measured from
``clash_ori`` Defaults to the longest distance between any two atoms in the ensemble plus 5
angstroms.
clash_ori : str
Atom selection to use as the origin when finding atoms within the ``clash_radius``. Defaults to 'cen',
the centroid of the ensemble heavy atoms.
ignore_wateres : bool
Determines whether water molecules (resnames HOH and OH2) are ignored during clash evaluation.
Defaults to True.
protein_tree : Scipy.spatial.cKDTree
KDTree of atom positions for fast distance calculations and neighbor detection. Defaults to None
trim: bool
When true, the lowest `trim_tol` fraction of rotamers in the ensemble will be removed.
trim_tol: float
Tolerance for trimming rotamers from the ensemble. ``trim_tol=0.005`` means the bottom 0.5% of rotamers
will be removed.
alignment_method : str
Method to use when attaching or aligning the ensemble with the specified site of the :class:`~MolSys`.
Defaults to ``'svd'`` which aligns the principal components of each ligand in the ensemble with the
specified residue of the associated :class:`MolSys`.
sample : int, bool
Argument to use the off-rotamer sampling method. If ``False`` or ``0`` the off-rotamer sampling method
will not be used. If ``int`` the ensemble will be generated with that many off-rotamer samples.
dihedral_sigmas : float, numpy.ndarray
Standard deviations of dihedral angles (degrees) for off rotamer sampling. Can be a single number for
isotropic sampling, a vector to define each dihedral individually or a matrix to define a value for
each rotamer and each dihedral. Setting this value to np.inf will force uniform (accessible volume)
sampling. Defaults to 35 degrees.
weighted_sampling : bool
Determines whether the rotamer ensemble is sampled uniformly or based off of their intrinsic weights.
Defaults to ``False``.
use_H : bool
Determines if hydrogen atoms are used or not. Defaults to ``True``.
"""
def __init__(self, res, site=None, protein=None, chain=None, rotlib=None, **kwargs):
self.res = res
if site is None and protein is not None:
raise ValueError(
"A protein has been provided but a site has not. If you wish to construct an ensemble "
"associated with a protein you must include the site you wish to model."
)
elif site is None:
site = 1
icode = ""
elif isinstance(site, str):
site_split = re.split(r"(\D+)", site)
site = site_split[0]
icode = site_split[1] if len(site_split) > 1 else ""
else:
icode = ""
self.site = int(site)
self.icode = icode
self.resnum = self.site
self.chain = chain if chain is not None else guess_chain(protein, self.site)
self.selstr = f"resid {self.site} and segid {self.chain} and not altloc B"
self.nataa = ""
self.input_kwargs = kwargs
self._sdf_data = kwargs.pop("_sdf_data", None)
self.__dict__.update(assign_defaults(kwargs))
self.input_kwargs.pop("energy_func", None)
self.protein = protein
# Convert string arguments for alignment_method to respective function
if isinstance(self.alignment_method, str):
self.alignment_method = ligand_alignment_methods[self.alignment_method]
lib = self.get_lib(rotlib)
self.__dict__.update(lib)
self._weights = self.weights / self.weights.sum()
if len(self.sigmas) != 0 and "dihedral_sigmas" not in kwargs:
self.sigmas[self.sigmas == 0] = self.dihedral_sigmas
else:
self.set_dihedral_sampling_sigmas(self.dihedral_sigmas)
self._rsigmas = np.deg2rad(self.sigmas)
with np.errstate(divide="ignore"):
self._rkappas = 1 / self._rsigmas**2
self._rkappas = np.clip(self._rkappas, 0, 1e16)
# Remove hydrogen atoms unless otherwise specified
if not self.use_H:
self.H_mask = self.atom_types != "H"
self._coords = self._coords[:, self.H_mask]
self.atom_types = self.atom_types[self.H_mask]
self.atom_names = self.atom_names[self.H_mask]
self.ic_mask = np.argwhere(
np.isin(self.internal_coords.atom_names, self.atom_names)
).flatten()
self._lib_coords = self._coords.copy()
self._lib_dihedrals = self._dihedrals.copy()
self._lib_IC = self.internal_coords
if self.clash_radius is None:
self.clash_radius = (
np.linalg.norm(self.clash_ori - self.coords, axis=-1).max() + 5
)
self._graph = ig.Graph(edges=self.bonds)
self.aidx, self.bidx = [list(x) for x in zip(*self.non_bonded)]
self.side_chain_idx = np.arange(len(self.atom_names))
# Allocate variables for clash evaluations
self.atom_energies = None
self.clash_ignore_idx = None
self.partition = 1
# Assign a name to the label
self.name = self.rotlib
if self.site is not None:
self.name = self.nataa + str(self.site) + self.rotlib
if self.chain not in ("A", None):
self.name += f"_{self.chain}"
self.update(no_lib=True)
# Store atom information as atom objects
self.atoms = [
FreeAtom(name, atype, idx, self.res, self.site, coord)
for idx, (coord, atype, name) in enumerate(
zip(self._coords[0], self.atom_types, self.atom_names)
)
]
[docs]
@classmethod
def from_sdf(
cls, file_name, res="LIG", site=None, protein=None, chain=None, **kwargs
):
"""
A function to create a LigandEnsemble from a SDF file.
Parameters
----------
file_name : str
The name of the SDF file.
Returns
-------
cls : LigandEnsemble
The LigandEnsemble object.
"""
chain = (
chain
if chain is not None
else guess_chain(protein, site)
if site is not None
else new_chain(protein)
)
site = 1 if site is None else site
sdf_data = io.read_sdf(file_name)
kwargs["_sdf_data"] = sdf_data
coords = np.array([[atom["xyz"] for atom in mol["atoms"]] for mol in sdf_data])
# Assuming all mols are identical
mol = sdf_data[0]
n_atoms = len(mol["atoms"])
n_conformers = len(coords)
atom_names = np.array(assign_atom_names(mol))
atom_types = np.array([atom["element"] for atom in mol["atoms"]])
resnames = np.array([res] * n_atoms)
resindices = np.zeros(n_atoms) * site
resnums = np.ones(n_atoms)
segindices = np.zeros(n_atoms)
record_types = np.array(["HETATM"] * n_atoms)
segids = np.array([chain] * n_atoms)
bonds = np.array([[bond["idx1"], bond["idx2"]] for bond in mol["bonds"]])
bond_types = np.array([bond["type"] for bond in mol["bonds"]])
btype_map = {tuple(bond): btype for bond, btype in zip(bonds, bond_types)}
G = ig.Graph(len(atom_names), bonds)
# Dont consider bonds to hydrogen atoms.
H_idxs = np.argwhere(atom_types == "H")
mobile_bonds = [bond for bond in bonds if ~np.any(np.isin(bond, H_idxs))]
# Map heavy atom degrees
neighbors = {}
for bond in mobile_bonds:
for b in bond:
if b not in neighbors:
neighbors[b] = []
nieghbor = bond[0] if b == bond[1] else bond[1]
neighbors[b].append(nieghbor)
# Only consider bonds that aren't part of the same ring
ring_idxs = simple_cycle_vertices(G)
mobile_bonds = [
bond
for bond in mobile_bonds
if ~np.any([np.all(np.isin(bond, r)) for r in ring_idxs])
]
# Only look at single bonds
mobile_bonds = [bond for bond in mobile_bonds if btype_map[tuple(bond)] == 1]
# Don't consider terminal mobile bonds (e.g. methyls or hydroxyls) or
significant_bonds = []
for bond in mobile_bonds:
if len(neighbors[bond[0]]) == 1:
continue
if len(neighbors[bond[1]]) == 1:
continue
significant_bonds.append(bond)
# Find a suitable root, make sure its only bonded to one atom that is at max degree 3
for root, neigh in neighbors.items():
if len(neigh) == 1 and len(neighbors[neigh[0]]) < 4:
break
# DFS to sort atoms
sort_idx, layer_idx, parents = G.bfs(root)
# Move hydrogen atoms to end
v = {idx: int(atom_types[idx] == "H") for idx in sort_idx}
sort_idx = sorted(sort_idx, key=lambda x: v[x])
# Ensure at least 4 consecutive atoms starting the ligand so the 4th atom doesn't need an improper
if (
parents[sort_idx[2]] == parents[sort_idx[3]]
and parents[sort_idx[4]] != sort_idx[3]
):
sort_idx[3], sort_idx[4] = sort_idx[4], sort_idx[3]
# Create maps so original order can be preserved
forward_map = {n: i for i, n in enumerate(sort_idx)}
back_map = np.argsort(sort_idx)
# Sort the atoms
atom_names = atom_names[sort_idx]
atom_types = atom_types[sort_idx]
coords = coords[:, sort_idx]
bonds = np.array([[forward_map[k1], forward_map[k2]] for k1, k2 in bonds])
# Create MolSys
molsys = MolSys.from_arrays(
atom_names,
atom_types,
resnames,
resindices,
resnums.astype(int),
segindices.astype(int),
record_types,
segids=segids,
trajectory=coords,
bonds=np.array(bonds),
bond_types=np.array(
[io.SDF_BOND_TO_XL[bond_type] for bond_type in bond_types]
),
)
# Create internal coords
ICs = MolSysIC.from_atoms(molsys, bonds=bonds, bond_types=bond_types)
# Identify mobile dihedrals
dihedral_idxs = []
new_neighbors = {
forward_map[k]: [forward_map[vi] for vi in v] for k, v in neighbors.items()
}
new_significant_bonds = [
[forward_map[x] for x in bond] for bond in significant_bonds
]
for a, b in new_significant_bonds:
if a > b:
a, b = b, a
for a1 in new_neighbors[a]:
if a1 < a:
break
for b1 in new_neighbors[b]:
if b1 > b:
break
dihedral_idxs.append([a1, a, b, b1])
dihedral_idxs = [sorted(lst) for lst in dihedral_idxs]
dihedral_idxs = sorted(dihedral_idxs, key=sum)
mobile_dihedrals = [
[atom_names[xi] for xi in dihedral] for dihedral in dihedral_idxs
]
weights = kwargs.pop("weights", np.ones(n_conformers) / n_conformers)
dihedrals = np.array([ic.get_dihedral(1, mobile_dihedrals) for ic in ICs])
if len(dihedrals.shape) == 1:
dihedrals = dihedrals[:, None]
sigmas = kwargs.get("sigmas", np.array([]))
lib = {
"rotlib": f"{res}_from_sdf",
"resname": res,
"coords": np.atleast_3d(coords),
"internal_coords": ICs,
"weights": weights,
"atom_types": atom_types,
"atom_names": atom_names,
"dihedral_atoms": np.array(mobile_dihedrals),
"dihedrals": np.rad2deg(dihedrals),
"_rdihedrals": dihedrals,
"sigmas": sigmas,
"_rsigmas": np.deg2rad(sigmas),
"type": "chilife ligand ensemble library from an SDF file",
"_back_map": back_map,
"_forward_map": forward_map,
"format_version": 0.0,
}
# Name atoms
return cls(res, site, protein, chain, rotlib=lib, **kwargs)
@classmethod
def from_pdb(
cls, file_name, res="LIG", site=None, protein=None, chain=None, **kwargs
):
raise NotImplementedError
@classmethod
def from_rdkit(
cls, rdmol, res="LIG", site=None, protein=None, chain=None, **kwargs
):
raise NotImplementedError
[docs]
def to_sdf(self, file_name):
"""
Write LigandEnsemble to SDF file.
Parameters
----------
file_name: str | Path
Name or Path object of the SDF file.
"""
if self._sdf_data is None:
sdf_data = self.internal_coords.protein._make_sdf_data()[0]
else:
sdf_data = self._sdf_data[0]
if not self.use_H:
sdf_data = remap_sdf(sdf_data, self.H_mask)
if hasattr(self, "_back_map"):
coords = self.coords[:, self._backmap]
else:
coords = self.coords
mols = []
for conf in coords:
mol = {k: v for k, v in sdf_data.items()}
atoms = []
for atom, xyz in zip(mol["atoms"], conf):
new_atom = {k: v for k, v in atom.items() if k != "xyz"}
new_atom["xyz"] = xyz
atoms.append(new_atom)
mol["atoms"] = atoms
mols.append(mol)
io.write_sdf(mols, file_name)
[docs]
def update(self, no_lib=False):
"""
Reanalyze the ensemble in place. This is generally used to update the library if manually changing positions
of the library or the associated protein/molecular system object. If the ensemble was made with off-rotamer
sampling, new conformations will be sampled. If using the rotamer library method, the whole library will be
used by default (``no_lib=False``). If you wish to use just the conformers that were retrained after trimming
the original structure, set ``no_lib=True``.
Parameters
----------
no_lib: bool
Argument to toggle using the current ensemble as a library or using the underlying rotamer library. If
``no_lib=True`` chiLife will not use the underlying rotamer library.
"""
# Sample from library if requested
if self._sample_size and len(self.dihedral_atoms) > 0:
mask = self.sigmas.sum(axis=0) == 0
# Draw samples
self._coords, self.weights, self.internal_coords = self.sample(
self._sample_size, off_rotamer=~mask, return_dihedrals=True
)
dihedrals = np.asarray(
[
self.internal_coords.get_dihedral(1, self.dihedral_atoms)
for ts in self.internal_coords.trajectory
]
)
self._dihedrals = np.rad2deg(dihedrals)
elif not no_lib:
# Reset to library
self._coords = self._lib_coords.copy()
self._dihedrals = self._lib_dihedrals.copy()
self.internal_coords = self._lib_IC
self.weights = self._weights.copy()
if self.protein is not None:
self.protein_setup()
else:
self.ICs_to_site()
[docs]
def init_protein(self):
"""
Helper function to initialize important parameters for interacting with the associated protein/molecular system
"""
if isinstance(self.protein, (mda.AtomGroup, mda.Universe)):
if not hasattr(self.protein.universe._topology, "altLocs"):
self.protein.universe.add_TopologyAttr(
"altLocs", np.full(len(self.protein.universe.atoms), "")
)
# Position library at selected residue
self.resindex = self.protein.select_atoms(self.selstr).resindices[0]
self.segindex = self.protein.select_atoms(self.selstr).segindices[0]
if self.ignore_waters:
self._protein = self.protein.select_atoms(
"not (byres name OH2 or resname HOH)"
)
else:
self._protein = self.protein.atoms
self.protein_tree = cKDTree(self._protein.atoms.positions)
# Delete cached lennard jones parameters if they exist.
if hasattr(self, "ermin_ij"):
del self.ermin_ij
del self.eeps_ij
[docs]
def protein_setup(self):
"""
perform necessary operations to align the rotamer library to the specified site, analyze clashes with the
associated protein/molecular system, trim high energy rotamers, and adjust weights accordingly.
"""
self.to_site()
# Get weight of current or closest rotamer
clash_ignore_idx = self.protein.select_atoms(
f"resid {self.site} and segid {self.chain}"
).ix
self.clash_ignore_idx = np.argwhere(
np.isin(self.protein.ix, clash_ignore_idx)
).flatten()
protein_clash_idx = self.protein_tree.query_ball_point(
self.clash_ori, self.clash_radius
)
self.protein_clash_idx = [
idx for idx in protein_clash_idx if idx not in self.clash_ignore_idx
]
if hasattr(self.energy_func, "prepare_system"):
self.energy_func.prepare_system(self)
# Guess which conformer the native residue is if it matches the structure of the LigandEnsemble
if self._coords.shape[1] == len(self.clash_ignore_idx):
RMSDs = np.linalg.norm(
self._coords
- self.protein.atoms[self.clash_ignore_idx].positions[None, :, :],
axis=(1, 2),
)
idx = np.argmin(RMSDs)
self.current_weight = self.weights[idx]
else:
self.current_weight = 0
# Evaluate external clash energies and reweigh rotamers
if self._minimize and self.eval_clash:
raise RuntimeError(
"Both `minimize` and `eval_clash` options have been selected, but they are incompatible."
"Please select only one. Also note that minimize performs its own clash evaluations so "
"eval_clash is not necessary."
)
elif self.eval_clash:
self.evaluate()
elif self._minimize:
self.minimize()
[docs]
def sample(self, n=1, off_rotamer=False, **kwargs):
"""Randomly sample a rotamer in the library.
Parameters
----------
n : int
number of rotamers to sample.
off_rotamer : bool | List[bool]
If False the rotamer library used to construct the LigandEnsemble will be sampled using the exact
dihedrals. If True, all mobile dihedrals will undergo minor perturbations. A list indicates a per-dihedral
switch to turn on/off off-rotamer sampling of a particular dihedrals, corresponding to
``self.dihedral_atoms`` e.g. ``off_rotamer = [False, False, False, True, True]`` R1 will only sample
the last two mobile dihedral angles.
**kwargs : dict
return_dihedrals : bool
If True, sample will return a MolSysIC object of the sampled rotamer
remove_clashing : bool
If True, samples that have internal clashing atoms will be removed
Returns
-------
coords : ArrayLike
The 3D coordinates of the sampled rotamer(s)
new_weights : ArrayLike
New weights (relative) of the sampled rotamer(s)
ICs : List[chilife.MolSysIC] (Optional)
Internal coordinate (MolSysIC) objects of the rotamer(s).
"""
if not self.weighted_sampling:
idx = np.random.randint(len(self._weights), size=n)
else:
idx = np.random.choice(len(self._weights), size=n, p=self._weights)
if not hasattr(off_rotamer, "__len__"):
off_rotamer = (
[off_rotamer] * len(self.dihedral_atoms)
if len(self.dihedral_atoms) > 0
else [True]
)
else:
off_rotamer = off_rotamer[: len(self.dihedral_atoms)]
if not any(off_rotamer):
coords, weights = (
np.squeeze(self._lib_coords[idx]),
np.squeeze(self._weights[idx]),
)
if self.alignment_method is not None:
target_pos = self.protein.select_atoms(
f"resid {self.site} and segid {self.chain} and not altloc B"
).positions
mx, ori = self.alignment_method(target_pos)
# if self.alignment_method not in {'fit', 'rosetta'}:
old_mx, old_ori = self.alignment_method(coords)
old_ori, old_mx = old_ori, old_mx.transpose(0, 2, 1)
self._coords -= old_ori[:, None, :]
cmx = old_mx @ mx
coords = np.einsum("ijk,ikl->ijl", coords, cmx) + ori
return coords, weights
if hasattr(self, "internal_coords"):
returnables = self._off_rotamer_sample(idx, off_rotamer, **kwargs)
return [
np.squeeze(x) if isinstance(x, np.ndarray) else x for x in returnables
]
elif len(self.dihedral_atoms) == 0:
raise RuntimeError(
"This ensemble has no rotatable bonds and therefore can't sample new conformations"
)
else:
raise AttributeError(
f"{type(self)} objects require both internal coordinates ({type(self)}.internal_coords "
f"attribute) and dihedral standard deviations ({type(self)}.sigmas attribute) to "
f"perform off rotamer sampling"
)
def _off_rotamer_sample(self, idx, off_rotamer, **kwargs):
"""Perform off rotamer sampling. Primarily a helper function for :meth:`~LigandEnsemble.sample`
Parameters
----------
idx : int
Index of the rotamer of the rotamer library that will be perturbed.
off_rotamer : List[bool]
A list indicates a per-dihedral bools indicating which dihedral angles of ``self.dihedral_atoms`` will
have off rotamer sampling. ``self.dihedral_atoms`` e.g. ``off_rotamer = [False, False, False, True,
True]`` will only sample the last two mobile dihedral angles.
**kwargs : dict
return_dihedrals : bool
If True, sample will return a :class:`~MolSysIC` object of the sampled rotamer
Returns
-------
coords : ArrayLike
The 3D coordinates of the sampled rotamer(s)
new_weights : ArrayLike
New weights (relative) of the sampled rotamer(s)
ICs : List[chilife.MolSysIC] (Optional)
Internal coordinate (:class:`~MolSysIC`) objects of the rotamer(s).
"""
# Use accessible volume sampling if only provided a single rotamer
if len(self._weights) == 1 or np.all(np.isinf(self.sigmas)):
new_dihedrals = np.random.random((len(idx), len(off_rotamer))) * 2 * np.pi
new_weights = np.ones(len(idx))
# sample from von mises near rotamer unless more information is provided
elif self.skews is None:
new_dihedrals = np.random.vonmises(
self._rdihedrals[idx][:, off_rotamer],
self._rkappas[idx][:, off_rotamer],
)
new_weights = np.ones(len(idx))
# Sample from skewednorm if skews are provided
# else:
# deltas = skewnorm.rvs(a=self.skews[idx], loc=self.locs[idx], scale=self.sigmas[idx])
# pdf = skewnorm.pdf(deltas, a=self.skews[idx], loc=self.locs[idx], scale=self.sigmas[idx])
# new_dihedrals = np.deg2rad(self.dihedrals[idx] + deltas)
# new_weight = pdf.prod()
else:
new_weights = np.ones(len(idx))
new_weights = self._weights[idx] * new_weights
z_matrix = self._lib_IC.batch_set_dihedrals(
idx, new_dihedrals, 1, self.dihedral_atoms[off_rotamer]
)
fit_coords = self._lib_coords[idx]
ICs = self.internal_coords.copy()
ICs.load_new(z_matrix)
coords = ICs.protein.trajectory.coordinate_array[:, self.ic_mask]
# Realign new conformer with old conformer with basic RMSD alignment
if self.alignment_method is None:
ic_ori = coords.mean(axis=1)
mx, ori = linear_alignment(coords, fit_coords)
coords = (
ICs.protein.trajectory.coordinate_array - ic_ori[:, None, ...]
) @ mx + ori
nori = ori - np.einsum("ij,ijk->ik", ic_ori, mx)
ICs._chain_operators = [{0: {"mx": m, "ori": o}} for m, o in zip(mx, nori)]
ICs.protein.trajectory.coordinate_array = coords
coords = coords[:, self.ic_mask]
if kwargs.setdefault("remove_clashing", 2):
clash_dist = (
2
if isinstance(kwargs["remove_clashing"], bool)
else kwargs["remove_clashing"]
)
# Remove structures with internal clashes
dist = np.linalg.norm(coords[:, self.aidx] - coords[:, self.bidx], axis=2)
sidx = np.atleast_1d(
np.squeeze(np.argwhere(np.all(dist > clash_dist, axis=1)))
)
ICs.use_frames(sidx)
coords = coords[sidx]
new_weights = new_weights[sidx]
if kwargs.setdefault("return_dihedrals", False):
return coords, new_weights, ICs
else:
return coords, new_weights
[docs]
def evaluate(self):
"""Place rotamer ensemble on protein site and recalculate rotamer weights."""
# Calculate external energies
energies = self.energy_func(self)
# Calculate total weights (combining internal and external)
self.weights, self.partition = scoring.reweight_rotamers(
energies, self.temp, self.weights
)
# Remove low-weight rotamers from ensemble
if self._do_trim:
self.trim_rotamers()
[docs]
def trim_rotamers(self, keep_idx: ArrayLike = None) -> None:
"""
Remove rotamers with small weights from ensemble
Parameters
----------
keep_idx : ArrayLike[int]
Indices of rotamers to keep. If None rotamers will be trimmed based off of ``self.trim_tol``
"""
if keep_idx is None:
arg_sort_weights = np.argsort(self.weights)[::-1]
sorted_weights = self.weights[arg_sort_weights]
cumulative_weights = np.cumsum(sorted_weights)
cutoff = np.maximum(
1, len(cumulative_weights[cumulative_weights < 1 - self.trim_tol])
)
keep_idx = arg_sort_weights[:cutoff]
if len(self.weights) == len(self.internal_coords):
self.internal_coords = self.internal_coords.copy()
self.internal_coords.use_frames(keep_idx)
self._coords = self._coords[keep_idx]
self._dihedrals = self._dihedrals[keep_idx]
self.weights = self.weights[keep_idx]
if self.atom_energies is not None:
self.atom_energies = self.atom_energies[keep_idx]
# normalize weights
self.weights /= self.weights.sum()
[docs]
def to_rotlib(
self,
libname: str = None,
description: str = None,
comment: str = None,
reference: str = None,
) -> None:
"""
Saves the current :class:`~LigandEnsemble` as a rotamer library file.
Parameters
----------
libname : str
Name of the rotamer library.
description : str
Description of the rotamer library.
comment : str
Additional comments about the rotamer library.
reference : str
References associated with the rotamer library.
"""
if libname is None:
libname = self.name.lstrip("0123456789.- ")
if description is None:
description = (
"Rotamer library made with chiLife using `to_rotlib` method"
"of a LigandEnsemble."
)
ICs = self.internal_coords.copy()
# Remove chain operators to align all labels on backbone
coords = ICs.protein.trajectory.coordinate_array
lib = {
"rotlib": libname,
"resname": self.res,
"coords": coords,
"internal_coords": ICs,
"weights": self.weights,
"atom_types": self.atom_types.copy(),
"atom_names": self.atom_names.copy(),
"dihedral_atoms": self.dihedral_atoms,
"dihedrals": self.dihedrals,
"sigmas": self.sigmas,
"type": "chilife rotamer library",
"description": description,
"comment": comment,
"reference": reference,
"format_version": 1.4,
}
if hasattr(self, "spin_atoms"):
lib["spin_atoms"] = self.spin_atoms
lib["spin_weights"] = self.spin_weights
np.savez(Path().cwd() / f"{libname}_rotlib.npz", **lib, allow_pickle=True)
def __eq__(self, other):
"""Equivalence measurement between a spin label and another object. A SpinLabel cannot be equivalent to a
non-SpinLabel Two SpinLabels are considered equivalent if they have the same coordinates and weights"""
if not isinstance(other, LigandEnsemble):
return False
elif self._coords.shape != other._coords.shape:
return False
return np.all(np.isclose(self._coords, other._coords)) and np.all(
np.isclose(self.weights, other.weights)
)
[docs]
def set_site(self, site):
"""Assign ``LigandEnsemble`` to a new residue number or "site". If there is an associated protein/molecular
system the ``LigandEnsemble`` will be moved to that site. If not, only the LigandEnsemble residue number will
be changed.
Parameters
----------
site : int
Residue number to assign to the ``LigandEnsemble``.
"""
self.site = site
self.to_site()
[docs]
def copy(self, site: int = None, chain: str = None, rotlib: dict = None):
"""Create a deep copy of the ``LigandEnsemble`` object. Assign new site, chain or rotlib if desired. Useful
when labeling homo-oligomers or proteins with repeating units/domains.
Parameters
----------
site : int
New site number for the copy.
chain : str
New chain identifier for the copy.
rotlib : dict
New (base) rotamer library for the dict.
Returns
-------
new_copy : chilife.LigandEnsemble
A deep copy of the original RotamerLibrary
"""
new_copy = self._base_copy(self._rotlib)
for item in self.__dict__:
if isinstance(item, np.ndarray) or item == "internal_coords":
new_copy.__dict__[item] = self.__dict__[item].copy()
elif item not in ("_protein", "_lib_IC", "_rotlib"):
new_copy.__dict__[item] = deepcopy(self.__dict__[item])
elif self.__dict__[item] is None:
new_copy.protein = None
else:
new_copy.__dict__[item] = self.__dict__[item]
if site is not None:
new_copy.site = site
if chain is not None:
new_copy.chain = chain
return new_copy
def _base_copy(self, rotlib=None):
"""
Helper function for :meth:`~LigandEnsemble.copy`. Creates a copy of the current LigandEnsemble object with only
residue, site, chain, and rotlib information.
Parameters
----------
rotlib: str | Path
The name or ``Path`` object specifying the new rotamer library.
Returns
-------
LigandEnsemble
The new base copy of the ``LigandEnsemble`` object.
"""
return LigandEnsemble(self.res, self.site, rotlib=rotlib, chain=self.chain)
[docs]
def to_site(self, target_pos: ArrayLike = None) -> None:
"""
Method that performs the actual placement of the ``LigandEnsemble`` at a given site. Alignment will be
performed using the alignment method specified during object construction.
Parameters
----------
target_pos: ArrayLike
Optional array of points that specifies the site. If not provided, the ``self.site`` residue of the
associated protein/molecular system will be used.
"""
if self.alignment_method is None:
return None
if target_pos is None:
target_pos = self.protein.select_atoms(
f"resid {self.site} and segid {self.chain} and not altloc B"
).positions
mx, ori = self.alignment_method(target_pos)
old_mx, old_ori = self.alignment_method(self.coords)
old_ori, old_mx = old_ori, old_mx.transpose(0, 2, 1)
self._coords -= old_ori[:, None, :]
cmx = old_mx @ mx
self._coords = np.einsum("ijk,ikl->ijl", self._coords, cmx) + ori
self.ICs_to_site()
[docs]
def ICs_to_site(self):
"""Modify the internal coordinates to be aligned with the site that the LigandEnsemble is attached to"""
mx, ori = self.alignment_method(
self.internal_coords.protein.trajectory.coordinate_array
)
self.ic_ori, self.ic_mx = ori, np.swapaxes(mx, -1, -2)
new_mxs = self.ic_mx @ self.mx
new_ori = np.einsum("ij,ikj->ij", -self.ic_ori, new_mxs) + self.origin
op = [{0: {"mx": m, "ori": o}} for m, o in zip(new_mxs, new_ori)]
self.internal_coords.chain_operators = op
[docs]
def get_lib(self, rotlib):
"""
Helper function to find the rotlib and load it into memory.
Parameters
----------
rotlib : str | Path
The rotlib name or file location.
Returns
-------
lib : dict
The rotamer library data as a dictionary.
"""
was_none = True if rotlib is None else False
rotlib = self.res if rotlib is None else rotlib
# If the rotlib isn't a dict try to figure out where it is
if not isinstance(rotlib, dict):
trotlib = io.get_possible_rotlibs(
rotlib, suffix="rotlib", extension=".npz", was_none=was_none
)
if trotlib:
rotlib = trotlib
else:
raise NameError(
f"There is no rotamer library called {rotlib} in this directory or in chilife"
)
lib = io.read_library(rotlib, None, None)
# If rotlib is a dict, assume that dict is in the format that read_library`would return
else:
lib = rotlib
# Copy mutable library values (mostly np arrays) so that instances do not share data
lib = {
key: value.copy() if hasattr(value, "copy") else value
for key, value in lib.items()
}
# Modify library to be appropriately used with self.__dict__.update
self._rotlib = {
key: value.copy()
if hasattr(value, "copy") and key != "internal_coords"
else value
for key, value in lib.items()
}
lib["_coords"] = lib.pop("coords").copy()
lib["_dihedrals"] = lib.pop("dihedrals").copy()
if "skews" not in lib:
lib["skews"] = None
return lib
[docs]
def set_dihedral_sampling_sigmas(self, value):
"""
Helper function to assign new sigmas for off-rotamer sampling.
Parameters
----------
value : float | ArrayLike[float]
The sigma value(s) to assign. A single float will assign all dihedrals of all rotamers to the same sigma
(isotropic). An array with as many elements as dihedrals will assign different sigmas to each dihedral
(Anisotropic). An array of dimensions n_rots by n_dihedrals will assign different anisotropic sigmas to
different rotamers.
"""
value = np.asarray(value)
if value.shape == ():
self.sigmas = (
np.ones((len(self._weights), len(self.dihedral_atoms))) * value
)
elif len(value) == len(self.dihedral_atoms) and len(value.shape) == 1:
self.sigmas = np.tile(value, (len(self._weights), 1))
elif value.shape == (len(self._weights), len(self.dihedral_atoms)):
self.sigmas = value.copy()
else:
raise ValueError(
"`dihedral_sigmas` must be a scalar, an array the length of the `self.dihedral atoms` or "
"an array with the shape of (len(self.weights), len(self.dihedral_atoms))"
)
@property
def bonds(self):
"""Set of atom indices corresponding to the atoms that form covalent bonds"""
if not hasattr(self, "_bonds"):
icmask_map = {x: i for i, x in enumerate(self.ic_mask)}
self._bonds = np.array(
[
(icmask_map[a], icmask_map[b])
for a, b in self.internal_coords.bonds
if a in icmask_map and b in icmask_map
]
)
return self._bonds
@bonds.setter
def bonds(self, inp):
self._bonds = set(tuple(i) for i in inp)
idxs = np.arange(len(self.atom_names))
all_pairs = set(combinations(idxs, 2))
self._non_bonded = all_pairs - self._bonds
@property
def non_bonded(self):
"""Set of atom indices corresponding to the atoms that are not covalently bonded. Also excludes atoms that
have 1-n non-bonded interactions where ``n=self._exclude_nb_interactions`` . By default, 1-3 interactions are
excluded"""
if not hasattr(self, "_non_bonded"):
pairs = {
v.index: [
path
for path in self._graph.get_all_shortest_paths(v)
if len(path) <= (self._exclude_nb_interactions)
]
for v in self._graph.vs
}
pairs = {(a, c) for a in pairs for b in pairs[a] for c in b if a < c}
all_pairs = set(combinations(range(len(self.atom_names)), 2))
self._non_bonded = all_pairs - pairs
return sorted(list(self._non_bonded))
@non_bonded.setter
def non_bonded(self, inp):
self._non_bonded = set(tuple(i) for i in inp)
idxs = np.arange(len(self.atom_names))
all_pairs = set(combinations(idxs, 2))
self._bonds = all_pairs - self._non_bonded
def __len__(self):
"""Number of rotamers in the ensemble"""
return len(self.coords)
@property
def clash_ori(self):
"""Location to use as 'center' when searching for atoms with potential clashes"""
if isinstance(self._clash_ori_inp, (np.ndarray, list)):
if len(self._clash_ori_inp) == 3:
return self._clash_ori_inp
elif isinstance(self._clash_ori_inp, str):
if self._clash_ori_inp in ["cen", "centroid"]:
return self.centroid
elif (ori_name := self._clash_ori_inp.upper()) in self.atom_names:
return np.squeeze(self._coords[0][ori_name == self.atom_names])
else:
raise ValueError(
f"Unrecognized clash_ori option {self._clash_ori_inp}. Please specify a 3D vector, an "
f"atom name or `centroid`"
)
return self._clash_ori
@clash_ori.setter
def clash_ori(self, inp):
self._clash_ori_inp = inp
@property
def centroid(self):
"""Get the centroid of the whole ensemble."""
return self._coords.mean(axis=(0, 1))
@property
def coords(self):
"""Cartesian coordinates of all rotamers in the ensemble."""
return self._coords
@coords.setter
def coords(self, coords):
# TODO: Add warning about removing isomers
# Allow users to input a single rotamer
coords = coords if coords.ndim == 3 else coords[None, :, :]
# Check if atoms match
if coords.shape[1] == len(self.side_chain_idx):
tmp = np.array([self._coords[0].copy() for _ in coords])
tmp[:, self.side_chain_idx] = coords
coords = tmp
if coords.shape[1] != len(self.atoms):
raise ValueError(
"The number of atoms in the input array does not match the number of atoms of the residue"
)
self._coords = coords
self.internal_coords = self.internal_coords.copy()
self.internal_coords.set_cartesian_coords(coords, self.ic_mask)
# Check if they are all at the same site
self._dihedrals = np.rad2deg(
[ic.get_dihedral(1, self.dihedral_atoms) for ic in self.internal_coords]
)
if len(self.dihedral_atoms) == 1:
self._dihedrals = self.dihedrals[:, None]
# Apply uniform weights
self.weights = np.ones(len(self._dihedrals))
self.weights /= self.weights.sum()
@property
def dihedrals(self):
"""Values of the (mobile) dihedral angles of all rotamers in the ensemble"""
return self._dihedrals
@dihedrals.setter
def dihedrals(self, dihedrals):
dihedrals = np.asarray(dihedrals)
dihedrals = dihedrals if dihedrals.ndim == 2 else dihedrals[None, :]
if dihedrals.shape[1] != self.dihedrals.shape[1]:
raise ValueError(
"The input array does not have the correct number of dihedrals"
)
if len(dihedrals) != self.dihedrals.shape[0]:
warnings.warn(
"WARNING: Setting dihedrals in this fashion will set all bond lengths and angles to that "
"of the first rotamer in the library effectively removing stereo-isomers from the ensemble. It "
"will also set all weights to ."
)
# Apply uniform weights
self.weights = np.ones(len(dihedrals))
self.weights /= self.weights.sum()
idxs = [0 for _ in range(len(dihedrals))]
else:
idxs = np.arange(len(dihedrals))
self._dihedrals = dihedrals
z_matrix = self.internal_coords.batch_set_dihedrals(
idxs, np.deg2rad(dihedrals), 1, self.dihedral_atoms
)
self.internal_coords = self.internal_coords.copy()
self.internal_coords.load_new(z_matrix)
self._coords = self.internal_coords.protein.trajectory.coordinate_array.copy()[
:, self.ic_mask
]
@property
def mx(self):
"""The rotation matrix to rotate a residue from the local coordinate frame to the current residue. The local
coordinate frame is defined by the alignment method"""
if self.alignment_method is None:
mx, ori = linear_alignment(
self.internal_coords.protein.trajectory.coordinate_array[
:, self.ic_mask
],
self.coords,
)
else:
mx, ori = self.alignment_method(self.coords)
return mx
@property
def origin(self):
"""Origin of the local coordinate frame"""
if self.alignment_method is None:
mx, ori = linear_alignment(
self.internal_coords.protein.trajectory.coordinate_array[
:, self.ic_mask
],
self.coords,
)
else:
mx, ori = self.alignment_method(self.coords)
return ori
@property
def protein(self):
return self._protein
@protein.setter
def protein(self, protein):
if hasattr(protein, "atoms") and isinstance(
protein.atoms, (mda.AtomGroup, MolecularSystemBase)
):
self._protein = protein
self.init_protein()
self.nataa = self.protein.select_atoms(self.selstr)[0].resname
elif protein is None:
self._protein = protein
else:
raise ValueError(
"The input protein must be an instance of MDAnalysis.Universe, MDAnalysis.AtomGroup, or "
"chilife.MolSys"
)
def assign_defaults(kwargs):
"""
Helper function to assign default values to kwargs that have not been explicitly assigned by the user. Also
checks to make sure that the user provided kwargs are real kwargs.
Parameters
----------
kwargs : dict
Dictionary of user supplied keyword arguments.
Returns
-------
kwargs : dict
Dictionary of user supplied keyword arguments augmented with defaults for all kwargs the user did not supply.
"""
# Make all string arguments lowercase
for key, value in kwargs.items():
if isinstance(value, str):
kwargs[key] = value.lower()
# Default parameters
defaults = {
"protein_tree": None,
"temp": 298,
"clash_radius": None,
"_clash_ori_inp": kwargs.pop("clash_ori", "cen"),
"alignment_method": "svd",
"dihedral_sigmas": 35,
"weighted_sampling": False,
"eval_clash": True if not kwargs.get("minimize", False) else False,
"use_H": True,
"_exclude_nb_interactions": kwargs.pop("exclude_nb_interactions", 3),
"_sample_size": kwargs.pop("sample", False),
"energy_func": default_energy_func,
"_minimize": kwargs.pop("minimize", False),
"min_method": "L-BFGS-B",
"_do_trim": kwargs.pop("trim", True),
"trim_tol": 0.005,
"ignore_waters": True,
}
# Overwrite defaults
kwargs_filled = {**defaults, **kwargs}
# Make sure there are no unused parameters
if len(kwargs_filled) != len(defaults):
raise TypeError(
f"Got unexpected keyword argument(s): "
f'{", ".join(key for key in kwargs if key not in defaults)}'
)
return kwargs_filled.items()