Source code for chilife.protein_utils

import os
import pickle
import math

from pathlib import Path
from typing import Set, List, Union, Tuple

from numpy.typing import ArrayLike
from scipy.spatial.distance import cdist
from scipy.spatial import cKDTree
from dataclasses import dataclass

try:
    from rdkit import Chem
    from rdkit.Chem import AllChem
    rdkit_found = True
except:
    rdkit_found = False

import MDAnalysis
import numpy as np
from scipy.spatial.transform import Rotation

from MDAnalysis.core.topologyattrs import Atomindices, Resindices, Segindices, Segids
import MDAnalysis as mda

from .globals import SUPPORTED_RESIDUES
from .MolSys import MolecularSystemBase, MolSys, concat_molsys
from .numba_utils import get_sasa, _ic_to_cart
from .pdb_utils import get_backbone_atoms, get_bb_candidates
import chilife as xl

import chilife.scoring as scoring
import chilife.io as io
import chilife.RotamerEnsemble as re

from .base_classes import Ensemble

import igraph as ig



[docs]
def get_dihedral_rotation_matrix(theta: float, v: ArrayLike) -> ArrayLike:
    """Build a matrix that will rotate coordinates about a vector, v, by theta in radians.

    Parameters
    ----------
    theta : float
        Rotation angle in radians.
    v : (3,) ArrayLike
        Three dimensional vector to rotate about.

    Returns
    -------
    rotation_matrix : np.ndarray
            Matrix that will rotate coordinates about the vector, V by angle theta.
    """

    # Normalize input vector
    v = v / np.linalg.norm(v)

    # Compute Vx matrix
    Vx = np.zeros((3, 3))
    Vx[[2, 0, 1], [1, 2, 0]] = v
    Vx -= Vx.T

    # Rotation matrix. See https://en.wikipedia.org/wiki/Rotation_matrix#Rotation_matrix_from_axis_and_angle
    rotation_matrix = (
            np.identity(3) * np.cos(theta)
            + np.sin(theta) * Vx
            + (1 - np.cos(theta)) * np.outer(v, v)
    )

    return rotation_matrix




[docs]
def get_dihedral(p: ArrayLike) -> float:
    """Calculates dihedral of a given set of atoms, ``p`` . Returns value in radians.

     .. code-block:: python

                         3
              ------>  /
             1-------2
           /
         0

    Parameters
    ----------
    p : (4, 3) ArrayLike
        Matrix containing coordinates to be used to calculate dihedral.

    Returns
    -------
    dihedral : float
        Dihedral angle in radians.
    """

    # Unpack p
    p0, p1, p2, p3 = p

    # Define vectors from coordinates
    b0 = -1.0 * (p1 - p0)
    b1 = p2 - p1
    b2 = p3 - p2

    # Normalize dihedral bond vector
    b1 /= np.linalg.norm(b1)

    # Calculate dihedral projections orthogonal to the bond vector
    v = b0 - np.dot(b0, b1) * b1
    w = b2 - np.dot(b2, b1) * b1

    # Calculate angle between projections
    x = np.dot(v, w)
    y = np.dot(np.cross(b1, v), w)

    dihedral = math.atan2(y, x)

    return dihedral




[docs]
def get_dihedrals(p1: ArrayLike, p2: ArrayLike, p3: ArrayLike, p4: ArrayLike) -> ArrayLike:
    """Vectorized version of get_dihedral

    Parameters
    ----------
    p1 : ArrayLike
        Array containing coordinates of the first point in the dihedral.
    p2 : ArrayLike
        Array containing coordinates of the second point in the dihedral
    p3 : ArrayLike
        Array containing coordinates of the third point in the dihedral
    p4 : ArrayLike
        Array containing coordinates of the fourth point in the dihedral

    Returns
    -------
    dihedrals : ArrayLike
        Dihedral angles in radians.
    """

    # Define vectors from coordinates
    b0 = p1 - p2
    b1 = p3 - p2
    b2 = p4 - p3

    # Normalize dihedral bond vector
    b1 /= np.linalg.norm(b1, axis=-1)[:, None]

    # Calculate dihedral projections orthogonal to the bond vector
    v = b0 - np.einsum('ij,ij->i', b0, b1)[:, None] * b1
    w = b2 - np.einsum('ij,ij->i',b2, b1)[:, None] * b1

    # Calculate angle between projections
    x = np.einsum('ij,ij->i', v, w)
    y = np.einsum('ij,ij->i', np.cross(b1, v, axis=-1), w)

    dihedral = np.arctan2(y, x)

    return dihedral




[docs]
def get_angle(p: ArrayLike) -> float:
    r"""Calculate the angle created by 3 points.

     .. code-block:: python

               p2
             / θ \
          p1      p3

    Parameters
    ----------
    p: ArrayLike :
        Array of three points to calculate the angle between.

    Returns
    -------
    angle : float
        Angle created by the three points.

    """
    p1, p2, p3 = p
    v1 = p1 - p2
    v2 = p3 - p2
    X = v1 @ v2
    Y = np.cross(v1, v2)
    Y = math.sqrt(Y @ Y)

    angle = math.atan2(Y, X)

    return angle




[docs]
def get_angles(p1: ArrayLike, p2: ArrayLike, p3: ArrayLike) -> ArrayLike:
    r"""Vectorized version of get_angle.

    Parameters
    ----------
    p1: ArrayLike :
        Array of first points in the angles.
    p2: ArrayLike :
        Array of second points in the angles.
    p3: ArrayLike :
        Array of third points in angle.


    Returns
    -------
    angles : float
        Array of anlges.

    """
    v1 = p1 - p2
    v2 = p3 - p2
    X =  np.einsum('ij,ij->i', v1, v2)
    Y = np.cross(v1, v2, axis=-1)
    Y = np.sqrt((Y * Y).sum(axis=-1))

    angles = np.arctan2(Y, X)

    return angles




[docs]
def set_dihedral(p: ArrayLike, angle: float, mobile: ArrayLike) -> ArrayLike:
    """Sets the dihedral angle by rotating all ``mobile`` atoms from their current position about the dihedral bond
    defined by the four atoms in ``p`` . Dihedral will be set to the value of ``angle`` in degrees.

    Parameters
    ----------
    p : ArrayLike
        Coordinates of atoms that define dihedral to rotate about.
    angle : float
        New angle to set the dihedral to (degrees).
    mobile : np.ndarray
        Atom coordinates to move by setting dihedral.

    Returns
    -------
    new_mobile : np.ndarray
        New positions for the mobile atoms
    """

    current = get_dihedral(p)
    angle = np.deg2rad(angle) - current
    angle = angle

    ori = p[1]
    mobile -= ori
    v = p[2] - p[1]
    v /= np.linalg.norm(v)
    R = get_dihedral_rotation_matrix(angle, v)

    new_mobile = R.dot(mobile.T).T + ori

    return new_mobile




[docs]
def guess_chain(protein, site):
    """
    Reads chain from protein or makes an educated guess on which chain a particular site resides on for a given
    Protein/Universe/AtomGroup.

    Parameters
    ----------
    protein : mda.Universe | mda.AtomGroup | chilife.MolSys
        The protein being labeled.
    site :  int
        The residue being labeled.
    Returns
    -------
    chain : str
        Best guess for the chain on which the selected residue resides.

    """
    if protein is None:
        chain = "A"
    elif len(set(protein.segments.segids)) == 1:
        chain = protein.segments.segids[0]
    elif np.isin(protein.residues.resnums, site).sum() == 0:
        raise ValueError(f"Residue {site} is not present on the provided protein")
    elif np.isin(protein.residues.resnums, site).sum() == 1:
        chain = protein.select_atoms(f"resid {site}").segids[0]
    else:
        raise ValueError(
            f"Residue {site} is present on more than one chain. Please specify the desired chain"
        )
    return chain




[docs]
def new_chain(protein):
    """Get the next unique chain identifier from a molecular system. Used for creating new chain IDs for
    LigandEnsembles

    Parameters
    ----------
    protein: Union[mda.Universe, mda.AtomGroup, chilife.MolSys]
        The protein/molecular system with chain IDs that should not be used.

    Returns
    -------
    chain : str
        The next unique chain identifier.
    """
    if protein is None:
        return "A"

    segids = list("ABCDEFGHIJKLMNOPQRSTUVWXYZ")
    for seg in protein.segments:
        segids.remove(seg.segid)

    chain = segids.pop(0)
    return chain




[docs]
def guess_mobile_dihedrals(ICs, aln_atoms=None, mask=False):
    """
    Guess the flexible, uniqe, side chain dihedrals of a MolSysIC object.

    Parameters
    ----------
    ICs : MolSysIC
        Internal coordinates object you wish to guess dihedrals for.

    aln_atoms : List[str]
        List of atom names corresponding to the "alignment atoms" of the molecule. These are usually the core backbone
        atoms, e.g. N CA C for a protein.

    mask : bool
        Return a boolean array to mask the internal coordinates z-matrix.

    Returns
    -------
    dihedral_defs : List[List[str]]
        List of unique mobile dihedral definitions
    """
    if aln_atoms is not None:
        root_idx = np.argwhere(ICs.atom_names == aln_atoms[1]).flat[0]
        aname_lst = ICs.atom_names.tolist()
        neighbor_idx = [aname_lst.index(a) for a in aln_atoms[::2]]
        aln_idx = [neighbor_idx[0], root_idx, neighbor_idx[1]]
        bb_atoms = aln_idx + get_backbone_atoms(ICs.topology.graph, root_idx, neighbor_idx, sorted_args=aln_idx)
        bb_atoms = [ICs.atom_names[i] for i in bb_atoms]
        bb_atoms += [ICs.atom_names[i] for i in ICs.topology.graph.neighbors(root_idx) if i not in neighbor_idx]

    else:
        bb_atoms = get_bb_candidates(ICs.atom_names, ICs.resnames[0])
        bb_atoms = [atom for atom in bb_atoms if atom in ICs.atom_names]
        bb_idxs = np.argwhere(np.isin(ICs.atom_names, bb_atoms)).flatten()
        neighbors = set(sum([ICs.topology.graph.neighbors(i) for i in bb_idxs], start=[]))
        neighbor_names = [ICs.atom_names[n] for n in neighbors]
        bb_atoms += [n for n in neighbor_names if n not in bb_atoms]

    sc_mask = ~np.isin(ICs.atom_names, bb_atoms)

    ha_mask = ~(ICs.atom_types=='H')
    mask = ha_mask * sc_mask
    idxs = np.argwhere(mask).flatten()

    cyverts = ICs.topology.ring_idxs
    rotatable_bonds = {}
    _idxs = []
    for idx in idxs:
        dihedral = ICs.z_matrix_idxs[idx]
        bond = tuple(dihedral[1:3])

        # Skip duplicate dihedral defs
        if bond in rotatable_bonds:
            continue

        # Skip bonds that define ic_mx
        elif idx < 3:
            continue

        # Skip double bonds
        elif ICs.bond_types[tuple(sorted(bond[::-1]))] > 1:
            continue

        # Skip ring dihedrals
        elif any(all(a in ring for a in bond) for ring in cyverts):
            continue

        else:
            rotatable_bonds[bond] = dihedral
            _idxs.append(idx)

    idxs = _idxs
    dihedral_defs = [ICs.z_matrix_names[idx][::-1] for idx in idxs]
    return dihedral_defs




[docs]
@dataclass
class FreeAtom:
    """Atom class for atoms in cartesian space that do not belong to a MolSys.

    Attributes
    ----------
    name : str
        Atom name
    atype : str
        Atom type
    index : int
        Atom number
    resn : str
        Name of the residue that the atom belongs to
    resi : int
        The residue index/number that the atom belongs to
    coords : np.ndarray
        The cartesian coordinates of the Atom
    """

    name: str
    atype: str
    index: int
    resn: str
    resi: int
    coords: np.ndarray




[docs]
def save_ensemble(name: str, atoms: ArrayLike, coords: ArrayLike = None) -> None:
    """Save a rotamer ensemble as multiple states of the same molecule.

    Parameters
    ----------
    name : str
        file name to save rotamer ensemble to
    atoms : ArrayLike
        list of Atom objects
    coords : ArrayLike
        Array of atom coordinates corresponding to Atom objects
    """

    if not name.endswith(".pdb"):
        name += ".pdb"

    if coords is None and isinstance(atoms[0], list):
        with open(name, "w", newline="\n") as f:
            for i, model in enumerate(atoms):
                f.write(f"MODEL {i + 1}\n")
                for atom in model:
                    f.write(
                        f"ATOM  {atom.index + 1:5d}  {atom.name:<4s}{atom.resn:3s} {'A':1s}{atom.resi:4d}   "
                        f"{atom._coords[0]:8.3f}{atom._coords[1]:8.3f}{atom._coords[2]:8.3f}{1.0:6.2f}{1.0:6.2f}        "
                        f"  {atom.atype:>2s}\n"
                    )
                f.write("ENDMDL\n")

    elif len(coords.shape) > 2:
        with open(name, "w", newline="\n") as f:
            for i, model in enumerate(coords):
                f.write(f"MODEL {i + 1}\n")
                for atom, coord in zip(atoms, model):
                    f.write(
                        f"ATOM  {atom.index + 1:5d}  {atom.name:<4s}{atom.resn:3s} {'A':1s}{atom.resi:4d}   "
                        f"{coord[0]:8.3f}{coord[1]:8.3f}{coord[2]:8.3f}{1.0:6.2f}{1.0:6.2f}          {atom.atype:>2s}\n"
                    )
                f.write("ENDMDL\n")

    else:
        save_pdb(name, atoms, coords)




[docs]
def save_pdb(name: Union[str, Path], atoms: ArrayLike, coords: ArrayLike, mode: str = "w") -> None:
    """Save a single state pdb structure of the provided atoms and coords.

    Parameters
    ----------
    name : str, Path
        Name or Path object of file to save
    atoms : ArrayLike
        List of Atom objects to be saved
    coords : ArrayLike
        Array of atom coordinates corresponding to atoms
    mode : str
        File open mode. Usually used to specify append ("a") when you want to add structures to a PDB rather than
        overwrite that pdb.
    """
    name = Path(name) if isinstance(name, str) else name
    name = name.with_suffix(".pdb")

    with open(name, mode, newline="\n") as f:
        f.write('MODEL\n')
        for atom, coord in zip(atoms, coords):
            f.write(
                f"ATOM  {atom.index + 1:5d} {atom.name:^4s} {atom.resn:3s} {'A':1s}{atom.resi:4d}    "
                f"{coord[0]:8.3f}{coord[1]:8.3f}{coord[2]:8.3f}{1.0:6.2f}{1.0:6.2f}          {atom.atype:>2s}  \n"
            )
        f.write('ENDMDL\n')




[docs]
def get_missing_residues(
        protein: Union[MDAnalysis.Universe, MDAnalysis.AtomGroup],
        ignore: Set[int] = None,
        use_H: bool = False,
        ignore_waters = True
) -> List:
    """Get a list of RotamerEnsemble objects corresponding to the residues of the provided protein that are missing
    heavy atoms.

    Parameters
    ----------
    protein : MDAnalysis.Universe, MDAnalysis.AtomGroup
        MolSys to search for residues with missing atoms.
    ignore : set
        List of residue numbers to ignore. Usually sites you plan to label or mutate.
    use_H : bool
        Whether the new side chain should have hydrogen atoms.

    Returns
    -------
    missing_residues : list
        A list of RotamerEnsemble objects corresponding to residues with missing heavy atoms.
    """
    ignore = set() if ignore is None else ignore
    missing_residues = []
    cache = {}

    for res in protein.residues:
        # Only consider supported residues because otherwise chiLife wouldn't know what's missing
        if (
                res.resname not in SUPPORTED_RESIDUES
                or res.resnum in ignore
                or res.resname in ["ALA", "GLY"]
        ):
            continue

        # Check if there are any missing heavy atoms
        heavy_atoms = res.atoms.types[res.atoms.types != "H"]
        resn = res.resname

        a = cache.setdefault(resn, len(xl.RotamerEnsemble(resn).atom_names))
        if len(heavy_atoms) != a:
            missing_residues.append(
                xl.RotamerEnsemble(
                    res.resname,
                    res.resnum,
                    protein=protein,
                    chain=res.segid,
                    use_H=use_H,
                    ignore_waters=ignore_waters
                )
            )

    return missing_residues




[docs]
def mutate(
        protein: MDAnalysis.Universe,
        *ensembles: 'RotamerEnsemble',
        add_missing_atoms: bool = True,
        rotamer_index: Union[int, str, None] = None,
        use_H: bool = None,
        ignore_waters: bool = None
        ) -> MDAnalysis.Universe:
    """Create a new Universe where the native residue is replaced with the highest probability rotamer from a
    RotamerEnsemble or SpinLabel object.

    Parameters
    ----------
    protein : MDAnalysis.Universe
        An MDA Universe object containing protein to be spin labeled
    ensembles : RotamerEnsemble, SpinLabel
        Precomputed RotamerEnsemble or SpinLabel object to use for selecting and replacing the spin native amino acid
    rotamer_index : int, str, None
        Index of the rotamer to be used for the mutation. If None, the most probable rotamer will be used. If 'all',
        mutate will return an ensemble with each rotamer, if random, mutate will return an ensemble with a random
        rotamer.
    add_missing_atoms : bool
        Model side chains missing atoms if they are not present in the provided structure.
    ignore_waters : bool
        ignore waters when selecting conforers for mutation.
    Returns
    -------
    U : MDAnalysis.Universe
        New Universe with a copy of the spin labeled protein with the highest probability rotamer
    """

    # Check for dRotamerEnsembles in ensembles
    temp_ensemble = []
    for lib in ensembles:
        if isinstance(lib, Ensemble):
            temp_ensemble.append(lib)
        else:
            raise TypeError(f"mutate only accepts (d)RotamerEnsemble and (d)SpinLabel objects, not {lib}.")

    ensembles = temp_ensemble

    if add_missing_atoms:
        use_H = use_H if use_H is not None else param_from_rotlibs('use_H', ensembles)
        ignore_waters = ignore_waters if ignore_waters is not None else param_from_rotlibs('ignore_waters', ensembles)

        missing_residues = get_missing_residues(protein,
                                                ignore={res.site for res in ensembles},
                                                use_H=use_H,
                                                ignore_waters=ignore_waters)

        ensembles = list(ensembles) + missing_residues

    label_sites = {}
    for spin_label in ensembles:
        if isinstance(spin_label, xl.dRotamerEnsemble):
            label_sites[spin_label.site1, spin_label.icode1, spin_label.chain] = spin_label
            label_sites[spin_label.site2, spin_label.icode2, spin_label.chain] = spin_label
        else:
            label_sites[spin_label.site, spin_label.icode, spin_label.chain] = spin_label

    # Remove waters if they are being ignored. 
    if ignore_waters:
        protein = protein.select_atoms('(not altloc B) and (not (byres name OH2 or resname HOH))')
    else:
        protein = protein.select_atoms('(not altloc B)')
        
    label_selstr = " or ".join([f"({label.selstr})" for label in ensembles])
    other_atoms = protein.select_atoms(f"not ({label_selstr})")

    resids = [res.resid for res in protein.residues]
    icodes = [res.icode for res in protein.residues] if hasattr(protein, "icodes") else None
    # Allocate lists for universe information
    atom_info = []
    res_names = []
    segidx = []
    bonds = []

    # Loop over residues in old universe
    for i, res in enumerate(protein.residues):
        resloc = (res.resid, res.icode, res.segid)

        # If the residue is the spin labeled residue replace it with the highest probability spin label
        if resloc in label_sites:
            rot_ens = label_sites[resloc]
            if isinstance(rot_ens, xl.dRotamerEnsemble):
                r1l = len(rot_ens.rl1mask)
                r2l = len(rot_ens.rl2mask)
                both = r1l + r2l

                if resloc[0] == rot_ens.site1:
                    # Add site 1
                    atom_info += [
                        (i, name, atype)
                        for name, atype in zip(rot_ens.atom_names[:r1l], rot_ens.atom_types[:r2l])
                    ]

                elif resloc[0] == rot_ens.site2:
                    atom_info += [
                        (i, name, atype)
                        for name, atype in zip(rot_ens.atom_names[r1l:r1l + r2l], rot_ens.atom_types[r1l:r1l + r2l])
                    ]
                    # Add cap
                    atom_info += [
                        (i, name, atype)
                        for name, atype in zip(rot_ens.atom_names[both:], rot_ens.atom_types[both:])
                    ]
                else:
                    raise RuntimeError("The residue specified is not part of the dRotamerEnsemble being constructed")
            else:
                atom_info += [
                    (i, name, atype)
                    for name, atype in zip(rot_ens.atom_names, rot_ens.atom_types)
                ]

            offset = len(atom_info) - len(rot_ens.atom_names)
            _add_peptide_bond(atom_info, rot_ens, bonds)
            bonds.extend([[b1+offset, b2 + offset] for b1, b2 in rot_ens.bonds])

            # Add missing Oxygen from rotamer ensemble
            res_names.append(rot_ens.res)
            segidx.append(rot_ens.segindex)

        # Else retain the atom information from the parent universe
        else:
            atom_info += [
                (i, atom.name, atom.type) for atom in res.atoms if atom.altLoc != "B"
            ]
            res_names.append(res.resname)
            segidx.append(res.segindex)

    # Reindex segments in case any were dropped from the parent universe
    idxmap = {idx: i for i, idx in enumerate(np.unique(segidx))}
    segidx = np.fromiter((idxmap[idx] for idx in segidx), dtype=int)

    # Unzip atom information into individual lists
    residx, atom_names, atom_types = zip(*atom_info)
    segids = protein.segments.segids
    # Allocate a new universe with the appropriate information

    if isinstance(protein, (mda.Universe, mda.AtomGroup)):
        U = make_mda_uni(atom_names, atom_types, res_names, residx, resids, segidx, segids, icodes=icodes)
    elif isinstance(protein, MolecularSystemBase):
        U = MolSys.from_arrays(atom_names, atom_types, res_names, residx, resids, segidx, segids=segids, icodes=icodes)

    U.add_bonds(bonds)
    # Apply old coordinates to non-spinlabel atoms
    new_other_atoms = U.select_atoms(f"not ({label_selstr})")
    new_other_atoms.atoms.positions = other_atoms.atoms.positions

    if rotamer_index == 'all':
        max_label_len = max([len(label) for label in label_sites.values()])
        coordinates = np.array([U.atoms.positions.copy() for _ in range(max_label_len)])
        U.load_new(coordinates)

        for i, ts in enumerate(U.trajectory):
            for spin_label in label_sites.values():
                sl_atoms = U.select_atoms(spin_label.selstr)
                if len(spin_label) <= i:
                    sl_atoms.positions = spin_label.coords[-1]
                else:
                    sl_atoms.positions = spin_label.coords[i]

    else:
        for spin_label in label_sites.values():
            sl_atoms = U.select_atoms(spin_label.selstr)
            if rotamer_index == 'random':
                rand_idx = np.random.choice(len(spin_label.coords), p=spin_label.weights)
                sl_atoms.atoms.positions = spin_label.coords[rand_idx]
            elif isinstance(rotamer_index, int):
                sl_atoms.atoms.positions = spin_label.coords[rotamer_index]
            else:
                sl_atoms.atoms.positions = spin_label.coords[np.argmax(spin_label.weights)]

    return U


def _add_peptide_bond(atom_info, rot_ens, bonds):
    res_num = atom_info[-1][0]
    offset = len(atom_info) - len(rot_ens.atom_names)

    if res_num > 1 and 'N' in rot_ens.atom_names:
        C_idx = None
        wback = 1
        prev_res = atom_info[offset - wback][0]
        while prev_res == res_num - 1:
            if atom_info[offset - wback][1] == 'C':
                C_idx = offset - wback
                break
            wback += 1

        if C_idx is not None:
            N_idx = np.argwhere(rot_ens.atom_names == 'N').flatten()[0] + offset
            bonds.append([C_idx, N_idx])


[docs]
def randomize_rotamers(
        protein: Union[mda.Universe, mda.AtomGroup],
        rotamer_libraries: List['RotamerEnsemble'],
        **kwargs,
) -> None:
    """Modify a protein object in place to randomize side chain conformations.

    Parameters
    ----------
    protein : MDAnalysis.Universe, MDAnalysis.AtomGroup
        MolSys object to modify.
    rotamer_libraries : list
        RotamerEnsemble objects attached to the protein corresponding to the residues to be repacked/randomized.
    **kwargs : dict
        Additional Arguments to pass to ``sample`` method. See :mod:`sample <chiLife.RotamerEnsemble.sample>` .
    """
    for rotamer in rotamer_libraries:
        coords, weight = rotamer.sample(off_rotamer=kwargs.get("off_rotamer", False))
        mask = ~np.isin(protein.ix, rotamer.clash_ignore_idx)
        protein.atoms[~mask].positions = coords




[docs]
def param_from_rotlibs(param: str, ensembles: List['RotamerEnsemble']):
    """
    Get the value of a parameter used across a set of rotamer ensembles if that parameter is consistent. If the
    parameter is not the same value for all rotamer ensembles, an AttributeError will be thrown.
    Parameters
    ----------
    param : str
        The name of the parameter.
    ensembles : List[RotamerEnsembles]
        The rotamer ensembles to search for the parameter int

    Returns
    -------
    param_value: Any
        The value of the parameter across all the rotamer libarries.
    """
    # If there are no residues being mutated just return defaults
    if len(ensembles) == 0:
        defaults = {}
        defaults.update(re.assign_defaults(defaults))
        return defaults[param]

    # Otherise check to make sure they are all the same
    ensemble_params = [getattr(ensemble, param, None) for ensemble in ensembles]
    if all(e == ensemble_params[0] for e in ensemble_params[1:]):
        param_value = ensemble_params[0]
        return param_value

    # And return an error if they are not
    else:
        raise AttributeError(f"User provided ensembles with different {param} parameters. Make sure all "
                             f"ensembles use the same value for {param}")





[docs]
def get_sas_res(
        protein: Union[mda.Universe, mda.AtomGroup],
        cutoff: float = 30,
        forcefield = 'charmm',
) -> Set[Tuple[int, str]]:
    """Run FreeSASA to get solvent accessible surface residues in the provided protein

    Parameters
    ----------
    protein : MDAnalysis.Universe, MDAnalysis.AtomGroup
        MolSys object to measure Solvent Accessible Surfaces (SAS) area of and report the SAS residues.
    cutoff : float
        Exclude residues from list with SASA below cutoff in angstroms squared.
    forcefield : Union[str, chilife.ljParams]
        Forcefiled to use defining atom radii for calculating solvent accessibility.

    Returns
    -------
    SAResi : set
        A set of solvent accessible surface residues.

    """
    if isinstance(forcefield, str):
        forcefield = scoring.ljEnergyFunc(forcefield)

    environment_coords = protein.atoms.positions
    environment_radii = forcefield.get_lj_rmin(protein.atoms.types)
    atom_sasa = get_sasa(environment_coords, environment_radii, by_atom=True)

    SASAs = {(residue.resnum, residue.segid) for residue in protein.residues if
             atom_sasa[0, residue.atoms.ix].sum() >= cutoff}

    return SASAs




[docs]
def pose2mda(pose) -> MDAnalysis.Universe:
    """Create an MDAnalysis universe from a pyrosetta pose

    Parameters
    ----------
    pose : pyrosetta.rosetta.core.Pose
        pyrosetta pose object.

    Returns
    -------
    mda_protein : MDAnalysis.Universe
        Copy of the input pose as an MDAnalysis Universe object
    """
    coords = np.array(
        [
            res.xyz(atom)
            for res in pose.residues
            for atom in range(1, res.natoms() + 1)
            if res.atom_type(atom).element().strip() != "X"
        ]
    )
    atypes = np.array(
        [
            str(res.atom_type(atom).element()).strip()
            for res in pose.residues
            for atom in range(1, res.natoms() + 1)
            if res.atom_type(atom).element().strip() != "X"
        ]
    )
    anames = np.array(
        [
            str(res.atom_name(atom)).strip()
            for res in pose.residues
            for atom in range(1, res.natoms() + 1)
            if res.atom_type(atom).element().strip() != "X"
        ]
    )
    resindices = np.array(
        [
            res.seqpos() - 1
            for res in pose.residues
            for atom in range(1, res.natoms() + 1)
            if res.atom_type(atom).element().strip() != "X"
        ]
    )

    n_residues = len(pose)

    segindices = np.array([0] * n_residues)
    resnames = np.array([res.name() for res in pose])
    resnums = np.array([res.seqpos() for res in pose])

    mda_protein = make_mda_uni(anames, atypes, resnames, resindices, resnums, segindices)
    mda_protein.positions = coords

    return mda_protein




[docs]
def xplor2mda(xplor_sim) -> MDAnalysis.Universe:
    """
    Converts an Xplor-NIH xplor.simulation object to an MDAnalysis Universe object

    Parameters
    ----------
    xplor_sim : xplor.simulation
        an Xplor-NIH simulation object. (See https://nmr.cit.nih.gov/xplor-nih/doc/current/python/ref/simulation.html)
    Returns
    -------

    """
    n_atoms = xplor_sim.numAtoms()
    a_names = np.array([xplor_sim.atomName(i) for i in range(n_atoms)])
    a_types = np.array([xplor_sim.chemType(i)[0] for i in range(n_atoms)])

    resnames = np.array([xplor_sim.residueName(i) for i in range(len(xplor_sim.residueNameArr()))])
    resnums = np.array(xplor_sim.residueNumArr())

    _, uidx = np.unique(resnums, return_index=True)
    _, resindices = np.unique(resnums, return_inverse=True)

    resnums = resnums[uidx]
    resnames = resnames[uidx]

    n_residues = len(resnames)
    segindices = np.array([0] * n_residues)

    mda_protein = make_mda_uni(a_names, a_types, resnames, resindices, resnums, segindices)
    mda_protein.atoms.positions = np.array(xplor_sim.atomPosArr())

    return mda_protein




[docs]
def make_mda_uni(anames: ArrayLike,
                 atypes: ArrayLike,
                 resnames: ArrayLike,
                 resindices: ArrayLike,
                 resnums: ArrayLike,
                 segindices: ArrayLike,
                 segids: ArrayLike = None,
                 icodes: ArrayLike = None,
                 ) -> MDAnalysis.Universe:
    """
    Create an MDAnalysis universe from numpy arrays of atom information.

    Parameters
    ----------
    anames : ArrayLike
        Array of atom names. Length should be equal to the number of atoms.
    atypes : ArrayLike
        Array of atom elements or types. Length should be equal to the number of atoms.
    resnames : ArrayLike
        Array of residue names. Length should be equal to the number of residues.
    resindices : ArrayLike
        Array of residue indices. Length should be equal to the number of atoms. Elements of resindices should
        map to resnames and resnums of the atoms they represent.
    resnums : ArrayLike
        Array of residue numbers. Length should be equal to the number of residues.
    segindices : ArrayLike
        Array of segment/chain indices. Length should be equal to the number of residues.
    segids : ArrayLike, None
        Array of segment/chain IDs. Length should be equal to the number of segs/chains.

    Returns
    -------
    mda_uni : MDAnalysis.Universe
        The Universe created by the function.
    """

    n_atoms = len(anames)
    n_residues = len(np.unique(resindices))

    if segids is None:
        segids = np.array(["ABCDEFGHIJKLMNOPQRSTUVWXYZ"[i] for i in range(len(np.unique(segindices)))])
    elif len(segids) != len(np.unique(segindices)):
        pass

    if icodes is None:
        icodes = np.array(["" for _ in range(n_residues)])

    mda_uni = mda.Universe.empty(
        n_atoms,
        n_residues=n_residues,
        atom_resindex=resindices,
        residue_segindex=segindices,
        trajectory=True,
    )

    mda_uni.add_TopologyAttr("type", atypes)
    mda_uni.add_TopologyAttr("resnum", resnums)
    mda_uni.add_TopologyAttr("resids", resnums)
    mda_uni.add_TopologyAttr("icodes", icodes)
    mda_uni.add_TopologyAttr("resname", resnames)
    mda_uni.add_TopologyAttr("name", anames)
    mda_uni.add_TopologyAttr("altLocs", [""] * len(atypes))
    mda_uni.add_TopologyAttr("segid")

    for i, segid in enumerate(segids):
        if i == 0:
            i_segment = mda_uni.segments[0]
            i_segment.segid = segid
        else:
            i_segment = mda_uni.add_Segment(segid=str(segid))

        mask = np.argwhere(np.asarray(segindices) == i).flatten()
        mda_uni.residues[mask.tolist()].segments = i_segment

    mda_uni.add_TopologyAttr(Segids(np.array(segids)))
    mda_uni.add_TopologyAttr(Atomindices())
    mda_uni.add_TopologyAttr(Resindices())
    mda_uni.add_TopologyAttr(Segindices())

    return mda_uni




[docs]
def template_ICs(template):
    """
    Get the internal coordinate parameters (bond angle and dihedral angle) of the protein backbone atoms of the
    template.


    Parameters
    ----------
    template : Union[str, Path, MolSys]
        The PDB file or MolSys object to serve as the template.

    Returns
    -------
    phi, psi, omega, bond_angles : ArrayLike
        Arrays containing backbone dihedral and bond angles as calculated from the template structure.
    """

    if isinstance(template, (str, Path)):
        template = xl.MolSys.from_pdb(template)
    elif isinstance(template, (mda.Universe, mda.AtomGroup)):
        template = xl.MolSys.from_atomsel(template)
    elif not isinstance(template, MolecularSystemBase):
        raise RuntimeError('Template must be a PDB file or a MDA.Universe, AtomGroup or chiLife.MolSys object.')


    Ns  = template.select_atoms('name N')
    CAs = template.select_atoms('name CA')
    Cs = template.select_atoms('name C')
    Os = template.select_atoms('name O')

    phis = get_dihedrals(Cs.positions[:-1], Ns.positions[1:], CAs.positions[1:], Cs.positions[1:])
    psis = get_dihedrals(Ns.positions[:-1], CAs.positions[:-1], Cs.positions[:-1], Ns.positions[1:])
    omegas = get_dihedrals(CAs.positions[:-1], Cs.positions[:-1], Ns.positions[1:], CAs.positions[1:])

    # Add to phis and psis for N_CA_C_O and H_N_CA_C

    BA1 = np.concatenate([[0.], get_angles(CAs.positions[:-1], Cs.positions[:-1], Ns.positions[1:])])
    BA2 = np.concatenate([[0.], get_angles(Cs.positions[:-1], Ns.positions[1:], CAs.positions[1:])])
    BA3 = get_angles(Ns.positions, CAs.positions, Cs.positions)
    BA4 = get_angles(CAs.positions, Cs.positions, Os.positions)
    bond_angles = np.array([BA1, BA2, BA3, BA4]).T

    return tuple(np.rad2deg(x) for x in (phis, psis, omegas, bond_angles))




[docs]
def make_peptide(sequence: str, phi=None, psi=None, omega=None, bond_angles=None, template=None) -> MolSys:
    """
    Create a peptide from a string of amino acids. chilife NCAA rotamer libraries and smiles can be inserted by using
    square brackets and angle brackets respectively , e.g. ``[ACE]AA<C1=CC2=C(C(=C1)F)C(=CN2)CC(C(=O)O)N>AA[NME]``
    where ACE and NME are peptide caps and <C1=CC2=C(C(=C1)F)C(=CN2)CC(C(=O)O)N> is a smiles for a NCAA. Backbone
    torsion and bond angles can be set using the phi, psi, omega, and bond_angel keyword argument. All angles should
    be passed in degrees. Alternativly backbone angles can be set with a protein template that can either be a MolSys
    object, or a sting/Path object pointing to a PDB file.


    Parameters
    ----------
    sequence : str
        The Amino acid sequence.

    phi : ArrayLike
        N-1 length array specifying the peptide backbone phi angles (degrees).
    psi : ArrayLike
        N-1 length array specifying the peptide backbone psi angles (degrees).
    omega : ArrayLike
        N-1 length array specifying the peptide backbone omega angles (degrees).
    bond_angles : ArrayLike
        Nx4 shaped array specifying the 4 peptide backbone bond angles  (degrees). Note that, for the first residue any
        value for the first and second bond angles will be ignored because they are undefined.

        1 - CA(i-1)-C(i-1)-N
        2 - C(i-1)_N_CA
        3 - N-CA-C
        4 - CA-C-O

    template : str, Path, MolSys


    Returns
    -------
    mol : MolSys
        A chiLife MolSys object
    """
    C_N_LENGTH = 1.34
    N_CA_LENGTH = 1.46
    CA_C_LENGTH = 1.54

    CA_C_N_ANGLE = 1.9897
    C_N_CA_ANGLE = 2.1468
    N_CA_C_ANGLE = 1.9199
    base_IC = {}

    # Strip whitespace from multiline strings
    sequence = sequence.strip()

    # Parse sequences
    sequence = parse_sequence(sequence)
    zmat = []
    zmat_idxs = []
    anames = []
    atypes = []
    resnames = []
    resindices = []
    resnums = []

    seqiter = iter(sequence)
    atom_idx = 0
    residue_idx = 0
    prev_N = 0

    ncap = None
    ccap = None

    if template is None:
        pass
    elif all(x is None for x in (phi, psi, omega, bond_angles)):
        phi, psi, omega, bond_angles = template_ICs(template)
    else:
        raise RuntimeError('You can not pass both a template and explicit bond_angle or backbone dihedral values')

    # Alter definitions to be based off of other atoms, e.g. N-CA-C-O instead of N-CA-C-N
    phi = np.deg2rad(phi) if phi is not None else np.ones(len(sequence)) * -0.82030
    psi = np.deg2rad(psi) if psi is not None else np.ones(len(sequence)) * -0.99484
    omega = np.deg2rad(omega) if omega is not None else np.ones(len(sequence)) * -np.pi

    if len(phi) < len(sequence):
        diff = len(sequence) - len(phi)
        if diff > 3:
            raise RuntimeError('The number of phi backbone dihedrals does not match the number of residues')
        phi = (np.concatenate(([-0.82030], phi)) if diff == 1 else
               np.concatenate(([-0.82030], phi, [-0.82030])) if diff == 2 else
               np.concatenate(([-0.82030, -0.82030], phi, [-0.82030])))

    if len(psi) < len(sequence):
        diff = len(sequence) - len(psi)
        if diff > 3:
            raise RuntimeError('The number of psi backbone dihedrals does not match the number of residues')
        psi = (np.concatenate(([-0.99484], psi)) if diff == 1 else
               np.concatenate(([-0.99484, -0.99484], psi)) if diff == 2 else
               np.concatenate(([-0.99484], psi, [-0.99484, -0.99484])))

    if len(omega) < len(sequence):
        diff = len(sequence) - len(omega)
        if diff > 3:
            raise RuntimeError('The number of omega backbone dihedrals does not match the number of residues')
        omega = (np.concatenate(([-np.pi], omega)) if diff == 1 else
                 np.concatenate(([-np.pi, -np.pi], omega, [])) if diff == 2 else
                 np.concatenate(([-np.pi], omega, [-np.pi, -np.pi])))

    for i, res in enumerate(seqiter):
        if res in base_IC:
            msysIC = base_IC[res]
        elif res in xl.nataa_codes or res in xl.SUPPORTED_BB_LABELS:
            with open(xl.RL_DIR / f"residue_internal_coords/{res.lower()}_ic.pkl", 'rb') as f:
                msysIC = pickle.load(f)
            base_IC[res] = msysIC
        elif res in xl.SUPPORTED_RESIDUES:
            RL = xl.RotamerEnsemble(res, use_H=True)
            idxmax = np.argmax(RL.weights)
            msysIC = RL.internal_coords
            msysIC.trajectory[idxmax]
            base_IC[res] = msysIC
        elif res in xl.ncaps:
            ncap = res
            continue
        elif res in xl.ccaps:
            ccap = res
            continue
        else:
            mol = smiles2residue(res)
            msysIC = xl.MolSysIC.from_atoms(mol)

        anames.append(msysIC.atom_names)
        atypes.append(msysIC.atom_types)
        resnames.append(msysIC.atom_resnames)
        resnums.append(msysIC.atom_resnums + residue_idx)
        resindices.append(msysIC.atom_resnums + residue_idx-1)

        # Set backbone dihedrals
        if i < len(sequence)-1:
            msysIC.set_dihedral(psi[i+1] + np.pi, 1, ['N', 'CA', 'C', 'O'])
        if 'H' in msysIC.atom_names:
            msysIC.set_dihedral(phi[i] + np.pi, 1, ['C', 'CA', 'N', 'H'])

        tzmat = msysIC.z_matrix.copy()
        tzmat_idxs = msysIC.z_matrix_idxs.copy()

        if atom_idx != 0:
            tzmat_idxs += atom_idx
            tzmat_idxs[0] = [atom_idx, prev_N + 2, prev_N + 1, prev_N]
            tzmat_idxs[1] = [atom_idx + 1, atom_idx, prev_N + 2, prev_N + 1]
            tzmat_idxs[2] = [atom_idx + 2, atom_idx + 1, atom_idx, prev_N + 2]

            tzmat[0] = [C_N_LENGTH, CA_C_N_ANGLE, psi[i]]
            tzmat[1] = [N_CA_LENGTH, C_N_CA_ANGLE, omega[i]]
            tzmat[2] = [CA_C_LENGTH, N_CA_C_ANGLE, phi[i]]

            prev_N = atom_idx

        zmat.append(tzmat)
        zmat_idxs.append(tzmat_idxs)

        atom_idx = atom_idx + len(tzmat)
        residue_idx += 1

    anames = np.concatenate(anames)
    atypes = np.concatenate(atypes)
    resnames = np.concatenate(resnames)
    resnums = np.concatenate(resnums)
    resindices = np.concatenate(resindices)
    zmat = np.concatenate(zmat)
    zmat_idxs = np.concatenate(zmat_idxs)
    segindices = np.array([0] * len(anames))
    segids = np.array(['A'] * len(anames))

    if bond_angles is not None:
        assert len(bond_angles) == len(np.unique(resindices))
        for i, atom_name in enumerate(('N', 'CA', 'C', 'O')):
            offset = 1 if atom_name in ('N', 'CA') else 0
            idxs = np.argwhere(anames == atom_name).flat[offset:]
            zmat[idxs, 1] = np.deg2rad(bond_angles[offset:, i])

    trajectory = _ic_to_cart(zmat_idxs[:, 1:], zmat)

    mol = MolSys.from_arrays(anames, atypes, resnames, resindices, resnums, segindices,
                             segids=segids, trajectory=trajectory)

    if ncap is not None:
        d = phi[1] if phi[1] != -0.82030 else None
        mol = append_cap(mol, ncap, dihedral=d)
    if ccap is not None:
        d = psi[-1] if psi[-1] != -0.99484 else None
        mol = append_cap(mol, ccap, dihedral=d)

    return mol




[docs]
def parse_sequence(sequence: str) -> List[str]:
    """
    Input a string of amino acids with mized 1-letter codes, square brackted ``[]`` 3-letter codes or angle ``<>``
    bracketed smiles and return a list of 3-letter codes and smiles.

    Parameters
    ----------
    sequence : str
    The Amino acid sequence.

    Returns
    -------
    parsed_sequence : List[str]
    A list of the amino acid sequences.

    """
    parsed_sequence = []
    seqiter = iter(sequence)
    for aa in seqiter:
        if aa.upper() in xl.nataa_codes:
            parsed_sequence.append(xl.nataa_codes[aa.upper()])

        # Parse chiLife compatible NCAAs
        elif aa == '[':
            code = ""
            aa = next(seqiter)
            while aa != ']':
                code += aa
                try:
                    aa = next(seqiter)
                except StopIteration:
                    raise RuntimeError("Cannot parse sequence because there is an unbalaced square bracket ``[]``.")
            parsed_sequence.append(code)

        elif aa == "<":
            smiles = ""
            aa = next(seqiter)
            while aa != '>':
                smiles += aa
                try:
                    aa = next(seqiter)
                except:
                    raise RuntimeError("Cannot parse sequence because there is an unbalnced angle bracket ``<>``.")

            parsed_sequence.append(smiles)

        elif aa in ('>', ']'):
            raise RuntimeError(f"A terminating ``{aa}`` bracket has been detected, but no opening bracket was placed "
                               f"ahead of it")

        else:
            raise RuntimeError(f'``{aa}`` is not a known amino acid')

    return parsed_sequence




[docs]
def smiles2residue(smiles : str, **kwargs) -> MolSys:
    """
    Create a protein residue from a smiles string. Smiles string must contain an N-C-C-O dihedral to identify the
    protein backbone. If no dihedral is found or there are too many N-C-C-O dihedrals that are indistinguishable
    this function will fail.


    Parameters
    ----------
    smiles : str
        SMILES string to convert to 3d and a residue
    kwargs : dict
        Keyword arguments to pass to rdkit.AllChem.EmbedMolecule (usually randomSeed)

    Returns
    -------
    msys : MolSys
        chiLife molecular system object containing the smiles string as a 3D molecule and single residue.
    """

    if not rdkit_found:
        raise RuntimeError("Using smiles2residue or make_peptide with a smile string requires rdkit to be installed.")

    mol = Chem.MolFromSmiles(smiles)
    mol = Chem.AddHs(mol)

    success = AllChem.EmbedMolecule(mol, **kwargs)
    if success != 0:
        AllChem.EmbedMolecule(mol, enforceChirality=False, **kwargs)

    AllChem.MMFFOptimizeMolecule(mol, maxIters=200)

    res = MolSys.from_rdkit(mol)

    atoms = res.atoms
    res.filename = 'UNK.pdb'
    Natoms = atoms.select_atoms('type N')
    NCCOs = []
    for atom in Natoms:
        for dih in atom.dihedrals:
            if np.all(dih.atoms.types == ['N', 'C', 'C', 'O']):
                NCCOs.append(dih.atoms)
            elif np.all(dih.atoms.types == ['O', 'C', 'C', 'N']):
                NCCOs.append(dih.atoms[::-1])

    bb_candidates = []
    for dih in NCCOs:
        assert dih[-1].type == 'O'

        if len(dih[-1].bonds) == 1:
            bb_candidates.append(dih)

    if len(bb_candidates) == 1:
        bb = bb_candidates[0]

    else:
        bb = None
        for can in bb_candidates:
            cooh = "".join(sorted(can[2].bonded_atoms.types))
            if cooh != 'COO':
                continue
            else:
                bb = can
                break

    bb.names = ['N', 'CA', 'C', 'O']
    count_dict = {}
    for atom in atoms:

        if atom in bb:
            pass

        elif 'N' in atom.bonded_atoms.names:
            if atom.type == 'H':
                atom.name = count_dict.get('X', 'H')
                count_dict['X'] = 'X'

        elif 'CA' in atom.bonded_atoms.names and atom.type == 'H':
            atom.name = 'HA'

        elif 'O' in atom.bonded_atoms.names and atom.type == 'H':
            atom.name = 'X'

        elif 'C' in atom.bonded_atoms.names and atom.type == 'O':
            atom.name = 'X'
            for oatom in atom.bonded_atoms:
                if oatom.type == 'H':
                    oatom.name ='X'
        elif atom.name == 'X':
            pass

        else:
            n = count_dict.setdefault(atom.type, 1)
            atom.name = f'{atom.type}{n}'
            count_dict[atom.type] += 1

    save_atoms = atoms.select_atoms('not name X')
    xl.save('UNK.pdb', save_atoms)
    Msys = MolSys.from_pdb('UNK.pdb', sort_atoms=True)
    os.remove('UNK.pdb')

    return Msys




[docs]
def append_cap(mol : MolSys, cap : str, resnum = None, dihedral: float = None) -> MolSys:
    """
    Append a peptide cap to a molecular system.

    Parameters
    ----------
    mol : MolSys
        The molecular system to be capped.
    cap : str
        The name (3-letter identifier) of the cap to be attached.

    Returns
    -------
    mol : MolSys
        The molecular system with the cap.
    """

    cap_name = cap.upper()
    term = "N" if cap_name in xl.ncaps else "C" if cap_name in xl.ccaps else None

    if resnum is not None:
        neighbor = mol.select_atoms(f'resnum {resnum}')
    elif term == "N":
        neighbor = mol.residues[0]
        resnum = neighbor.resnum
    elif term == "C":
        neighbor = mol.select_atoms('protein').residues[-1]
        resnum = neighbor.resnum
    else:
        raise RuntimeError('`term` not found in neighboring residue. Note that `term` must be an `N` or a `C`.')

    nmx, nori = xl.global_mx(*neighbor.select_atoms('name N CA C').positions)
    cap_struct = xl.MolSys.from_pdb(xl.RL_DIR / f'cap_pdbs/{cap_name}.pdb')
    cap_struct.positions = cap_struct.positions @ nmx + nori
    cap_struct.resnum = resnum + 1 if term == "C" else resnum - 1

    if dihedral is not None:
        # Identify nearst atom of cap as bonded atom
        neighbor_atom = neighbor.atoms[neighbor.names == term].position[0]
        bonded_atom = np.linalg.norm(cap_struct.positions - neighbor_atom, axis=1).argmin()
        bonded_atom = cap_struct.atoms[bonded_atom]
        
        # Set up dih
        if term == 'N':
            dihedral_points = bonded_atom.positions, *neighbor.select_atoms('name N CA C').positions
        else:
            dihedral_points = *neighbor.select_atoms('name N CA C').positions, bonded_atom.positions
            dihedral_points = dihedral_points[::-1]

        start_angle = get_dihedral(dihedral_points)
        theta = start_angle - dihedral
        v = dihedral_points[2] - dihedral_points[1]
        v /= np.linalg.norm(v)
        mx = get_dihedral_rotation_matrix(theta, v)
        cap_struct.positions = (cap_struct.positions - dihedral_points[2]) @ mx.T + dihedral_points[2]
    elif term=='C' and 'OXT' in neighbor.names:
        C_pos = neighbor.select_atoms('name C').positions
        vfrom =  cap_struct[0].position - C_pos
        vfrom /= np.linalg.norm(vfrom)
        vto = neighbor.select_atoms('name OXT').positions - C_pos
        vto /= np.linalg.norm(vto)

        R, _ = Rotation.align_vectors(vfrom, vto)
        R = R.as_matrix()
        cap_struct.positions = (cap_struct.positions - C_pos) @ R + C_pos
        mol = mol.select_atoms(f'not (resid {resnum} and name OXT)')

    systems = [cap_struct, mol] if term == "N" else [mol, cap_struct]

    mol = concat_molsys(*systems)

    return mol




[docs]
def store_cap(name, mol, term):
    """
    Save the cap attached to a molecule. A pdb file will be saved in the chilife/data/rotamer_libraries/cap_pdbs folder
    with the provided name and the cap will be registered in either the ncaps.txt or ccaps.txt file in the
    chilife/data/rotamer_libraries directory.

    Parameters
    ----------
    name : str
        Name (3-letter identifier) of the cap to be attached.
    mol : str, Path, Universe, AtomGroup, MolSys
        Path, MDA.AtomGroup or MolSys object containing the cap. The cap must be its own residue number, i.e. not merged
        with the reside before or after it.
    term: str
        The terminus the cap is attached to. Only two values are accepted ``N`` and ``C``
    """

    name = name.upper()
    term = term.upper()
    if isinstance(mol, (str, Path)):
        mol = xl.MolSys.from_pdb(mol)
    elif isinstance(mol, (mda.Universe, mda.AtomGroup)):
        mol = xl.MolSys.from_atomsel(mol)

    bonds = xl.guess_bonds(mol.positions, mol.types)
    top = xl.Topology(mol, bonds)
    mol.topology = top

    if term == "N":
        cap = mol.residues[0]
        neighbor = mol.residues[1]
        txt_file = xl.RL_DIR / 'ncaps.txt'
        resnum=0

    elif term == "C":
        cap = mol.residues[-1]
        neighbor = mol.residues[-2]
        txt_file = xl.RL_DIR / 'ccaps.txt'
        resnum = 1
    else:
        raise RuntimeError('`term` not found in neighboring residue. Note that `term` must be an `N` or a `C`.')

    ori, mx = xl.local_mx(*neighbor.select_atoms("name N CA C").positions)
    cap.positions = (cap.positions - ori) @ mx
    with open(txt_file, 'r+') as f:
        line = f.readline()
        keys = line.split()
        if name not in keys:
            f.write(f" {name}")
    cap.resnum = resnum
    xl.save(xl.RL_DIR / f'cap_pdbs/{name}.pdb', cap)




[docs]
def get_grid_mx(N, CA, C):
    """
    Get the rotation matrix and translation vector to orient a grid of lennard-jones spheres into a new coordinate
    frame. It is expected that the grid is defined as it is in :func:`screen_site_volume`, i.e. the xy plane of the grid
    is centered at the origin and the z-axis is positive.

    Parameters
    ----------
    N : np.ndarray
        First coordinate of the site.
    CA : np.ndarray
        Second coordinate of the site (The origin).
    C : np.ndarray
        Third coordinate of the site.

    Returns
    -------
    rotation matrix: np.ndarray
        Rotation matrix for the grid. Should be applied before translation.
    origin : np.ndarray
        Translation vector for the grid. Should be applied after rotation.
    """

    CA_N = N - CA
    CA_N /= np.linalg.norm(CA_N)

    CA_C = C - CA
    CA_C /= np.linalg.norm(CA_C)

    dummy_axis = CA_N + CA_C
    dummy_axis = dummy_axis / np.linalg.norm(dummy_axis)

    # Define new y-axis
    xaxis_plane = N - C
    dummy_comp = xaxis_plane.dot(dummy_axis)
    xaxis = xaxis_plane - dummy_comp * dummy_axis
    xaxis = xaxis / np.linalg.norm(xaxis)

    theta = np.deg2rad(35.25)
    mx = get_dihedral_rotation_matrix(theta, xaxis)
    yaxis = mx @ dummy_axis

    zaxis = np.cross(xaxis, yaxis)
    rotation_matrix = np.array([xaxis, yaxis, zaxis])
    origin = CA
    return rotation_matrix, origin




[docs]
def write_grid(grid, name='grid.pdb', atype='Se'):
    """
    Given an array of 3-dimensional points (a grid usually), write a PDB so the grid can be visualized.

    Parameters
    ----------
    grid : ArrayLike
        The 3D points to write
    name: str
        The name of the file to write the grid to.
    atype : str
        Atom type to use for the grid.
    """

    with open(name, 'w') as f:
        for i, p in enumerate(grid):
            f.write(io.fmt_str.format(i+1, 'SEN', 'GRD', 'A', 1, *p, 1.0, 1.0, atype))




[docs]
def get_site_volume(site: int,
                    mol: Union[MDAnalysis.Universe, MDAnalysis.AtomGroup, MolecularSystemBase],
                    grid_size: Union[float, ArrayLike] = 10,
                    offset: Union[float, ArrayLike] = -0.5,
                    spacing: float = 1.0,
                    vdw_r = 2.8,
                    write: str = False,
                    return_grid: bool = False
                    ) -> Union[float, ArrayLike]:
    """
    Calculate the (approximate) accessible volume of a single site. The volume is calculated by superimposing
    a grid of lennard-jones spheres at the site, eliminating those with clashes and calculating the volume from the
    remaining spheres. Note that this is a different "accessible volume" than the "accessible volume" method made
    popular by MtsslWizard.

    Parameters
    ----------
    site : int
        Site of the `mol` to calculate the accessible volume of.
    mol : MDAnalysis.Universe, MDAnalysis.AtomGroup, chilife.MolecularSystemBase
       The molecule/set of atoms containing the site to calculate the accessible volume of.
    grid_size: float, ArrayLike
        Dimensions of grid to use to calculate the accessible volume. Can be a float, specifying a cube with equal
        dimensions, or an array specifying a unique value for each dimension. Uses units of Angstroms.
    offset: float, ArrayLike
        The offset of the initial grid with respect to the superimposition site. If ``offset`` is a float, the grid will
        be offset in the z-dimension (along the CA-CB bond). If ``offset`` is an array, the grid will be offset in
        each x, y, z dimension by the corresponding value of the array. Uses units of Angstroms
    spacing : float
        Spacing between grid points. Defaults to 1.0 Angstrom
    vdw_r : float
        van der Waals radius for clash detection. Defaults to 2.8 Angstroms.
    write : bool, str, default False
        Switch to make chiLife write the grid to a PDB file. If ``True`` the grid will be written to grid.pdb.
        If given a string chiLife will use the string as the file name.
    return_grid: bool, default False
        If ``return_grid=True``, the volume will be returned as a numpy array containing the grid points that do not
        clash with the neighboring atoms.

    Returns
    -------
    volume : float, np.ndarray
        The accessible volume at the given site. If ``return_grid=True`` the volume will be returned as the set of
        grid points that do not clash with neighboring atoms, otherwise volume will be the estimated accessible volume
        of the site in cubic angstroms (Å :sup:`3`).
    """

    if isinstance(grid_size, (int, float, complex)):
        x = y = z = grid_size
    elif hasattr(grid_size, '__len__'):
        x, y, z = grid_size
    else:
        raise RuntimeError("grid_size must be a float or an array like object with 3 elements.")

    half_x = x / 2
    half_y = y / 2

    if isinstance(offset, (int, float, complex)):
        xo, yo = 0, 0
        zo = offset
    elif hasattr(offset, '__len__'):
        xo, yo, zo = offset
    else:
        raise RuntimeError("offset must be a float or an array like object with 3 elements.")

    xp = x / spacing
    yp = y / spacing
    zp = z / spacing

    # Create grid
    grid = np.mgrid[-half_x + xo:half_x + xo:xp * 1j, -half_y + yo:half_y + yo:yp * 1j, zo :z+zo:zp * 1j].swapaxes(0, -1)
    xl, yl, zl, _ = grid.shape
    grid = grid.reshape(-1, 3)

    # Superimpose grid on site
    bb = mol.select_atoms(f"resid {site} and name N CA C")
    if len(bb.atoms) != 3:
        raise RuntimeError(f'The provided protein does not have a canonical backbone at residue {site}.')

    N, CA, C = bb.positions

    mx, ori = get_grid_mx(N, CA, C)
    grid_tsf = grid @ mx + ori

    # Perform clash evaluation
    sel = mol.select_atoms(f'protein and not resid {site}')
    d = cdist(grid_tsf, sel.positions)
    mask = d < vdw_r
    mask2 = ~np.all(d > 5, axis=-1)

    # Remove clashing grid points
    mask = (~mask).prod(axis=-1) * mask2
    idxs = np.argwhere(mask).flatten()
    if len(idxs) == 0:
        return 0

    grid_tsf = grid_tsf[idxs]

    # check for discontinuities
    tree = cKDTree(grid_tsf)
    neighbors = tree.query_pairs(np.sqrt(2 * spacing) * 1.2)
    root_idx = np.argmin(np.linalg.norm(grid_tsf - CA, axis=-1))
    graph = ig.Graph(neighbors)

    # Use only points continuous with root_idx
    for g in graph.connected_components():
        if root_idx in g:
            grid_tsf = grid_tsf[g]
            break

    # process output
    if write:
        filename = write if isinstance(write, str) else 'grid.pdb'
        write_grid(grid_tsf, name=filename)

    if return_grid:
        return grid_tsf
    else:
        return len(grid_tsf) * spacing**3