Source code for chilife.MolSys

from __future__ import annotations

import weakref
from collections.abc import Iterable
import numbers
import operator
from functools import partial, update_wrapper

import MDAnalysis

from .ligand_utils import assign_atom_names
from .Topology import Topology, BondType, bonds_from_ccd_data
import numpy as np
from numpy.typing import ArrayLike
from scipy.spatial import cKDTree

import chilife

# TODO:
#   Behavior: AtomSelections should have orders to be enforced when indexing.

masked_properties = (
    "record_types",
    "atomids",
    "names",
    "altlocs",
    "altLocs",
    "resnames",
    "resnums",
    "icodes",
    "resids",
    "resis",
    "chains",
    "occupancies",
    "bs",
    "segs",
    "segids",
    "atypes",
    "types",
    "elements",
    "charges",
    "chiral",
    "ix",
    "resixs",
    "resindices",
    "segixs",
    "segindices",
    "_Atoms",
    "_Residues",
    "atoms",
)

singles = {
    "record_type": "record_types",
    "name": "names",
    "altloc": "altlocs",
    "altLoc": "altlocs",
    "atype": "atypes",
    "element": "elements",
    "type": "atypes",
    "resn": "resnums",
    "resname": "resnames",
    "resnum": "resnums",
    "resid": "resids",
    "icode": "icodes",
    "resi": "resis",
    "chain": "chains",
    "segid": "segids",
    "charge": "charges",
    "bfactor": "bs",
    "qfactor": "occupancies",
    "occupancy": "occupancies",
    "segix": "segixs",
    "resix": "resixs",
}


[docs] class MolecularSystemBase: """Base class for molecular systems containing attributes universal to all subclasses""" def __getattr__(self, key): if not isinstance(self, MolSys) and "molsys" not in self.__dict__: return self.__getattribute__(key) elif key in singles: return np.squeeze( self.molsys.__getattribute__(singles[key])[self.mask] ).flat[0] elif isinstance(self, MolSys) and key not in self.__dict__: return self.__getattribute__(key) elif key == "trajectory": return self.molsys.trajectory elif key == "atoms": return self.molsys.__getattribute__(key)[self.mask] elif key in masked_properties or key in singles: return np.squeeze(self.molsys.__getattribute__(key)[self.mask]) else: return self.molsys.__getattribute__(key) def __setattr__(self, key, value): if key in ("molsys", "mask"): super(MolecularSystemBase, self).__setattr__(key, value) elif key in singles: self.molsys.__getattribute__(singles[key])[self.mask] = value elif isinstance(self, MolSys) and key not in self.__dict__: super(MolecularSystemBase, self).__setattr__(key, value) elif key == "trajectory": self.molsys.__dict__["trajectory"] = value elif key in masked_properties or key in singles: self.molsys.__getattribute__(key)[self.mask] = value else: super(MolecularSystemBase, self).__setattr__(key, value) def __add__(self, other): if other.molsys is self.molsys: new_mask = np.unique(np.concatenate([self.mask, other.mask])) return AtomSelection(self.molsys, new_mask) else: return concat_molsys(self.atoms, other.atoms) def __sub__(self, other): if other.molsys is self.molsys: tmask = ~np.isin(self.mask, other.mask) new_mask = self.mask[tmask] return AtomSelection(self.molsys, new_mask) else: raise RuntimeError("You can not subtract atoms from a different molsys.")
[docs] def select_atoms(self, selstr): """ Select atoms from :class:`~MolSys` or :class:`~AtomSelection` based on selection a selection string, similar to the `select_atoms` method from `MDAnalysis <https://docs.mdanalysis.org/stable/documentation_pages/selections.html>`_ Parameters ---------- selstr : str Selection string Returns ------- atoms : :class:`~AtomSelection` Selected atoms """ mask = process_statement(selstr, self.logic_keywords, self.molsys_keywords) if hasattr(self, "mask"): t_mask = np.zeros(self.molsys.n_atoms, dtype=bool) t_mask[self.mask] = True mask *= t_mask mask = np.argwhere(mask).T[0] return AtomSelection(self.molsys, mask)
def __getitem__(self, item): if np.issubdtype(type(item), np.integer): relidx = self.molsys.ix[self.mask][item] return Atom(self.molsys, relidx) if isinstance(item, slice): return AtomSelection(self.molsys, self.mask[item]) elif isinstance(item, (np.ndarray, list, tuple)): item = np.asarray(item) if len(item) == 1 or item.sum() == 1: return Atom(self.molsys, self.mask[item]) elif item.dtype == bool: item = np.argwhere(item).T[0] return AtomSelection(self.molsys, item) else: return AtomSelection(self.molsys, self.mask[item]) elif hasattr(item, "__iter__"): if all([np.issubdtype(type(x), int) for x in item]): return AtomSelection(self.molsys, self.mask[item]) raise TypeError( "Only integer, slice type, and boolean mask arguments are supported at this time" ) @property def fname(self): """File name or functional name of the molecular system""" return self.molsys._fname @fname.setter def fname(self, val): self.molsys._fname = val @property def positions(self): """3-Dimensional cartesian coordinates of the atoms in the molecular system""" return self.coords @positions.setter def positions(self, val): self.coords = np.asarray(val) @property def coords(self): """3-dimensional cartesian coordinates of the atoms in the molecular system""" return self.trajectory.coords[self.trajectory.frame, self.mask] @coords.setter def coords(self, val): self.trajectory.coords[self.trajectory.frame, self.mask] = np.asarray(val) @property def n_atoms(self): """ "Number of atoms in the molecular system""" return len(self.mask) @property def n_residues(self): """Number of residues in the molecular system""" return len(np.unique(self.resixs[self.mask])) @property def n_chains(self): """Number of chains in the molecular system""" return len(np.unique(self.chains[self.mask])) @property def n_segments(self): """Number of segments in the molecular system""" return len(np.unique(self.segments[self.mask])) @property def residues(self): """:class:`~ResidueSelection` for all residues of the molecular system""" return ResidueSelection(self.molsys, self.mask) @property def segments(self): """:class:`~SegmentSelection` for all segments (chains) of the molecular system""" return SegmentSelection(self.molsys, self.mask) @property def logic_keywords(self): """Dictionary of logic keywords (strings) pertaining to :meth:`atom_selection` method. Keywords are mapped to the associated logic function""" return self.molsys._logic_keywords @property def molsys_keywords(self): """Dictionary of molecular system keywords (strings) pertaining to :meth:`atom_selection` method. Keywords are mapped to the associated molecular system attributes""" return self.molsys._molsys_keywords @property def universe(self): """Wrapper attribute to allow molecular systems to behave like MDAnalysis AtomGroup objects""" return self.molsys @property def bonds(self): """Array of atom indices specifying bonds between atoms. Note that the bonds are indexed with respect to the parent molecular system and are not specific to any atom, residue or chain selection""" bonds = self._bonds[self._bond_mask] if self._bonds is not None else None return bonds @bonds.setter def bonds(self, val): raise AttributeError( "Bonds cannot be set explicitly. Please use the `add_bond(s)` and `remove_bond(s)` methods." ) @property def bond_atom_names(self): """Array of atom names of bonded atoms corresponding to those in :meth:`~MolecularSystemBase.bonds`""" return np.array([self.molsys.names[b] for b in self.bonds]) @property def bond_types(self): """Array of chiLife BondType objects (enumerations) specifying the type of bond between atoms in :meth:`~MolecularSystemBase.bonds`""" if self.bonds is None or self._bond_mask is None: return None bond_types = self._bond_types[self._bond_mask] return bond_types @bond_types.setter def bond_types(self, val): raise AttributeError( "Bond types cannot be set explicitly. Please use the `add_bond(s)` and `remove_bond(s)` methods." ) @property def bond_chiral(self): """chirality of the bonds between atoms specified in :meth:`~MolecularSystemBase.bonds`""" if self.bonds is None or self._bond_mask is None: return None bond_chiral = self.molsys._bond_chiral[self._bond_mask] return bond_chiral def _check_mol_sys_for_bonds(self): """ Helper function to check if the current object has any bonds """ if not isinstance(self, MolSys): raise AttributeError( "Bonds and bond types can only be set on `MolSys` classes and not atom/residue/chain selections" )
[docs] def add_bond(self, bond, bond_type=None): """ Add singular bond between two atoms. Parameters ---------- bond : ArrayLike[int] size 2 array of integers specifying bonds between atom indices of the parent :class:`MolSys`. bond_type : ArrayLike[BondType] | None Array of chilife BondType instances. If not specified ``BondType.UNSPECIFIED`` will be used. """ self._check_mol_sys_for_bonds() bond_type = bond_type or BondType.UNSPECIFIED if self._check_overwrite_bond(bond, bond_type): return None elif len(self._bonds) == 0: self._bonds = np.array([bond]) self._bond_types = np.array([bond_type]) else: self._bonds = np.concatenate((self._bonds, [bond])) self._bond_types = np.concatenate((self._bond_types, [bond_type])) self._bond_mask = np.arange(len(self._bonds)) self.topology = Topology(self, self._bonds, self._bond_types)
def _check_overwrite_bond(self, bond, bond_type): if self.bonds is None: return False if len(self.bonds) == 0: return False exists = np.argwhere(np.all(self._bonds == bond, axis=1)).flatten() exists_reversed = np.argwhere( np.all(self._bonds == bond[::-1], axis=1) ).flatten() if exists.size > 0: if bond_type is not None: self._bond_types[exists[0]] = bond_type elif exists_reversed.size > 0: if bond_type is not None: self._bond_types[exists_reversed[0]] = bond_type else: return False return True
[docs] def add_bonds(self, bonds, bond_type=None, bond_chiral=None, update_topology=True): """ Add multiple bonds between atoms. Parameters ---------- bonds : ArrayLike[int] N by 2 array of integers specifying bonds between atom indices of the parent :class:`MolSys`. bond_type : ArrayLike[BondType] | None Array of chilife BondType instances. Must be the same length as ``bonds``. If not specified ``BondType.UNSPECIFIED`` will be used. bond_chiral : ArrayLike[str] | None chirality of the bonds. update_topology : bool Toggle updating the attached MolSys Topology. """ self._check_mol_sys_for_bonds() bonds = np.atleast_1d(bonds) bond_type = ( bond_type if bond_type is not None else [BondType.UNSPECIFIED] * len(bonds) ) bond_chiral = bond_chiral if bond_chiral is not None else ["?"] * len(bonds) if not isinstance(bond_type, Iterable): bond_type = [bond_type] * len(bonds) if not isinstance(bond_chiral, Iterable): bond_chiral = [bond_chiral] * len(bonds) if len(bonds) != len(bond_type): raise AttributeError( "Failed to add bonds to the MolSys because the number of bonds and bond_types do not match." ) if len(bonds) != len(bond_chiral): raise AttributeError( "Failed to add bonds to the MolSys because the number of bonds and bond_chiral do not match." ) new_bonds, new_bond_types, new_bond_chiral = [], [], [] for b, btype, chiral in zip(bonds, bond_type, bond_chiral): if not self._check_overwrite_bond(b, btype): new_bonds.append(b) new_bond_types.append(btype) new_bond_chiral.append(chiral) # Convert to numpy arrays new_bonds = np.array(new_bonds) new_bond_types = np.array(new_bond_types) new_bond_chiral = np.array(new_bond_chiral) if self.bonds is None or len(self.bonds) == 0: self._bonds = new_bonds self._bond_types = new_bond_types self._bond_chiral = new_bond_chiral else: self._bonds = np.concatenate((self._bonds, new_bonds)) self._bond_types = np.concatenate((self._bond_types, new_bond_types)) self._bond_chiral = np.concatenate((self._bond_chiral, new_bond_chiral)) # Update persistent residue/segement objects if hasattr(self, "_residue_list"): for res in self._residues: res._update_bond_mask() if hasattr(self, "_segments_list"): for seg in self._segments: seg._update_bond_mask() self._bond_mask = np.arange(len(self._bonds)) if update_topology: self.topology = Topology(self, self._bonds, self._bond_types)
[docs] def remove_bond(self, bond): """ Remove a singular bond between two atoms. Parameters ---------- bond : ArrayLike[int] Array of two integers specifying atoms involved in the bond that is to be removed. """ self.remove_bonds([bond]) self._bond_mask = np.arange(len(self._bonds))
[docs] def remove_bonds(self, bonds=None): """ Remove multiple bonds between atoms. Parameters ---------- bonds : ArrayLike[int] N by 2 array of integers specifying the parent :class:`MolSys` atom indices of the atoms that are to be un-bonded. """ if bonds is None: self._bonds = np.array([[]]) self._bond_types = np.array([]) self._bond_mask = np.array([], dtype=bool) return None self._check_mol_sys_for_bonds() mask = np.ones(len(bonds), dtype=bool) for bond in bonds: exists = np.all(self._bonds == bond, axis=1) exists_reversed = np.all(self._bonds == bond[::-1], axis=1) if exists.size > 0: mask *= ~exists elif exists_reversed: mask *= ~exists_reversed self._bonds = self.bonds[mask] self._bond_types = self.bond_types[mask] self._bond_mask = np.arange(len(self._bonds))
def _update_bond_mask(self, bond_mask=None): if bond_mask is not None: self._bond_mask = bond_mask elif self.molsys._bonds is None: self._bond_mask = None else: self._bond_mask = np.argwhere( np.isin(self.molsys._bonds, self.mask).all(axis=1) ).flatten()
[docs] def copy(self): """Creates a deep copy of the underlying MolSys and returns an :class:`AtomSelection` from the new copy matching the current selection if it exists.""" p2 = self.molsys.copy() return AtomSelection(p2, self.mask.copy())
def __iter__(self): """Iterate over all atoms of the MolSys""" for idx in self.mask: yield self.molsys.atoms[idx] def __eq__(self, value): """Checks if two molecular systems are equal by making sure they have the same ``self.molsys`` and the same mask""" return self.molsys is value.molsys and self.mask == value.mask def write_pdb(self, filename): pass
[docs] def write_cif(self, filename, use_bio_ccd: bool = True): """ Write a CIF file of the current ``MolSys``. Note that if there are bonds or formal charges this information will be preserved in the _chem_comp_atom and _chem_comp_bond fields. Parameters ---------- filename : str | Path File name or ``Path`` to write the cif file to. use_bio_ccd : bool Use the chilife ``bio_ccd`` to assign bond information to residues where this information is not already present. If the residue is not in the ``bio_ccd`` and does not have bond information in the ``MolSys`` objectno CCD information will be written. """ segids, stype = [], [] poly_ids, strand_ids, poly_type = [], [], [] poly_asym, poly_auth_num, poly_icodes = [], [], [] poly_seq_eids, poly_seq_hetero, poly_seq_mon_id, poly_seq_num = [], [], [], [] nonpoly_asym_id, nonpoly_auth_seq, nonpoly_eid = [], [], [] nonpoly_mon_id, nonpoly_icode, nonpoly_seq_num, nonpoly_sid = [], [], [], [] for i, seg in enumerate(self.segments): segids.append(str(i + 1)) if len(seg.residues) > 1: stype.append("polymer") if any(x.resname in chilife.nataa_codes for x in seg.residues): poly_type.append("polypeptide") elif any(x.resname in chilife.natnu_codes for x in seg.residues): poly_type.append("nucleotide") else: poly_type.append("unknown") poly_ids.append(str(i + 1)) strand_ids.append(str(seg.segid)) poly_seq_num.extend([str(x) for x in range(1, len(seg.residues) + 1)]) poly_seq_mon_id.extend([res.resname for res in seg.residues]) poly_seq_hetero.extend( [ "y" if np.any(res.atoms.record_type == "HETATM") else "n" for res in seg.residues ] ) poly_seq_eids.extend([poly_ids[-1]] * len(seg.residues)) poly_asym.extend([str(res.segid) for res in seg.residues]) poly_auth_num.extend([str(res.resnum) for res in seg.residues]) poly_icodes.extend( [res.icode if res.icode.strip() else "." for res in seg.residues] ) else: res = seg.residues[0] nonpoly_eid.append(str(i + 1)) nonpoly_asym_id.append(str(seg.segid)) nonpoly_auth_seq.append(str(res.resnum)) nonpoly_mon_id.append(res.resname) nonpoly_icode.append(res.icode if res.icode != "" else ".") nonpoly_seq_num.append(str(1)) nonpoly_sid.append(str(seg.segid)) stype.append("non-polymer") entites = self.segindices atom_site_data = struct_to_cif_atom_site(self) local_ccd = {} for res in self._residues: if res.resname in local_ccd: continue elif res.resname in chilife.bio_ccd and use_bio_ccd: local_ccd[res.resname] = chilife.bio_ccd[res.resname] else: local_ccd[res.resname] = res_to_ccd(res) ccd_list = sorted(local_ccd.keys()) local_ccd = chilife.io.join_ccd_info(ccd_list, local_ccd) cif_data = { self.name: { "entry": {"id": self.name}, "atom_type": { "symbol": [str(e) for e in sorted(np.unique(self.elements))] }, "audit_author": { "name": ["chiLife", "Maxx Tessmer", "Stefan Stoll"], "pdbx_ordinal": ["1", "2", "3"], }, "citation": { "book_publisher": "?", "country": "US", "id": "primary", "journal_full": "PLoS Computational Biology", "journal_id_ISSN": "1553-734X", "journal_volume": "19", "journal_issue": "3", "journal_page_first": "e1010834", "journal_page_last": "e1010834", "title": "chiLife: An open-source Python package for in silico spin labeling and integrative protein modeling", "year": "2023", }, "citation_author": { "citation_id": ["primary", "primary"], "name": ["Maxx Tessmer", "Stefan Stoll"], "ordinal": ["1", "2"], }, "entity": { "id": segids, "pdbx_description": ["."] * len(segids), "type": stype, }, "entity_poly": { "entity_id": poly_ids, "pdbx_strand_id": strand_ids, "pdbx_type": poly_type, }, "entity_poly_seq": { "entity_id": poly_seq_eids, "hetero": poly_seq_hetero, "mon_id": poly_seq_mon_id, "num": poly_seq_num, }, "ma_data": { "content_type": "model_coordinates", "id": 1, "name": "Model", }, "pdbx_nonpoly_scheme": { "asym_id": nonpoly_asym_id, "auth_seq_num": nonpoly_auth_seq, "entity_id": nonpoly_eid, "mon_id": nonpoly_mon_id, "pdb_ins_code": nonpoly_icode, "pdb_seq_num": nonpoly_seq_num, "pdb_strand_id": nonpoly_sid, }, "pdbx_poly_seq_scheme": { "asym_id": poly_asym, "auth_seq_num": poly_auth_num, "entity_id": poly_seq_eids, "hetero": poly_seq_hetero, "mon_id": poly_seq_mon_id, "pdb_ins_code": poly_icodes, "pdb_seq_num": poly_seq_num, "pdb_strand_id": poly_asym, "seq_id": poly_seq_num, }, "software": { "classification": "model building", "date": "2023-03-31", "description": "Protein modeling", "name": "chiLife", "pdbx_ordinal": "1", "type": "python package", "version": chilife.__version__, }, "struct_asym": { "entity_id": poly_ids, "id": strand_ids, }, "atom_site": atom_site_data, **local_ccd, } } # Delete empty entries del_list = [] for k1, v1 in cif_data[self.name].items(): bools = [] for k2, v2 in v1.items(): bools.append(bool(v2)) if np.all(~np.array(bools)): del_list.append(k1) for key in del_list: del cif_data[self.name][key] chilife.io.write_cif(filename, cif_data)
def _make_sdf_data(self): sdf_data = [] properties = {} if len(chg_mask := np.argwhere(self.charges != 0).flatten().tolist()) > 0: properties["CHG"] = { "atom_ids": chg_mask, "charges": self.charges[chg_mask].tolist(), } for ts in self.trajectory: sdf_dict = { "name": self.name, "software": f"chiLife {chilife.__version__}", "comment": "", "n_atoms": self.n_atoms, "n_bonds": len(self.bonds), "chiral": int(np.any(~np.isin(self.chiral, ("?", "N")))), "format": "v2000", "atoms": [ { "index": i, "xyz": atom.position.tolist(), "element": atom.element, "mass difference": 0, "charge": 0, "stereo": 0, "valence": 0, } for i, atom in enumerate(self.atoms) ], "bonds": [ { "idx1": a1, "idx2": a2, "type": chilife.io.XL_BOND_TO_SDF[btype], "stereo": 0, } for (a1, a2), btype in zip(self.bonds, self.bond_types) ], "properties": properties, } sdf_data.append(sdf_dict) return sdf_data
[docs] def write_sdf(self, filename): """ Write an SDF file from the current ``MolSys`` object. Note there is no support for sdf style chirality, isotopes, valence, RAD, ISO, RBC, SUB, UNS, Parameters ---------- filename : str | Path Name of the file """ sdf_data = self._make_sdf_data() chilife.io.write_sdf(sdf_data, filename)
[docs] class MolSys(MolecularSystemBase): """ An object containing all attributes of a molecular system. Parameters ---------- atomids : np.ndarray Array of atom indices where each index is unique to each atom in the molecular system. names : np.ndarray Array of atom names, e.g. 'N', 'CA' 'C'. altlocs : np.ndarray Array of alternative location identifiers for each atom. resnames : np.ndarray Array of residue names for each atom. resnums : np.ndarray Array of residue numbers for each atom. chains : np.ndarray Array of chain identifiers for each atom. trajectory : np.ndarray Cartesian coordinates of each atom for each state of an ensemble or for each timestep of a trajectory. occupancies : np.ndarray Occupancy or q-factory for each atom. bs : np.ndarray thermal, or b-factors for each atom. segs : np.ndarray Segment/chain IDs for each atom. atypes : np.ndarray Atom/element types. charges : np.ndarray Formal or partial charges assigned to each atom. chiral : nd.array | None Chirality specifier for each atom. resindices: np.ndarray | None Array of the residue indices each atom belongs to. resindices are 0-indexed. segindices: np.ndarray | None Array of the segment index each atom belongs to. segindices are 0-indexed. bonds : ArrayLike | None Array of atom ID pairs corresponding to all atom bonds in the system. bond_types : ArrayLike | None Array of BondType objects corresponding to all bonds in the system. bond_chiral: ArrayLike | None Array specifying the chirality (if any) of each bond. name : str Name of molecular system use_ccd: dict | None A dictionary containing ccd information needed to generate topological information. This can still be used if bonds are provided but some are missing. """ def __init__( self, record_types: np.ndarray, atomids: np.ndarray, names: np.ndarray, altlocs: np.ndarray, resnames: np.ndarray, resnums: np.ndarray, icodes: np.ndarray, chains: np.ndarray, trajectory: np.ndarray, occupancies: np.ndarray, bs: np.ndarray, segs: np.ndarray, atypes: np.ndarray, charges: np.ndarray, chiral: np.ndarray | None = None, resindices: np.ndarray | None = None, segindices: np.ndarray | None = None, bonds: np.ndarray | None = None, bond_types: np.ndarray | None = None, bond_chiral: np.ndarray | None = None, name: str = "Noname_MolSys", use_ccd: dict | None = None, ): # self.molsys = weakref.proxy(self) self.record_types = record_types.copy() self.atomids = atomids.copy() self.names = names.copy().astype("U4") self.altlocs = altlocs.copy() self.resnames = resnames.copy() self.resnums = resnums.copy() self.icodes = icodes.copy() self.chains = chains.copy() self.trajectory = Trajectory(trajectory.copy(), self) self.occupancies = occupancies.copy() self.bs = bs.copy() self.segs = segs.copy() if np.any(segs != "") else chains.copy() self.atypes = atypes.copy() self.charges = charges.copy() self.chiral = ( chiral.copy() if chiral is not None else np.array(["?"] * len(atomids)) ) self._fname = name self.ix = np.arange(len(self.atomids)) self.mask = np.arange(len(self.atomids)) # resix_borders = np.nonzero(np.r_[1, np.diff(self.resnums)[:-1]]) # resix_borders = np.append(resix_borders, [self.n_atoms]) # resixs = [] # for i in range(len(resix_borders) - 1): # dif = resix_borders[i + 1] - resix_borders[i] # resixs.append(np.ones(dif, dtype=int) * i) if resindices is None: residues, resixs = [], [] i = -1 for aidx in range(self.n_atoms): residue_id = self.resnums[aidx], self.icodes[aidx], self.chains[aidx] if residue_id not in residues: residues.append(residue_id) i += 1 resixs.append(i) self.resixs = np.array(resixs) else: self.resixs = resindices self.resindices = self.resixs if segindices is None: segix_map = {v: i for i, v in enumerate(np.unique(self.segs))} self.segixs = np.array([segix_map[k] for k in self.segs]) else: self.segixs = segindices self.segindices = self.segixs self.is_protein = np.array( [res in chilife.SUPPORTED_RESIDUES for res in resnames] ) self._molsys_keywords = { "id": self.atomids, "record_type": self.record_types, "name": self.names, "altloc": self.altlocs, "resname": self.resnames, "resnum": self.resnums, "resix": self.resixs, "resindex": self.resixs, "icode": self.icodes, "chain": self.chains, "occupancies": self.occupancies, "b": self.bs, "chiral": self.chiral, "segid": self.segs, "segix": self.segixs, "segindex": self.segixs, "type": self.atypes, "charges": self.charges, "resi": self.resnums, "resid": self.resnums, "i.": self.resnums, "s.": self.segs, "c.": self.chains, "r.": self.resnames, "n.": self.names, "resn": self.resnames, "q": self.occupancies, "elem": self.atypes, "element": self.atypes, "protein": self.is_protein, "_len": self.n_atoms, } self._logic_keywords = { "and": operator.mul, "or": operator.add, "not": unot, "!": unot, "<": operator.lt, ">": operator.gt, "==": operator.eq, "<=": operator.le, ">=": operator.ge, "!=": operator.ne, "byres": partial(byres, molsys=self.molsys), "within": update_wrapper(partial(within, molsys=self.molsys), within), "around": update_wrapper(partial(within, molsys=self.molsys), within), "point": update_wrapper(partial(point, molsys=self.molsys), point), } # Aliases self.resns = self.resnames self.resis = self.resnums self.altLocs = self.altlocs self.resids = self.resnums self.segids = self.segs self.types = self.atypes self.elements = self.atypes self.bfactors = self.bs self._bonds = None self._bond_types = None self._bond_mask = np.array([], dtype=bool) self._bond_chiral = None add_ccd_bonds = False if use_ccd is not None: if use_ccd is True: use_ccd = chilife.bio_ccd unique_resnames = np.unique(resnames) unique_resnames = [ res for res in unique_resnames if (res in use_ccd) and ("chem_comp_atom" in use_ccd[res]) ] atom_chiral_mapping = { (res, aid): c for res in unique_resnames for aid, c in zip( use_ccd[res]["chem_comp_atom"]["atom_id"], use_ccd[res]["chem_comp_atom"]["pdbx_stereo_config"], ) } chiral = np.array( [ atom_chiral_mapping.get((res, atom_id), "?") for res, atom_id in zip(resnames, names) ] ) chiral_mask = self.chiral == "?" self.chiral[chiral_mask] = chiral[chiral_mask] ccd_bonds, ccd_bond_types, ccd_bond_chiral = bonds_from_ccd_data( self, use_ccd ) if bonds is None: bonds = ccd_bonds bond_types = ccd_bond_types bond_chiral = ccd_bond_chiral else: add_ccd_bonds = True # Add Topology if bonds is not None: self._bonds = bonds.copy() self._bond_types = ( bond_types.copy() if bond_types is not None else np.array([BondType.UNSPECIFIED] * len(bonds)) ) self._bond_mask = np.arange(len(self._bonds)) if bond_chiral is not None and any(bond_chiral): self._bond_chiral = bond_chiral.copy() else: self._bond_chiral = np.array(["?"] * len(bonds)) if hasattr(self, "_residue_list"): for res in self._residues: res._update_bond_mask() if hasattr(self, "_segments_list"): for seg in self._segments: seg._update_bond_mask() if add_ccd_bonds: self.add_bonds( ccd_bonds, ccd_bond_types, ccd_bond_chiral, update_topology=False ) # Atoms creation is required to be last self.atoms = AtomSelection(self, self.mask, self._bond_mask) self.topology = Topology(self, bonds, bond_types) if bonds is not None else None
[docs] @classmethod def from_pdb(cls, file_name, sort_atoms=False, **kwargs): """ Reads a pdb file and returns a MolSys object Parameters ---------- file_name : str, Path Path to the PDB file to read. sort_atoms : bool Presort atoms for consisted internal coordinate definitions kwargs: dict Additional arguments to be passed to the :class:`MolSys` constructor. Returns ------- cls : MolSys Molecular system object of the PDB structure. """ pdb_dict = chilife.read_pdb(file_name, sort_atoms=sort_atoms) # Add chain IDs if not present. chain_data = pdb_dict["chains"] used_chains = np.unique(chain_data) if "" in used_chains: available_chains = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" for new_chain in available_chains: if new_chain not in used_chains: break chain_data[chain_data == ""] = new_chain return cls(**pdb_dict, **kwargs)
[docs] @classmethod def from_cif(cls, file_name): """ Reads a cif file and returns a MolSys object Parameters ---------- file_name : str, Path Path to the CIF file to read. Returns ------- cls : MolSys Molecular system object of the CIF structure. """ cif_data = chilife.io.read_cif(file_name) if len(cif_data) > 1: raise RuntimeError( "More than one data entry found in the cif file. Please seperate into multiple files or join as " "separate chains in one entry" ) name = list(cif_data.keys())[0] cif_data = cif_data[name] atom_site_data = cif_data["atom_site"] model_id = np.array([atom_site_data["pdbx_PDB_model_num"]]).flatten() model_ids = np.unique(model_id) n_models = len(model_ids) n_atoms = np.sum(model_id == model_ids[0]) anames = np.array(atom_site_data["label_atom_id"][:n_atoms]) for i in model_ids: if np.sum(model_id == i) != n_atoms: raise RuntimeError( "This CIF file contains multiple structure models with different numbers of atoms. chiLife " "currently only supports reading CIF files where all models have the exact same number of atoms." ) model_anames = np.array(atom_site_data["label_atom_id"])[model_id == i] if not np.all(model_anames == anames): raise RuntimeError( "This CIF file contains multiple structure models with different atom orders. chiLife currently " "only supports reading CIF files where all models have the exact same atom orders." ) record_types = np.array(atom_site_data["group_PDB"][:n_atoms]) atomids = np.array(atom_site_data["id"][:n_atoms]) altlocs = np.array( ["" if a == "." else a for a in atom_site_data["label_alt_id"][:n_atoms]] ) atypes = np.array(atom_site_data["type_symbol"][:n_atoms]) resnames = np.array(atom_site_data["label_comp_id"][:n_atoms]) resnums = np.array(atom_site_data["auth_seq_id"][:n_atoms], dtype=int) icodes = np.array(atom_site_data["pdbx_PDB_ins_code"][:n_atoms]) icodes[icodes == "?"] = "" segs = np.char.array(atom_site_data["label_asym_id"][:n_atoms]) + np.char.array( atom_site_data["label_entity_id"][:n_atoms] ) occupancies = np.array(atom_site_data["occupancy"][:n_atoms], dtype=float) bs = np.array(atom_site_data["B_iso_or_equiv"][:n_atoms], dtype=float) chains = np.array(atom_site_data["auth_asym_id"][:n_atoms]) charges = np.array(atom_site_data["pdbx_formal_charge"][:n_atoms], dtype="U3") charges[charges == "?"] = "nan" charges = charges.astype(float) traj = np.empty((n_models, n_atoms, 3)) for i, id in enumerate(model_ids): sl = slice(i * n_atoms, (i + 1) * n_atoms) xs = np.array(atom_site_data["Cartn_x"][sl], dtype=float) ys = np.array(atom_site_data["Cartn_y"][sl], dtype=float) zs = np.array(atom_site_data["Cartn_z"][sl], dtype=float) traj[i] = np.dstack([xs, ys, zs]) # Get ccd ccd_data = chilife.io.create_ccd_dicts(cif_data) msys = cls( record_types, atomids, anames, altlocs, resnames, resnums, icodes, chains, traj, occupancies, bs, segs, atypes, charges, name=name, use_ccd=ccd_data, ) msys._cif_data = cif_data return msys
[docs] @classmethod def from_sdf(cls, file_name): """ Reads a sdf file and returns a MolSys object Parameters ---------- file_name : str, Path Path to the sdf file to read. Returns ------- cls : MolSys Molecular system object of the sdf structure. """ sdf_data = chilife.io.read_sdf(file_name) mol0 = sdf_data[0] name = mol0["name"] or "LIG" atom_elements_0 = np.array([a["element"] for a in mol0["atoms"]]) n_atoms = len(atom_elements_0) res_name = mol0["name"][:3] if mol0["name"] != "" else "LIG" record_types = np.array(["HETATM"] * n_atoms) atomids = np.array([a["index"] for a in mol0["atoms"]]) anames = np.array(assign_atom_names(mol0)) altlocs = np.array([""] * n_atoms) resnames = np.array([res_name] * n_atoms) resnums = np.ones(n_atoms, dtype=int) icodes = np.array([""] * n_atoms) chains = np.array(["A"] * n_atoms) occupancies = np.ones(n_atoms) bs = np.zeros(n_atoms) segs = np.array(["A"] * n_atoms) atypes = atom_elements_0 # CHG block takes precidence over ATOM block charges if "CHG" in mol0["properties"]: charges = np.zeros(n_atoms) chg_idxs = mol0["properties"]["CHG"]["atom_ids"] chg_values = mol0["properties"]["CHG"]["charges"] charges[chg_idxs] = chg_values else: charges = np.array( [ chilife.io.SDF_ATOM_CHARGE_TO_INT[a["charge"]] for a in sdf_data[0]["atoms"] ] ) bonds, bond_types = [], [] for bond in mol0["bonds"]: bonds.append([bond["idx1"], bond["idx2"]]) bond_types.append(chilife.io.SDF_BOND_TO_XL[bond["type"]]) bonds, bond_types = np.array(bonds), np.array(bond_types) trajectory = [] for struct_dict in sdf_data: atom_elements_i = np.array([a["element"] for a in struct_dict["atoms"]]) if not np.all(atom_elements_i == atom_elements_0): raise RuntimeError( "This structures in this sdf file contain different atoms, suggesting they are not all the same " "molecule. chiLife currently only supports reading sdf files as MolSys objects if all molecules in " "the sdf are the same with only atom positions differing." ) coords = [a["xyz"] for a in struct_dict["atoms"]] trajectory.append(coords) trajectory = np.array(trajectory) msys = cls( record_types, atomids, anames, altlocs, resnames, resnums, icodes, chains, trajectory, occupancies, bs, segs, atypes, charges, bonds=bonds, bond_types=bond_types, name=name, ) msys._sdf_data = sdf_data return msys
[docs] @classmethod def from_arrays( cls, anames: ArrayLike, atypes: ArrayLike, resnames: ArrayLike, resindices: ArrayLike, resnums: ArrayLike, segindices: ArrayLike, record_types: ArrayLike = None, altlocs: ArrayLike = None, icodes: ArrayLike = None, segids: ArrayLike = None, trajectory: ArrayLike = None, **kwargs, ) -> MolSys: """ Create a MolSys object from a minimal set of arrays. Parameters ---------- anames : ArrayLike Array of atom names. atypes : ArrayLike Array of atom types/elements. resnames : ArrayLike Array of residue names for each atom. resindices : ArrayLike Array of residue indices for each atom. resnums : ArrayLike Array of residue numbers for each atom. segindices : ArrayLike Array of segment indices for each atom. record_types : ArrayLike | None Array of record types for each atom, e.g. ATOM or HETATM altlocs: ArrayLike | None Array of alternate location specifies for each atom. icodes: ArrayLike | None Array of insertion codes for each atom. segids : ArrayLike Array of segment IDs for each atom. trajectory : ArrayLike Array of cartesian coordinates for each atom and each state of an ensemble or frame of a trajectory. kwargs : dict Dictionary of additional arguments to pass the the :class:`MolSys` constructor. Returns ------- cls : MolSys Molecular system object of the PDB structure. """ anames = np.asarray(anames) atypes = np.asarray(atypes) resindices = np.asarray(resindices) segindices = np.asarray(segindices) n_atoms = len(anames) atomids = np.arange(n_atoms) altlocs = np.array([""] * n_atoms) if altlocs is None else np.array(altlocs) icodes = np.array([""] * n_atoms) if icodes is None else np.array(icodes) if len(resnums) != n_atoms: resnums = np.array([resnums[residx] for residx in resindices]) else: resnums = np.asarray(resnums) if len(resnames) != n_atoms: resnames = np.array([resnames[residx] for residx in resindices]) else: resnames = np.asarray(resnames) if len(icodes) != n_atoms: icodes = np.array([icodes[residx] for residx in resindices]) else: icodes = np.asarray(icodes) if len(segindices) != n_atoms: segindices = np.array([segindices[x] for x in resindices]) if segids is None: segids = np.array(["ABCDEFGHIJKLMNOPQRSTUVWXYZ"[i] for i in segindices]) elif len(segids) != n_atoms: segids = np.array([segids[i] for i in segindices]) else: segids = np.asarray(segids) chains = segids if trajectory is None: trajectory = np.empty((1, n_atoms, 3)) elif len(trajectory.shape) == 2: trajectory = trajectory[None, ...] if record_types is None: record_types = np.array( [ "ATOM" if rnme in chilife.SUPPORTED_RESIDUES else "HETATM" for rnme in resnames ] ) occupancies = np.ones(n_atoms) bs = np.ones(n_atoms) charges = kwargs.pop("charges", np.zeros(n_atoms)) return cls( record_types, atomids, anames, altlocs, resnames, resnums, icodes, chains, trajectory, occupancies, bs, segids, atypes, charges, **kwargs, )
[docs] @classmethod def from_atomsel(cls, atomsel, frames=None): """ Create a new MolSys object from an existing :class:`~AtomSelection` or `MDAnalysis.AtomGroup <https://userguide.mdanalysis.org/stable/atomgroup.html>`_ object. Parameters ---------- atomsel : :class:`~AtomSelection`, MDAnalysis.AtomGroup The :class:`~AtomSelection` or MDAnalysis.AtomGroup from which to create the new MolSys object. frames : int, Slice, ArrayLike Frame index, slice or array of frame indices corresponding to the frames of the atom selection you wish to extract into a new :class:`~MolSys` object. Returns ------- cls : MolSys Molecular system object of the PDB structure. """ if isinstance(atomsel, (MDAnalysis.AtomGroup, AtomSelection)): U = atomsel.universe elif isinstance(atomsel, MDAnalysis.Universe): U = atomsel atomsel = U.atoms if frames is None: frames = slice(0, len(U.trajectory)) record_types = ( atomsel.record_types if hasattr(atomsel, "record_types") else None ) icodes = atomsel.icodes if hasattr(atomsel, "icodes") else None anames = atomsel.names atypes = atomsel.types resnames = atomsel.resnames resnums = atomsel.resnums ridx_map = {num: i for i, num in enumerate(np.unique(atomsel.resindices))} resindices = np.array([ridx_map[num] for num in atomsel.resindices]) segids = atomsel.segids sidx_map = {num: i for i, num in enumerate(np.unique(atomsel.segindices))} segindices = np.array([sidx_map[num] for num in atomsel.segindices]) if ( isinstance(atomsel, MDAnalysis.AtomGroup) and hasattr(atomsel, "bonds") and len(atomsel.bonds) > 0 ): bonds = atomsel.bonds.indices bond_types = np.array([BondType.UNSPECIFIED] * len(bonds)) elif isinstance(atomsel, MolecularSystemBase) and atomsel.bonds is not None: bonds = atomsel.bonds bond_types = atomsel.bond_types else: bonds = None bond_types = None # remap bond indices if bonds is not None: k = {idx: i for i, idx in enumerate(atomsel.ix)} bonds = np.array( [[k[a1], k[a2]] for a1, a2 in bonds if a1 in k and a2 in k], dtype=int ) if hasattr(U.trajectory, "coordinate_array"): trajectory = U.trajectory.coordinate_array[frames][:, atomsel.ix, :] else: trajectory = [] for ts in U.trajectory[frames]: trajectory.append(atomsel.positions) trajectory = np.array(trajectory) return cls.from_arrays( anames, atypes, resnames, resindices, resnums, segindices, segids=segids, trajectory=trajectory, record_types=record_types, icodes=icodes, bonds=bonds, bond_types=bond_types, )
[docs] @classmethod def from_rdkit(cls, mol): """ Create a MolSys from an rdkit Mol object with embedded conformers. Parameters ---------- mol : rdkit.Chem.rdchem.Mol An rdkit Mol object. Returns ------- cls: MolSys Molecular system object of the PDB structure. """ atypes = np.array([a.GetSymbol() for a in mol.GetAtoms()]) anames = np.array([a + str(i) for i, a in enumerate(atypes)]) resnames = np.array(["UNK" for _ in anames]) resindices = np.array([0] * len(anames)) resnums = np.array([1] * len(anames)) segindices = np.array([0] * len(anames)) segids = np.array(["A"] * len(anames)) trajectory = np.array([conf.GetPositions() for conf in mol.GetConformers()]) bonds = np.array( [[b.GetBeginAtomIdx(), b.GetEndAtomIdx()] for b in mol.GetBonds()] ) bond_types = np.array([BondType(b.GetBondType()) for b in mol.GetBonds()]) return cls.from_arrays( anames, atypes, resnames, resindices, resnums, segindices, segids=segids, trajectory=trajectory, bonds=bonds, bond_types=bond_types, )
[docs] @classmethod def from_openMM(cls, simulation): """ Create a MolSys from an openMM Simulation object. Parameters ---------- simulation : openmm.simulation.Simulation The simulation object to create the MolSys object from. Returns ------- sys : chilife.MolSys chiLife molecular system as a clone from """ try: from openmm.unit import angstrom except: raise RuntimeError( "You must have the optional openMM dependency installed to use from_openMM" ) top = simulation.topology atypes = np.array([a.element.symbol for a in top.atoms()]) anames = np.array([a.name for a in top.atoms()]) resnames = np.array([a.residue.name for a in top.atoms()]) resindices = np.array([a.residue.index for a in top.atoms()]) resnums = np.array([int(a.residue.id) for a in top.atoms()]) segindices = np.array([a.residue.chain.index for a in top.atoms()]) segids = np.array([a.residue.chain.id for a in top.atoms()]) trajectory = ( simulation.context.getState(getPositions=True) .getPositions(asNumpy=True) .value_in_unit(angstrom) ) bonds = np.array([[bond.atom1.index, bond.atom2.index] for bond in top.bonds()]) # bond_types = np.array([ for bond in top.bonds()]) sys = cls.from_arrays( anames, atypes, resnames, resindices, resnums, segindices, segids=segids, trajectory=trajectory, bonds=bonds, # bond_types=bond_types, ) return sys
[docs] def copy(self): """Create a deep copy of the MolSys.""" if hasattr(self, "bonds"): bonds = self.bonds bond_types = self.bond_types else: bonds = None bond_types = None return MolSys( record_types=self.record_types, atomids=self.atomids, names=self.names, altlocs=self.altlocs, resnames=self.resnames, resnums=self.resnums, icodes=self.icodes, chains=self.chains, trajectory=self.trajectory.coords, occupancies=self.occupancies, bs=self.bs, segs=self.segs, atypes=self.atypes, charges=self.charges, bonds=bonds, bond_types=bond_types, name=self.names, )
[docs] def load_new(self, coordinates): """ Load a new set of 3-dimensional coordinates into the MolSys. Parameters ---------- coordinates : ArrayLike Set of new coordinates to load into the MolSys """ self.trajectory.load_new(coordinates=coordinates)
@property def _atoms(self): if not hasattr(self, "_Atoms"): self._Atoms = np.array( [Atom(weakref.proxy(self), i) for i in range(self.n_atoms)] ) return self._Atoms @property def _residues(self): if not hasattr(self, "_residue_list"): _, first_res_ix = np.unique(self.resixs, return_index=True) self._residue_list = [ Residue(weakref.proxy(self), [ix]) for ix in first_res_ix ] return self._residue_list @property def _segments(self): if not hasattr(self, "_segments_list"): _, first_seg_ix = np.unique(self.segixs, return_index=True) self._segments_list = [ Segment(weakref.proxy(self), ix) for ix in first_seg_ix ] return self._segments_list @property def molsys(self): return self
[docs] class Trajectory: """ Object containing and managing 3-dimensional coordinates of :class:`~MolSys` objects, particularly when there are multiple states or time steps. Parameters ---------- coordinates : np.ndarray 3-dimensional coordinates for all atoms and all states/frames of the associated :class:`~MolSys` object. molsys : MolSys The associated :class:`~MolSys` object. timestep : float The time step or change in time between frames/states of the trajectory/ensemble. """ def __init__(self, coordinates: np.ndarray, molsys: MolSys, timestep: float = 1): self.molsys = molsys self.timestep = timestep self.coords = coordinates self.coordinate_array = coordinates self._frame = 0 self.time = np.arange(0, len(self.coords)) * self.timestep
[docs] def load_new(self, coordinates): """ Load a new set (trajectory or ensemble) of 3-dimensional coordinates into the Trajectory. Parameters ---------- coordinates : ArrayLike Set of new coordinates to load into the MolSys """ self.coords = coordinates self.coordinate_array = coordinates self.time = np.arange(0, len(self.coords)) * self.timestep
def __getitem__(self, item): if isinstance(item, (slice, list, np.ndarray)): return TrajectoryIterator(self, item) elif isinstance(item, numbers.Integral): self._frame = item return self.time[self._frame] def __iter__(self): return iter(TrajectoryIterator(self)) def __len__(self): return len(self.time) @property def frame(self): """The frame number the trajectory is currently on""" return self._frame @frame.setter def frame(self, value): self._frame = value
class TrajectoryIterator: """An iterator object for looping over :class:`~Trajectory` objects.""" def __init__(self, trajectory, arg=None): self.trajectory = trajectory if arg is None: self.indices = range(len(self.trajectory.time)) elif isinstance(arg, slice): self.indices = range(*arg.indices(len(self.trajectory.time))) elif isinstance(arg, (np.ndarray, list, tuple)): if np.issubdtype(arg.dtype, np.integer): self.indices = arg elif np.dtype == bool: self.indices = np.argwhere(arg) else: raise TypeError(f"{arg.dtype} is not a valid indexor for a trajectory") else: raise TypeError(f"{arg} is not a valid indexor for a trajectory") def __iter__(self): for frame in self.indices: yield self.trajectory[frame] def __len__(self): return len(self.indices) def process_statement(statement, logickws, subjectkws): """ Parse a selection string to identify subsets of atoms that satisfy the given conditions. Parameters ---------- statement : str The selection string to parse. logickws : dict Dictionary of logic keywords (strings) mapped to the associated logic function. subjectkws : dict Dictionary of subject keywords (strings) mapped to the associated subject attributes. Returns ------- mask : np.ndarray Array of subject indices that satisfy the given conditions. """ sub_statements = parse_paren(statement) mask = np.ones(subjectkws["_len"], dtype=bool) operation = None for stat in sub_statements: if stat.startswith("("): tmp = process_statement(stat, logickws, subjectkws) mask = operation(mask, tmp) if operation else tmp continue stat_split = stat.split() if stat_split[0] not in logickws and stat_split[0] not in subjectkws: raise ValueError( f"Invalid selection keyword: {stat_split[0]}. All selections statements and substatements must start " f"with a valid selection keyword" ) subject = None values = [] while len(stat_split) > 0: if stat_split[0] in subjectkws: subject = subjectkws[stat_split.pop(0)] values = [] elif stat_split[0] in logickws: if subject is not None: values = np.array(values, dtype=subject.dtype) tmp = process_sub_statement(subject, values) subject, values = None, [] mask = operation(mask, tmp) if operation else tmp # Get next operation operation, stat_split = build_operator(stat_split, logickws) else: values += parse_value(stat_split.pop(0)) if subject is not None: values = np.array(values, dtype=subject.dtype) tmp = process_sub_statement(subject, values) mask = operation(mask, tmp) if operation else tmp operation = None elif not values and hasattr(operation, "novals"): mask = operation(mask, np.ones_like(mask, dtype=bool)) return mask def parse_paren(string): """ Break down a given string to parenthetically defined subcomponents Parameters ---------- string : str The string to break down. Returns ------- results : List[str] List of parenthetically defined subcomponents. """ stack = 0 start_idx = 0 end_idx = 0 results = [] for i, c in enumerate(string): if c == "(": if stack == 0: start_idx = i + 1 if i != 0: results.append(string[end_idx:i].strip()) stack += 1 elif c == ")": # pop stack stack -= 1 if stack == 0: results.append(f"({string[start_idx:i].strip()})") start_idx = end_idx end_idx = i + 1 if end_idx != len(string) - 1: results.append(string[end_idx:].strip()) results = [res for res in results if res != ""] if len(results) == 1 and results[0].startswith("("): results = parse_paren(results[0][1:-1]) if stack != 0: raise RuntimeError( "The provided statement is missing a parenthesis or has an extra one." ) return results def check_operation(operation, stat_split, logickws): """ Given an advanced operation, identify the additional arguments being passed to the advanced operation and create a simple operation that defined specifically for the provided arguments. Parameters ---------- operation : callable A function or callable object that operates on molecular system attributes. stat_split : List[str] the arguments associated with the operation logickws : dict Dictionary of logic keywords (strings) mapped to the associated logic function. Returns ------- operation : callable A simplified version of the provided operation now accounting for the user provided parameters. """ advanced_operators = ( logickws["within"], logickws["around"], logickws["byres"], logickws["point"], ) if operation in advanced_operators: outer_operation = logickws["and"] args = [stat_split.pop(i) for i in range(1, 1 + operation.nargs)] operation = partial(operation, *args) def toperation(a, b, outer_operation, _io): return outer_operation(a, _io(b)) operation = partial(toperation, outer_operation=outer_operation, _io=operation) return operation def build_operator(stat_split, logickws): """ A function to combine multiple sequential operators into one. Parameters ---------- stat_split : List[str] The list of string arguments passed by the user to define the sequence of operations logickws : dict Dictionary of logic keywords (strings) mapped to the associated logic function. Returns ------- operation : callable A compression of sequential operations combined into one. split_stat : List[str] Any remaining arguments passed by the user that were not used to define ``operation`` """ operation = logickws["and"] unary_operators = (logickws["not"], logickws["byres"]) binary_operators = (logickws["and"], logickws["or"]) advanced_operators = (logickws["within"], logickws["around"], logickws["point"]) while _io := logickws.get(stat_split[0], False): if _io in unary_operators: def toperation(a, b, operation, _io): return operation(a, _io(b)) operation = partial(toperation, operation=operation, _io=_io) stat_split = stat_split[1:] elif _io in advanced_operators: stat_split = stat_split[1:] args = [stat_split.pop(0) for i in range(_io.nargs)] _io = partial(_io, *args) def toperation(a, b, operation, _io): return operation(a, _io(b)) operation = partial(toperation, operation=operation, _io=_io) if hasattr(_io.func, "novals"): operation.novals = _io.func.novals elif _io in binary_operators: if operation != logickws["and"]: raise RuntimeError( "Cannot have two binary logical operators in succession" ) operation = _io stat_split = stat_split[1:] if len(stat_split) == 0: break return operation, stat_split def parse_value(value): """ Helper function to parse values that may have slices. Parameters ---------- value : str, int, ArrayLike[int] Values to parse. Returns ------- return_value : List A list of values that satisfy the input. """ return_value = [] if "-" in value: start, stop = [int(x) for x in value.split("-")] return_value += list(range(start, stop + 1)) elif ":" in value: start, stop = [int(x) for x in value.split(":")] return_value += list(range(start, stop + 1)) else: return_value.append(value) return return_value def process_sub_statement(subject, values): if subject.dtype == bool: return subject return np.isin(subject, values)
[docs] class AtomSelection(MolecularSystemBase): """ An object containing a group of atoms from a :class:`~MolSys` object Parameters ---------- molsys : :class:`~MolSys` The :class:`~MolSys` object from which the AtomSelection belongs to. mask : np.ndarray An array of atom indices that defines the atoms of ``molsys`` that make up the atom selection. """ def __new__(cls, *args): if len(args) == 2: molsys, mask = args else: return object.__new__(cls) if isinstance(mask, int): return Atom(molsys, mask) else: return object.__new__(cls) def __init__(self, molsys, mask, bond_mask=None): self.molsys = molsys self.mask = mask self._update_bond_mask(bond_mask) def __getitem__(self, item): if np.issubdtype(type(item), np.integer): relidx = self.molsys.ix[self.mask][item] return self.molsys._atoms[relidx] if isinstance(item, slice): return AtomSelection(self.molsys, self.mask[item]) elif isinstance(item, (np.ndarray, list, tuple)): item = np.asarray(item) if len(item) == 1 or item.sum() == 1: return self.molsys._atoms[self.mask[item][0]] elif item.dtype == bool: item = np.argwhere(item).T[0] return AtomSelection(self.molsys, item) else: return AtomSelection(self.molsys, self.mask[item]) elif hasattr(item, "__iter__"): if all([np.issubdtype(type(x), int) for x in item]): return AtomSelection(self.molsys, self.mask[item]) raise TypeError( "Only integer, slice type, and boolean mask arguments are supported at this time" ) def __len__(self): return len(self.mask)
[docs] class ResidueSelection(MolecularSystemBase): """ An object containing a group of residues and all their atoms from a :class:`~MolSys` object. Note that if one atom of a residue is present in the AtomSelection used to create the group, all atoms of that residue will be present in the ResidueSelection object. .. note:: Unlike a regular :class:`~AtomSelection` object, the ``resnames``, ``resnums``, ``segids`` and ``chains`` attributes of this object are tracked with respect to residue not atom and changes made to these attributes on this object will not be present in the parent object and vice versa. Parameters ---------- molsys : :class:`~MolSys` The :class:`~MolSys` object from which the AtomSelection belongs to. mask : np.ndarray An array of atom indices that defines the atoms of ``molsys`` that make up the atom selection. bond_mask : np.ndarray boolean array specifying which bonds belong to the ``ResidueSelection``. """ def __init__(self, molsys, mask, bond_mask=None): resixs = np.unique(molsys.resixs[mask]) self.mask = np.argwhere(np.isin(molsys.resixs, resixs)).flatten() self.molsys = molsys _, self.first_ix = np.unique(molsys.resixs[self.mask], return_index=True) if len(self.mask) > 1: self.resixs = self.resixs[self.first_ix].flatten() self.resnames = self.resnames[self.first_ix].flatten() self.resnums = self.resnums[self.first_ix].flatten() self.segids = self.segids[self.first_ix].flatten() self.chains = self.chains[self.first_ix].flatten() self._update_bond_mask(bond_mask) def __setattr__(self, key, value): self.__dict__[key] = value def __getitem__(self, item): resixs = np.unique(self.molsys.resixs[self.mask]) new_resixs = resixs[item] new_mask = np.argwhere(np.isin(self.molsys.resixs, new_resixs)).flatten() if np.issubdtype(type(item), int): return self.molsys._residues[new_resixs] elif isinstance(item, slice): return ResidueSelection(self.molsys, new_mask) elif hasattr(item, "__iter__"): if all([np.issubdtype(type(x), int) for x in item]): return ResidueSelection(self.molsys, new_mask) raise TypeError( "Only integer and slice type arguments are supported at this time" ) def __len__(self): return len(self.first_ix) def __iter__(self): for resix in self.resixs: yield self.molsys._residues[resix]
[docs] class SegmentSelection(MolecularSystemBase): """ An object containing a group of segments and all their atoms from a :class:`~MolSys` object. Note that if one atom of a segment is present in the AtomSelection used to create the group, all atoms of that segment will be present in the SegmentSelection object. .. note:: Unlike a regular :class:`~AtomSelection` object, the ``segids`` and ``chains`` attributes of this object are tracked with respect to segments not atoms and changes made to these attributes on this object will not be present in the parent object and vice versa. Parameters ---------- molsys : :class:`~MolSys` The :class:`~MolSys` object from which the AtomSelection belongs to. mask : np.ndarray An array of atom indices that defines the atoms of ``molsys`` that make up the atom selection. bond_mask : np.ndarray boolean array specifying which bonds belong to the ``SegmentSelection``. """ def __init__(self, molsys, mask, bond_mask=None): self.seg_ixs = np.unique(molsys.segixs[mask]) self.mask = np.argwhere(np.isin(molsys.segixs, self.seg_ixs)).flatten() self.molsys = molsys _, self.first_ix = np.unique(molsys.segixs[self.mask], return_index=True) self.segids = self.segids[self.first_ix].flatten() self.chains = self.chains[self.first_ix].flatten() self._update_bond_mask(bond_mask) def __setattr__(self, key, value): self.__dict__[key] = value def __getitem__(self, item): segixs = np.unique(self.molsys.segixs[self.mask]) new_segixs = segixs[item] new_mask = np.argwhere(np.isin(self.molsys.segixs, new_segixs)) if np.issubdtype(type(item), int): return self.molsys._segments[new_segixs] elif isinstance(item, slice): return SegmentSelection(self.molsys, new_mask) elif hasattr(item, "__iter__"): if all([np.issubdtype(type(x), int) for x in item]): return SegmentSelection(self.molsys, new_mask) raise TypeError( "Only integer and slice type arguments are supported at this time" ) def __len__(self): return len(self.first_ix) def __iter__(self): for new_segix in self.seg_ixs: yield self.molsys._segments[new_segix]
[docs] class Atom(MolecularSystemBase): """ An object containing information for a single atom. Parameters ---------- molsys : :class:`~MolSys` The :class:`~MolSys` object from which the Atom belongs to. mask : np.ndarray The index that defines the atoms of ``molsys`` that make up the atom selection. """ def __init__(self, molsys, mask): self.molsys = molsys self.index = mask self.mask = mask @property def position(self): """Cartesian coordinates of the atom in angstroms.""" return self.molsys.coords[self.index] @property def bonds(self): """list of :class:`AtomSelection` objects containing the atoms for each bond that this atom is involved in""" idxs = self.topology.bonds_any_atom[self.mask] return [AtomSelection(self.molsys, idx) for idx in idxs] @property def angles(self): """list of :class:`AtomSelection` objects containing the atoms for each angle that this atom is involved in""" idxs = self.topology.angles_any_atom[self.mask] return [AtomSelection(self.molsys, idx) for idx in idxs] @property def dihedrals(self): """list of :class:`AtomSelection` objects containing the atoms for each dihedral that this atom is involved in""" idxs = self.topology.dihedrals_any_atom[self.mask] return [AtomSelection(self.molsys, idx) for idx in idxs] @property def bonded_atoms(self): """:class:`AtomSelection` object containing all atoms that this atom is bonded to. Does not contain this atom""" all_idxs = np.unique(np.concatenate(self.topology.bonds_any_atom[self.mask])) all_idxs = np.array([idx for idx in all_idxs if idx != self.mask]) return AtomSelection(self.molsys, all_idxs)
[docs] class Residue(MolecularSystemBase): """ An object representing a single residue. Parameters ---------- molsys : :class:`~MolSys` The :class:`~MolSys` object from which the Residue belongs to. mask : int An array of atom indices that defines the atoms of the residue bond_mask : np.ndarray boolean array specifying which bonds belong to the ``Residue``. """ def __init__(self, molsys, mask, bond_mask=None): resix = np.unique(molsys.resixs[mask])[0] self.mask = np.where(molsys.resixs == resix)[0] self.molsys = molsys self._update_bond_mask(bond_mask) self.resindex = resix self.segindex = molsys.segixs[self.mask][0] def __len__(self): return len(self.mask)
[docs] def phi_selection(self): """ Get an :class:`~AtomSelection` of the atoms defining the Phi backbone dihedral angle of the residue. Returns ------- sel : AtomSelection An :class:`~AtomSelection` object containing the atoms that make up the Phi backbone dihedral angle of the selected residue. """ prev = self.previous_residue() if prev is None: return None mask_phi = [ prev.ix[prev.names == "C"].flat[0], self.ix[self.names == "N"].flat[0], self.ix[self.names == "CA"].flat[0], self.ix[self.names == "C"].flat[0], ] sel = self.molsys.atoms[mask_phi] return sel if len(sel) == 4 else None
[docs] def psi_selection(self): """ Get an :class:`~AtomSelection` of the atoms defining the Psi backbone dihedral angle of the residue. Returns ------- sel : AtomSelection An :class:`~AtomSelection` object containing the atoms that make up the Psi backbone dihedral angle of the selected residue. """ nex = self.next_residue() if nex is None: return None mask_psi = [ self.ix[self.names == "N"].flat[0], self.ix[self.names == "CA"].flat[0], self.ix[self.names == "C"].flat[0], nex.ix[nex.names == "N"].flat[0], ] sel = self.molsys.atoms[mask_psi] return sel if len(sel) == 4 else None
def previous_residue(self, ignore_chain_breaks=False): resix = self.resix - 1 res = self.molsys._residues[resix] if resix >= 0 else None if ignore_chain_breaks or res is None: return res elif self.resnum - 1 == res.resnum and self.segix == res.segix: return res else: return None def next_residue(self, ignore_chain_breaks=False): resix = self.resix + 1 res = ( self.molsys._residues[resix] if resix <= self.molsys.resixs.max() else None ) if ignore_chain_breaks or res is None: return res elif self.resnum + 1 == res.resnum and self.segix == res.segix: return res else: return None
[docs] class Segment(MolecularSystemBase): """ An object representing a single segment of a molecular system. Parameters ---------- molsys : MolSys The :class:`~MolSys` object from which the segment belongs to. mask : int An array of atom indices that defines the atoms of the segment. """ def __init__(self, molsys, mask): segix = np.unique(molsys.segixs[mask])[0] self.mask = np.where(molsys.segixs == segix)[0] self.molsys = molsys self.segindex = segix self._update_bond_mask(mask)
def byres(mask, molsys): """Advanced operator to select atoms by residue""" residues = np.unique(molsys.resixs[mask]) mask = np.isin(molsys.resixs, residues) return mask def unot(mask): """Unitary `not` operator""" return ~mask def within(distance, mask, molsys): """ Advanced logic operator to identify atoms within a user defined distance of another selection. Parameters ---------- distance : float The distance window defining which atoms will be included in the selection. mask : np.ndarray A boolean array defining a subset of atoms of a :class:`~MolSys` from which the distance cutoff will be measured by. molsys : :class:`~MolSys` A chiLife :class:`~MolSys` from which to select atoms from . Returns ------- out_mask : np.ndarray A boolean array defining a subset of atoms of a :class:`~MolSys` that are within the user defined distance of the user defined selection. """ distance = float(distance) tree1 = cKDTree(molsys.coords) tree2 = cKDTree(molsys.molsys.coords[mask]) results = np.concatenate(tree2.query_ball_tree(tree1, distance)) results = np.unique(results) out_mask = np.zeros_like(mask, dtype=bool) out_mask[results] = True out_mask = out_mask * ~mask return out_mask within.nargs = 1 def point(x, y, z, distance, mask, molsys): """ Advanced logic operator to identify atoms within a user defined distance of a point in cartesian space. Parameters ---------- x : float The x coordinate of the point in cartesian space. y : float The y coordinate of the point in cartesian space. z : float The z coordinate of the point in cartesian space. distance : float The distance window defining which atoms will be included in the selection. mask : np.ndarray A boolean array defining a subset of atoms of a :class:`~MolSys` from which the distance cutoff will be measured by. molsys : :class:`~MolSys` A chiLife :class:`~MolSys` from which to select atoms from . Returns ------- out_mask : np.ndarray A boolean array defining a subset of atoms of a :class:`~MolSys` that are within the user defined distance of the user defined selection. """ tree1 = cKDTree(molsys.coords[mask]) args = tree1.query_ball_point(np.array([x, y, z], dtype=float), distance) out_mask = np.zeros_like(mask, dtype=bool) out_mask[args] = True return out_mask point.nargs = 4 point.novals = True def concat_molsys(*systems): """ Function to concatenate two or more :class:`~MolSys` objects into a single :class:`~MolSys` object. Atoms will be concatenated in the order they are placed in the list. Parameters ---------- systems : List[MolSys] List of :class:`~MolSys` objects to concatenate. Returns ------- mol : MolSys The concatenated `~MolSys` object. """ for sys in systems: if not isinstance(sys, MolecularSystemBase): raise TypeError("Can only concatenate MolecularSystemBase instances") atom_ids = [] record_types = [] anames = [] altlocs = [] atypes = [] resnames = [] resnums = [] resindices = [] icodes = [] chains = [] occupancies = [] bs = [] segs = [] trajectory = [] segindices = [] segids = [] charges = [] chiral = [] bonds = [] bond_types = [] bond_chiral = [] names = [] n_atoms = 0 n_residues = 0 n_chains = 0 traj_len = len(systems[0].trajectory) has_bonds = any(sys.bonds is not None for sys in systems) for sys in systems: record_types.append(sys.record_types) atom_ids.append(sys.atomids) anames.append(sys.names) altlocs.append(sys.altlocs) resnames.append(sys.resnames) resnums.append(sys.resnums) icodes.append(sys.icodes) chains.append(sys.chains) occupancies.append(sys.occupancies) bs.append(sys.bs) segs.append(sys.segids) atypes.append(sys.atypes) charges.append(sys.charges) chiral.append(sys.chiral) if len(sys.trajectory) != traj_len: raise AttributeError( "Atom selections must have the same trajectory lengths to be combined" ) trajectory.append(sys.trajectory.coordinate_array[:, sys.mask]) # New bonds if not has_bonds: bonds = None bond_types = None bond_chiral = None elif sys.bonds is None: bonds.append(np.array([[]])) bond_types.append(np.array([])) bond_chiral.append(np.array([])) else: old_atom_ixs = sys.ix atom_ix_map = {old: new + n_atoms for new, old in enumerate(old_atom_ixs)} b_mask = np.all(np.isin(sys.bonds, old_atom_ixs), axis=1) sys_i_bonds = sys.bonds[b_mask] new_bonds = [[atom_ix_map[a], atom_ix_map[b]] for a, b in sys_i_bonds] bonds.append(new_bonds) bond_types.append(sys.bond_types[b_mask]) bond_chiral.append(sys.bond_chiral[b_mask]) # New resinidces starting from 0 new_resindices = np.zeros_like(sys.resindices) for i, res in enumerate(sys.residues): mask = sys.resindices == res.resindex new_resindices[mask] = i + n_residues resindices.append(new_resindices) # New segindices starting from 0 new_segindices = np.zeros_like(sys.segindices) for i, seg in enumerate(sys.segments): mask = sys.segindices == seg.segindex new_segindices[mask] = i + n_chains segindices.append(new_segindices) n_atoms += len(sys.atoms) n_residues += len(sys.residues) n_chains += len(sys.segments) names.append(sys.name) record_types = np.concatenate(record_types) atom_ids = np.concatenate(atom_ids) anames = np.concatenate(anames) altlocs = np.concatenate(altlocs) resnames = np.concatenate(resnames) resnums = np.concatenate(resnums) icodes = np.concatenate(icodes) chains = np.concatenate(chains) trajectory = np.concatenate(trajectory, axis=1) occupancies = np.concatenate(occupancies) bs = np.concatenate(bs) segs = np.concatenate(segs) atypes = np.concatenate(atypes) charges = np.concatenate(charges) chiral = np.concatenate(chiral) if np.any([x is not None for x in chiral]) else None resindices = np.concatenate(resindices) segindices = np.concatenate(segindices) if has_bonds: bonds = np.concatenate(bonds) bond_types = np.concatenate(bond_types) bond_chiral = np.concatenate(bond_chiral) if len(names) <= 3: name = "_and_".join(names) else: name = f"concatenation of {len(systems)} atom selections" return MolSys( record_types, atom_ids, anames, altlocs, resnames, resnums, icodes, chains, trajectory, occupancies, bs, segs, atypes, charges, chiral, resindices, segindices, bonds, bond_types, bond_chiral, name, ) def res_to_ccd(res): """ Create a CCD entry for a :class:`Residue`. Parameters ---------- res : Residue The residue to create the CCD entry for. Returns ------- ccd_data : dict Dictionary containing the ``chem_comp_atom``, ``chem_comp_bond`` data derived from the atoms ond bonds of the :class:`Residue`. """ chem_comp_atom = { "comp_id": [str(res.resname)] * len(res.atoms), "atom_id": res.names.tolist(), "type_symbol": res.elements.tolist(), "pdbx_aromatic_flag": ["?"] * len(res.atoms), "pdbx_stereo_config": res.chiral.tolist(), } bond_atom_names = res.bond_atom_names bond_types = [chilife.io.XL_BOND_TO_CIF[btype] for btype in res.bond_types] if len(bond_atom_names) > 0: chem_comp_bond = { "comp_id": [str(res.resname)] * len(res.bonds), "atom_id_1": bond_atom_names[:, 0].tolist(), "atom_id_2": bond_atom_names[:, 1].tolist(), "value_order": bond_types, "pdbx_aromatic_flag": ["?"] * len(res.bonds), "pdbx_stereo_config": res.bond_chiral.tolist(), } else: chem_comp_bond = { "comp_id": [], "atom_id_1": [], "atom_id_2": [], "value_order": [], "pdbx_aromatic_flag": [], "pdbx_stereo_config": [], } ccd_data = { "chem_comp_atom": chem_comp_atom, "chem_comp_bond": chem_comp_bond, } return ccd_data def struct_to_cif_atom_site(struct): """ Helper function to generate the cif ``_atom_site`` entry for a cif file. Parameters ---------- struct : MolecularSystemBase The structure or structure to be converted into the ``_atom_site`` entry. Returns ------- atom_site_dict : dict Dictionary containing the ``_atom_site`` entry for a cif file. """ n_frames = len(struct.trajectory) atom_group = np.tile(struct.record_types, n_frames).tolist() atom_id = np.tile(struct.atomids.astype(str), n_frames).tolist() type_symbol = np.tile(struct.atypes, n_frames).tolist() label_atom_id = np.tile(struct.names, n_frames).tolist() label_alt_id = np.tile( [x if x.strip() else "." for x in struct.altlocs], n_frames ).tolist() label_comp_id = np.tile(struct.resnames, n_frames).tolist() label_asym_id = np.tile(struct.segids, n_frames).tolist() label_entity_id = np.tile((struct.segindices + 1).astype(str), n_frames).tolist() label_seq_id = np.tile((struct.resindices + 1).astype(str), n_frames).tolist() pdbx_PDB_ins_code = np.tile( [x if x.strip() else "?" for x in struct.icodes], n_frames ).tolist() occupancy = np.tile(struct.occupancies.astype(str), n_frames).tolist() B_iso_or_equiv = np.tile(struct.bs.astype(str), n_frames).tolist() auth_seq_id = np.tile(struct.resnums.astype(str), n_frames).tolist() auth_asym_id = np.tile(struct.chains, n_frames).tolist() pdbx_PDB_model_num = np.concatenate( [[str(i + 1)] * struct.n_atoms for i in range(n_frames)] ).tolist() cartn_x, cartn_y, cartn_z = [], [], [] for i, ts in enumerate(struct.trajectory): cartn_x.extend(struct.positions[:, 0].astype(str).tolist()) cartn_y.extend(struct.positions[:, 1].astype(str).tolist()) cartn_z.extend(struct.positions[:, 2].astype(str).tolist()) atom_site_dict = { "group_PDB": atom_group, "id": atom_id, "type_symbol": type_symbol, "label_atom_id": label_atom_id, "label_alt_id": label_alt_id, "label_comp_id": label_comp_id, "label_asym_id": label_asym_id, "label_entity_id": label_entity_id, "label_seq_id": label_seq_id, "pdbx_PDB_ins_code": pdbx_PDB_ins_code, "cartn_x": cartn_x, "cartn_y": cartn_y, "cartn_z": cartn_z, "occupancy": occupancy, "B_iso_or_equiv": B_iso_or_equiv, "auth_seq_id": auth_seq_id, "auth_asym_id": auth_asym_id, "pdbx_PDB_model_num": pdbx_PDB_model_num, } return atom_site_dict