Source code for chilife.MolSys

from __future__ import annotations

import weakref
from collections.abc import Iterable
import numbers
import operator
from functools import partial, update_wrapper

import MDAnalysis

from .ligand_utils import assign_atom_names
from .Topology import Topology, BondType, bonds_from_ccd_data
import numpy as np
from numpy.typing import ArrayLike
from scipy.spatial import cKDTree

import chilife

# TODO:
#   Behavior: AtomSelections should have orders to be enforced when indexing.

masked_properties = (
    "record_types",
    "atomids",
    "names",
    "altlocs",
    "altLocs",
    "resnames",
    "resnums",
    "icodes",
    "resids",
    "resis",
    "chains",
    "occupancies",
    "bs",
    "segs",
    "segids",
    "atypes",
    "types",
    "elements",
    "charges",
    "chiral",
    "ix",
    "resixs",
    "resindices",
    "segixs",
    "segindices",
    "_Atoms",
    "_Residues",
    "atoms",
)

singles = {
    "record_type": "record_types",
    "name": "names",
    "altloc": "altlocs",
    "altLoc": "altlocs",
    "atype": "atypes",
    "element": "elements",
    "type": "atypes",
    "resn": "resnums",
    "resname": "resnames",
    "resnum": "resnums",
    "resid": "resids",
    "icode": "icodes",
    "resi": "resis",
    "chain": "chains",
    "segid": "segids",
    "charge": "charges",
    "bfactor": "bs",
    "qfactor": "occupancies",
    "occupancy": "occupancies",
    "segix": "segixs",
    "resix": "resixs",
}



[docs]
class MolecularSystemBase:
    """Base class for molecular systems containing attributes universal to all subclasses"""

    def __getattr__(self, key):
        if not isinstance(self, MolSys) and "molsys" not in self.__dict__:
            return self.__getattribute__(key)
        elif key in singles:
            return np.squeeze(
                self.molsys.__getattribute__(singles[key])[self.mask]
            ).flat[0]
        elif isinstance(self, MolSys) and key not in self.__dict__:
            return self.__getattribute__(key)
        elif key == "trajectory":
            return self.molsys.trajectory
        elif key == "atoms":
            return self.molsys.__getattribute__(key)[self.mask]
        elif key in masked_properties or key in singles:
            return np.squeeze(self.molsys.__getattribute__(key)[self.mask])
        else:
            return self.molsys.__getattribute__(key)

    def __setattr__(self, key, value):
        if key in ("molsys", "mask"):
            super(MolecularSystemBase, self).__setattr__(key, value)
        elif key in singles:
            self.molsys.__getattribute__(singles[key])[self.mask] = value
        elif isinstance(self, MolSys) and key not in self.__dict__:
            super(MolecularSystemBase, self).__setattr__(key, value)
        elif key == "trajectory":
            self.molsys.__dict__["trajectory"] = value
        elif key in masked_properties or key in singles:
            self.molsys.__getattribute__(key)[self.mask] = value
        else:
            super(MolecularSystemBase, self).__setattr__(key, value)

    def __add__(self, other):
        if other.molsys is self.molsys:
            new_mask = np.unique(np.concatenate([self.mask, other.mask]))
            return AtomSelection(self.molsys, new_mask)

        else:
            return concat_molsys(self.atoms, other.atoms)

    def __sub__(self, other):
        if other.molsys is self.molsys:
            tmask = ~np.isin(self.mask, other.mask)
            new_mask = self.mask[tmask]
            return AtomSelection(self.molsys, new_mask)
        else:
            raise RuntimeError("You can not subtract atoms from a different molsys.")


[docs]
    def select_atoms(self, selstr):
        """
        Select atoms from :class:`~MolSys` or :class:`~AtomSelection` based on selection a selection string, similar
        to the `select_atoms` method from `MDAnalysis <https://docs.mdanalysis.org/stable/documentation_pages/selections.html>`_

        Parameters
        ----------
        selstr : str
            Selection string

        Returns
        -------
        atoms : :class:`~AtomSelection`
            Selected atoms
        """

        mask = process_statement(selstr, self.logic_keywords, self.molsys_keywords)
        if hasattr(self, "mask"):
            t_mask = np.zeros(self.molsys.n_atoms, dtype=bool)
            t_mask[self.mask] = True
            mask *= t_mask

        mask = np.argwhere(mask).T[0]
        return AtomSelection(self.molsys, mask)


    def __getitem__(self, item):
        if np.issubdtype(type(item), np.integer):
            relidx = self.molsys.ix[self.mask][item]
            return Atom(self.molsys, relidx)

        if isinstance(item, slice):
            return AtomSelection(self.molsys, self.mask[item])

        elif isinstance(item, (np.ndarray, list, tuple)):
            item = np.asarray(item)
            if len(item) == 1 or item.sum() == 1:
                return Atom(self.molsys, self.mask[item])
            elif item.dtype == bool:
                item = np.argwhere(item).T[0]
                return AtomSelection(self.molsys, item)
            else:
                return AtomSelection(self.molsys, self.mask[item])

        elif hasattr(item, "__iter__"):
            if all([np.issubdtype(type(x), int) for x in item]):
                return AtomSelection(self.molsys, self.mask[item])

        raise TypeError(
            "Only integer, slice type, and boolean mask arguments are supported at this time"
        )

    @property
    def fname(self):
        """File name or functional name of the molecular system"""
        return self.molsys._fname

    @fname.setter
    def fname(self, val):
        self.molsys._fname = val

    @property
    def positions(self):
        """3-Dimensional cartesian coordinates of the atoms in the molecular system"""
        return self.coords

    @positions.setter
    def positions(self, val):
        self.coords = np.asarray(val)

    @property
    def coords(self):
        """3-dimensional cartesian coordinates of the atoms in the molecular system"""
        return self.trajectory.coords[self.trajectory.frame, self.mask]

    @coords.setter
    def coords(self, val):
        self.trajectory.coords[self.trajectory.frame, self.mask] = np.asarray(val)

    @property
    def n_atoms(self):
        """ "Number of atoms in the molecular system"""
        return len(self.mask)

    @property
    def n_residues(self):
        """Number of residues in the molecular system"""
        return len(np.unique(self.resixs[self.mask]))

    @property
    def n_chains(self):
        """Number of chains in the molecular system"""
        return len(np.unique(self.chains[self.mask]))

    @property
    def n_segments(self):
        """Number of segments in the molecular system"""
        return len(np.unique(self.segments[self.mask]))

    @property
    def residues(self):
        """:class:`~ResidueSelection` for all residues of the molecular system"""
        return ResidueSelection(self.molsys, self.mask)

    @property
    def segments(self):
        """:class:`~SegmentSelection` for all segments (chains) of the molecular system"""
        return SegmentSelection(self.molsys, self.mask)

    @property
    def logic_keywords(self):
        """Dictionary of logic keywords (strings) pertaining to :meth:`atom_selection` method. Keywords are mapped
        to the associated logic function"""
        return self.molsys._logic_keywords

    @property
    def molsys_keywords(self):
        """Dictionary of molecular system keywords (strings) pertaining to :meth:`atom_selection` method. Keywords are
        mapped to the associated molecular system attributes"""
        return self.molsys._molsys_keywords

    @property
    def universe(self):
        """Wrapper attribute to allow molecular systems to behave like MDAnalysis AtomGroup objects"""
        return self.molsys

    @property
    def bonds(self):
        """Array of atom indices specifying bonds between atoms. Note that the bonds are indexed with respect to the
        parent molecular system and are not specific to any atom, residue or chain selection"""
        bonds = self._bonds[self._bond_mask] if self._bonds is not None else None
        return bonds

    @bonds.setter
    def bonds(self, val):
        raise AttributeError(
            "Bonds cannot be set explicitly. Please use the `add_bond(s)` and `remove_bond(s)` methods."
        )

    @property
    def bond_atom_names(self):
        """Array of atom names of bonded atoms corresponding to those in :meth:`~MolecularSystemBase.bonds`"""
        return np.array([self.molsys.names[b] for b in self.bonds])

    @property
    def bond_types(self):
        """Array of chiLife BondType objects (enumerations) specifying the type of bond between atoms in
        :meth:`~MolecularSystemBase.bonds`"""
        if self.bonds is None or self._bond_mask is None:
            return None

        bond_types = self._bond_types[self._bond_mask]
        return bond_types

    @bond_types.setter
    def bond_types(self, val):
        raise AttributeError(
            "Bond types cannot be set explicitly. Please use the `add_bond(s)` and `remove_bond(s)` methods."
        )

    @property
    def bond_chiral(self):
        """chirality of the bonds between atoms specified in :meth:`~MolecularSystemBase.bonds`"""
        if self.bonds is None or self._bond_mask is None:
            return None
        bond_chiral = self.molsys._bond_chiral[self._bond_mask]
        return bond_chiral

    def _check_mol_sys_for_bonds(self):
        """
        Helper function to check if the current object has any bonds
        """
        if not isinstance(self, MolSys):
            raise AttributeError(
                "Bonds and bond types can only be set on `MolSys` classes and not atom/residue/chain selections"
            )


[docs]
    def add_bond(self, bond, bond_type=None):
        """
        Add singular bond between two atoms.

        Parameters
        ----------
        bond : ArrayLike[int]
            size 2 array of integers specifying bonds between atom indices of the parent :class:`MolSys`.
        bond_type : ArrayLike[BondType] | None
            Array of chilife BondType instances. If not specified ``BondType.UNSPECIFIED`` will be used.
        """
        self._check_mol_sys_for_bonds()
        bond_type = bond_type or BondType.UNSPECIFIED

        if self._check_overwrite_bond(bond, bond_type):
            return None
        elif len(self._bonds) == 0:
            self._bonds = np.array([bond])
            self._bond_types = np.array([bond_type])
        else:
            self._bonds = np.concatenate((self._bonds, [bond]))
            self._bond_types = np.concatenate((self._bond_types, [bond_type]))

        self._bond_mask = np.arange(len(self._bonds))
        self.topology = Topology(self, self._bonds, self._bond_types)


    def _check_overwrite_bond(self, bond, bond_type):
        if self.bonds is None:
            return False

        if len(self.bonds) == 0:
            return False

        exists = np.argwhere(np.all(self._bonds == bond, axis=1)).flatten()
        exists_reversed = np.argwhere(
            np.all(self._bonds == bond[::-1], axis=1)
        ).flatten()
        if exists.size > 0:
            if bond_type is not None:
                self._bond_types[exists[0]] = bond_type

        elif exists_reversed.size > 0:
            if bond_type is not None:
                self._bond_types[exists_reversed[0]] = bond_type

        else:
            return False

        return True


[docs]
    def add_bonds(self, bonds, bond_type=None, bond_chiral=None, update_topology=True):
        """
        Add multiple bonds between atoms.

        Parameters
        ----------
        bonds : ArrayLike[int]
            N by 2 array of integers specifying bonds between atom indices of the parent :class:`MolSys`.
        bond_type : ArrayLike[BondType] | None
            Array of chilife BondType instances. Must be the same length as ``bonds``. If not specified
            ``BondType.UNSPECIFIED`` will be used.
        bond_chiral : ArrayLike[str] | None
            chirality of the bonds.
        update_topology : bool
            Toggle updating the attached MolSys Topology.
        """

        self._check_mol_sys_for_bonds()
        bonds = np.atleast_1d(bonds)
        bond_type = (
            bond_type if bond_type is not None else [BondType.UNSPECIFIED] * len(bonds)
        )
        bond_chiral = bond_chiral if bond_chiral is not None else ["?"] * len(bonds)

        if not isinstance(bond_type, Iterable):
            bond_type = [bond_type] * len(bonds)
        if not isinstance(bond_chiral, Iterable):
            bond_chiral = [bond_chiral] * len(bonds)

        if len(bonds) != len(bond_type):
            raise AttributeError(
                "Failed to add bonds to the MolSys because the number of bonds and bond_types do not match."
            )
        if len(bonds) != len(bond_chiral):
            raise AttributeError(
                "Failed to add bonds to the MolSys because the number of bonds and bond_chiral do not match."
            )

        new_bonds, new_bond_types, new_bond_chiral = [], [], []
        for b, btype, chiral in zip(bonds, bond_type, bond_chiral):
            if not self._check_overwrite_bond(b, btype):
                new_bonds.append(b)
                new_bond_types.append(btype)
                new_bond_chiral.append(chiral)

        # Convert to numpy arrays
        new_bonds = np.array(new_bonds)
        new_bond_types = np.array(new_bond_types)
        new_bond_chiral = np.array(new_bond_chiral)

        if self.bonds is None or len(self.bonds) == 0:
            self._bonds = new_bonds
            self._bond_types = new_bond_types
            self._bond_chiral = new_bond_chiral
        else:
            self._bonds = np.concatenate((self._bonds, new_bonds))
            self._bond_types = np.concatenate((self._bond_types, new_bond_types))
            self._bond_chiral = np.concatenate((self._bond_chiral, new_bond_chiral))

        # Update persistent residue/segement objects
        if hasattr(self, "_residue_list"):
            for res in self._residues:
                res._update_bond_mask()
        if hasattr(self, "_segments_list"):
            for seg in self._segments:
                seg._update_bond_mask()

        self._bond_mask = np.arange(len(self._bonds))

        if update_topology:
            self.topology = Topology(self, self._bonds, self._bond_types)



[docs]
    def remove_bond(self, bond):
        """
        Remove a singular bond between two atoms.

        Parameters
        ----------
        bond : ArrayLike[int]
            Array of two integers specifying atoms involved in the bond that is to be removed.
        """
        self.remove_bonds([bond])
        self._bond_mask = np.arange(len(self._bonds))



[docs]
    def remove_bonds(self, bonds=None):
        """
        Remove multiple bonds between atoms.

        Parameters
        ----------
        bonds : ArrayLike[int]
            N by 2 array of integers specifying the parent :class:`MolSys` atom indices of the atoms that are to be
            un-bonded.
        """
        if bonds is None:
            self._bonds = np.array([[]])
            self._bond_types = np.array([])
            self._bond_mask = np.array([], dtype=bool)
            return None

        self._check_mol_sys_for_bonds()
        mask = np.ones(len(bonds), dtype=bool)
        for bond in bonds:
            exists = np.all(self._bonds == bond, axis=1)
            exists_reversed = np.all(self._bonds == bond[::-1], axis=1)

            if exists.size > 0:
                mask *= ~exists
            elif exists_reversed:
                mask *= ~exists_reversed

        self._bonds = self.bonds[mask]
        self._bond_types = self.bond_types[mask]
        self._bond_mask = np.arange(len(self._bonds))


    def _update_bond_mask(self, bond_mask=None):
        if bond_mask is not None:
            self._bond_mask = bond_mask
        elif self.molsys._bonds is None:
            self._bond_mask = None
        else:
            self._bond_mask = np.argwhere(
                np.isin(self.molsys._bonds, self.mask).all(axis=1)
            ).flatten()


[docs]
    def copy(self):
        """Creates a deep copy of the underlying MolSys and returns an :class:`AtomSelection` from the new copy
        matching the current selection if it exists."""
        p2 = self.molsys.copy()
        return AtomSelection(p2, self.mask.copy())


    def __iter__(self):
        """Iterate over all atoms of the MolSys"""
        for idx in self.mask:
            yield self.molsys.atoms[idx]

    def __eq__(self, value):
        """Checks if two molecular systems are equal by making sure they have the same ``self.molsys`` and the same
        mask"""
        return self.molsys is value.molsys and self.mask == value.mask

    def write_pdb(self, filename):
        pass


[docs]
    def write_cif(self, filename, use_bio_ccd: bool = True):
        """
        Write a CIF file of the current ``MolSys``. Note that if there are bonds or formal charges this information will
        be preserved in the _chem_comp_atom and _chem_comp_bond fields.

        Parameters
        ----------
        filename : str | Path
            File name or ``Path`` to write the cif file to.
        use_bio_ccd : bool
            Use the chilife ``bio_ccd`` to assign bond information to residues where this information is not already
            present. If the residue is not in the ``bio_ccd`` and does not have bond information in the ``MolSys``
            objectno CCD information will be written.
        """
        segids, stype = [], []
        poly_ids, strand_ids, poly_type = [], [], []
        poly_asym, poly_auth_num, poly_icodes = [], [], []
        poly_seq_eids, poly_seq_hetero, poly_seq_mon_id, poly_seq_num = [], [], [], []
        nonpoly_asym_id, nonpoly_auth_seq, nonpoly_eid = [], [], []
        nonpoly_mon_id, nonpoly_icode, nonpoly_seq_num, nonpoly_sid = [], [], [], []

        for i, seg in enumerate(self.segments):
            segids.append(str(i + 1))
            if len(seg.residues) > 1:
                stype.append("polymer")
                if any(x.resname in chilife.nataa_codes for x in seg.residues):
                    poly_type.append("polypeptide")
                elif any(x.resname in chilife.natnu_codes for x in seg.residues):
                    poly_type.append("nucleotide")
                else:
                    poly_type.append("unknown")

                poly_ids.append(str(i + 1))
                strand_ids.append(str(seg.segid))

                poly_seq_num.extend([str(x) for x in range(1, len(seg.residues) + 1)])
                poly_seq_mon_id.extend([res.resname for res in seg.residues])
                poly_seq_hetero.extend(
                    [
                        "y" if np.any(res.atoms.record_type == "HETATM") else "n"
                        for res in seg.residues
                    ]
                )
                poly_seq_eids.extend([poly_ids[-1]] * len(seg.residues))
                poly_asym.extend([str(res.segid) for res in seg.residues])
                poly_auth_num.extend([str(res.resnum) for res in seg.residues])
                poly_icodes.extend(
                    [res.icode if res.icode.strip() else "." for res in seg.residues]
                )

            else:
                res = seg.residues[0]
                nonpoly_eid.append(str(i + 1))
                nonpoly_asym_id.append(str(seg.segid))
                nonpoly_auth_seq.append(str(res.resnum))
                nonpoly_mon_id.append(res.resname)
                nonpoly_icode.append(res.icode if res.icode != "" else ".")
                nonpoly_seq_num.append(str(1))
                nonpoly_sid.append(str(seg.segid))
                stype.append("non-polymer")

        entites = self.segindices
        atom_site_data = struct_to_cif_atom_site(self)

        local_ccd = {}
        for res in self._residues:
            if res.resname in local_ccd:
                continue
            elif res.resname in chilife.bio_ccd and use_bio_ccd:
                local_ccd[res.resname] = chilife.bio_ccd[res.resname]
            else:
                local_ccd[res.resname] = res_to_ccd(res)

        ccd_list = sorted(local_ccd.keys())
        local_ccd = chilife.io.join_ccd_info(ccd_list, local_ccd)

        cif_data = {
            self.name: {
                "entry": {"id": self.name},
                "atom_type": {
                    "symbol": [str(e) for e in sorted(np.unique(self.elements))]
                },
                "audit_author": {
                    "name": ["chiLife", "Maxx Tessmer", "Stefan Stoll"],
                    "pdbx_ordinal": ["1", "2", "3"],
                },
                "citation": {
                    "book_publisher": "?",
                    "country": "US",
                    "id": "primary",
                    "journal_full": "PLoS Computational Biology",
                    "journal_id_ISSN": "1553-734X",
                    "journal_volume": "19",
                    "journal_issue": "3",
                    "journal_page_first": "e1010834",
                    "journal_page_last": "e1010834",
                    "title": "chiLife: An open-source Python package for in silico spin labeling and integrative protein modeling",
                    "year": "2023",
                },
                "citation_author": {
                    "citation_id": ["primary", "primary"],
                    "name": ["Maxx Tessmer", "Stefan Stoll"],
                    "ordinal": ["1", "2"],
                },
                "entity": {
                    "id": segids,
                    "pdbx_description": ["."] * len(segids),
                    "type": stype,
                },
                "entity_poly": {
                    "entity_id": poly_ids,
                    "pdbx_strand_id": strand_ids,
                    "pdbx_type": poly_type,
                },
                "entity_poly_seq": {
                    "entity_id": poly_seq_eids,
                    "hetero": poly_seq_hetero,
                    "mon_id": poly_seq_mon_id,
                    "num": poly_seq_num,
                },
                "ma_data": {
                    "content_type": "model_coordinates",
                    "id": 1,
                    "name": "Model",
                },
                "pdbx_nonpoly_scheme": {
                    "asym_id": nonpoly_asym_id,
                    "auth_seq_num": nonpoly_auth_seq,
                    "entity_id": nonpoly_eid,
                    "mon_id": nonpoly_mon_id,
                    "pdb_ins_code": nonpoly_icode,
                    "pdb_seq_num": nonpoly_seq_num,
                    "pdb_strand_id": nonpoly_sid,
                },
                "pdbx_poly_seq_scheme": {
                    "asym_id": poly_asym,
                    "auth_seq_num": poly_auth_num,
                    "entity_id": poly_seq_eids,
                    "hetero": poly_seq_hetero,
                    "mon_id": poly_seq_mon_id,
                    "pdb_ins_code": poly_icodes,
                    "pdb_seq_num": poly_seq_num,
                    "pdb_strand_id": poly_asym,
                    "seq_id": poly_seq_num,
                },
                "software": {
                    "classification": "model building",
                    "date": "2023-03-31",
                    "description": "Protein modeling",
                    "name": "chiLife",
                    "pdbx_ordinal": "1",
                    "type": "python package",
                    "version": chilife.__version__,
                },
                "struct_asym": {
                    "entity_id": poly_ids,
                    "id": strand_ids,
                },
                "atom_site": atom_site_data,
                **local_ccd,
            }
        }

        # Delete empty entries
        del_list = []
        for k1, v1 in cif_data[self.name].items():
            bools = []
            for k2, v2 in v1.items():
                bools.append(bool(v2))

            if np.all(~np.array(bools)):
                del_list.append(k1)

        for key in del_list:
            del cif_data[self.name][key]

        chilife.io.write_cif(filename, cif_data)


    def _make_sdf_data(self):
        sdf_data = []

        properties = {}

        if len(chg_mask := np.argwhere(self.charges != 0).flatten().tolist()) > 0:
            properties["CHG"] = {
                "atom_ids": chg_mask,
                "charges": self.charges[chg_mask].tolist(),
            }

        for ts in self.trajectory:
            sdf_dict = {
                "name": self.name,
                "software": f"chiLife {chilife.__version__}",
                "comment": "",
                "n_atoms": self.n_atoms,
                "n_bonds": len(self.bonds),
                "chiral": int(np.any(~np.isin(self.chiral, ("?", "N")))),
                "format": "v2000",
                "atoms": [
                    {
                        "index": i,
                        "xyz": atom.position.tolist(),
                        "element": atom.element,
                        "mass difference": 0,
                        "charge": 0,
                        "stereo": 0,
                        "valence": 0,
                    }
                    for i, atom in enumerate(self.atoms)
                ],
                "bonds": [
                    {
                        "idx1": a1,
                        "idx2": a2,
                        "type": chilife.io.XL_BOND_TO_SDF[btype],
                        "stereo": 0,
                    }
                    for (a1, a2), btype in zip(self.bonds, self.bond_types)
                ],
                "properties": properties,
            }

            sdf_data.append(sdf_dict)

        return sdf_data


[docs]
    def write_sdf(self, filename):
        """
        Write an SDF file from the current ``MolSys`` object. Note there is no support for sdf style chirality,
        isotopes, valence, RAD, ISO, RBC, SUB, UNS,

        Parameters
        ----------
        filename : str | Path
            Name of the file
        """
        sdf_data = self._make_sdf_data()
        chilife.io.write_sdf(sdf_data, filename)





[docs]
class MolSys(MolecularSystemBase):
    """
    An object containing all attributes of a molecular system.

    Parameters
    ----------
    atomids : np.ndarray
        Array of atom indices where each index is unique to each atom in the molecular system.
    names : np.ndarray
        Array of atom names, e.g. 'N', 'CA' 'C'.
    altlocs : np.ndarray
        Array of alternative location identifiers for each atom.
    resnames : np.ndarray
        Array of residue names for each atom.
    resnums : np.ndarray
        Array of residue numbers for each atom.
    chains : np.ndarray
        Array of chain identifiers for each atom.
    trajectory : np.ndarray
        Cartesian coordinates of each atom for each state of an ensemble or for each timestep of a trajectory.
    occupancies : np.ndarray
        Occupancy or q-factory for each atom.
    bs : np.ndarray
        thermal, or b-factors for each atom.
    segs : np.ndarray
        Segment/chain IDs for each atom.
    atypes : np.ndarray
        Atom/element types.
    charges : np.ndarray
        Formal or partial charges assigned to each atom.
    chiral : nd.array | None
        Chirality specifier for each atom.
    resindices: np.ndarray | None
        Array of the residue indices each atom belongs to. resindices are 0-indexed.
    segindices: np.ndarray | None
        Array of the segment index each atom belongs to. segindices are 0-indexed.
    bonds : ArrayLike | None
        Array of atom ID pairs corresponding to all atom bonds in the system.
    bond_types : ArrayLike | None
        Array of BondType objects corresponding to all bonds in the system.
    bond_chiral: ArrayLike | None
        Array specifying the chirality (if any) of each bond.
    name : str
        Name of molecular system
    use_ccd: dict | None
        A dictionary containing ccd information needed to generate topological information. This can still be used if
        bonds are provided but some are missing.
    """

    def __init__(
        self,
        record_types: np.ndarray,
        atomids: np.ndarray,
        names: np.ndarray,
        altlocs: np.ndarray,
        resnames: np.ndarray,
        resnums: np.ndarray,
        icodes: np.ndarray,
        chains: np.ndarray,
        trajectory: np.ndarray,
        occupancies: np.ndarray,
        bs: np.ndarray,
        segs: np.ndarray,
        atypes: np.ndarray,
        charges: np.ndarray,
        chiral: np.ndarray | None = None,
        resindices: np.ndarray | None = None,
        segindices: np.ndarray | None = None,
        bonds: np.ndarray | None = None,
        bond_types: np.ndarray | None = None,
        bond_chiral: np.ndarray | None = None,
        name: str = "Noname_MolSys",
        use_ccd: dict | None = None,
    ):
        # self.molsys = weakref.proxy(self)
        self.record_types = record_types.copy()
        self.atomids = atomids.copy()
        self.names = names.copy().astype("U4")
        self.altlocs = altlocs.copy()
        self.resnames = resnames.copy()
        self.resnums = resnums.copy()
        self.icodes = icodes.copy()
        self.chains = chains.copy()
        self.trajectory = Trajectory(trajectory.copy(), self)
        self.occupancies = occupancies.copy()
        self.bs = bs.copy()
        self.segs = segs.copy() if np.any(segs != "") else chains.copy()
        self.atypes = atypes.copy()
        self.charges = charges.copy()
        self.chiral = (
            chiral.copy() if chiral is not None else np.array(["?"] * len(atomids))
        )
        self._fname = name

        self.ix = np.arange(len(self.atomids))
        self.mask = np.arange(len(self.atomids))

        # resix_borders = np.nonzero(np.r_[1, np.diff(self.resnums)[:-1]])
        # resix_borders = np.append(resix_borders, [self.n_atoms])
        # resixs = []
        # for i in range(len(resix_borders) - 1):
        #     dif = resix_borders[i + 1] - resix_borders[i]
        #     resixs.append(np.ones(dif, dtype=int) * i)

        if resindices is None:
            residues, resixs = [], []
            i = -1
            for aidx in range(self.n_atoms):
                residue_id = self.resnums[aidx], self.icodes[aidx], self.chains[aidx]
                if residue_id not in residues:
                    residues.append(residue_id)
                    i += 1

                resixs.append(i)

            self.resixs = np.array(resixs)
        else:
            self.resixs = resindices

        self.resindices = self.resixs

        if segindices is None:
            segix_map = {v: i for i, v in enumerate(np.unique(self.segs))}
            self.segixs = np.array([segix_map[k] for k in self.segs])
        else:
            self.segixs = segindices
        self.segindices = self.segixs

        self.is_protein = np.array(
            [res in chilife.SUPPORTED_RESIDUES for res in resnames]
        )

        self._molsys_keywords = {
            "id": self.atomids,
            "record_type": self.record_types,
            "name": self.names,
            "altloc": self.altlocs,
            "resname": self.resnames,
            "resnum": self.resnums,
            "resix": self.resixs,
            "resindex": self.resixs,
            "icode": self.icodes,
            "chain": self.chains,
            "occupancies": self.occupancies,
            "b": self.bs,
            "chiral": self.chiral,
            "segid": self.segs,
            "segix": self.segixs,
            "segindex": self.segixs,
            "type": self.atypes,
            "charges": self.charges,
            "resi": self.resnums,
            "resid": self.resnums,
            "i.": self.resnums,
            "s.": self.segs,
            "c.": self.chains,
            "r.": self.resnames,
            "n.": self.names,
            "resn": self.resnames,
            "q": self.occupancies,
            "elem": self.atypes,
            "element": self.atypes,
            "protein": self.is_protein,
            "_len": self.n_atoms,
        }

        self._logic_keywords = {
            "and": operator.mul,
            "or": operator.add,
            "not": unot,
            "!": unot,
            "<": operator.lt,
            ">": operator.gt,
            "==": operator.eq,
            "<=": operator.le,
            ">=": operator.ge,
            "!=": operator.ne,
            "byres": partial(byres, molsys=self.molsys),
            "within": update_wrapper(partial(within, molsys=self.molsys), within),
            "around": update_wrapper(partial(within, molsys=self.molsys), within),
            "point": update_wrapper(partial(point, molsys=self.molsys), point),
        }

        # Aliases
        self.resns = self.resnames
        self.resis = self.resnums
        self.altLocs = self.altlocs
        self.resids = self.resnums
        self.segids = self.segs
        self.types = self.atypes
        self.elements = self.atypes
        self.bfactors = self.bs

        self._bonds = None
        self._bond_types = None
        self._bond_mask = np.array([], dtype=bool)
        self._bond_chiral = None

        add_ccd_bonds = False
        if use_ccd is not None:
            if use_ccd is True:
                use_ccd = chilife.bio_ccd

            unique_resnames = np.unique(resnames)
            unique_resnames = [
                res
                for res in unique_resnames
                if (res in use_ccd) and ("chem_comp_atom" in use_ccd[res])
            ]
            atom_chiral_mapping = {
                (res, aid): c
                for res in unique_resnames
                for aid, c in zip(
                    use_ccd[res]["chem_comp_atom"]["atom_id"],
                    use_ccd[res]["chem_comp_atom"]["pdbx_stereo_config"],
                )
            }

            chiral = np.array(
                [
                    atom_chiral_mapping.get((res, atom_id), "?")
                    for res, atom_id in zip(resnames, names)
                ]
            )

            chiral_mask = self.chiral == "?"
            self.chiral[chiral_mask] = chiral[chiral_mask]
            ccd_bonds, ccd_bond_types, ccd_bond_chiral = bonds_from_ccd_data(
                self, use_ccd
            )

            if bonds is None:
                bonds = ccd_bonds
                bond_types = ccd_bond_types
                bond_chiral = ccd_bond_chiral
            else:
                add_ccd_bonds = True

        # Add Topology
        if bonds is not None:
            self._bonds = bonds.copy()
            self._bond_types = (
                bond_types.copy()
                if bond_types is not None
                else np.array([BondType.UNSPECIFIED] * len(bonds))
            )
            self._bond_mask = np.arange(len(self._bonds))
            if bond_chiral is not None and any(bond_chiral):
                self._bond_chiral = bond_chiral.copy()
            else:
                self._bond_chiral = np.array(["?"] * len(bonds))

            if hasattr(self, "_residue_list"):
                for res in self._residues:
                    res._update_bond_mask()
            if hasattr(self, "_segments_list"):
                for seg in self._segments:
                    seg._update_bond_mask()

        if add_ccd_bonds:
            self.add_bonds(
                ccd_bonds, ccd_bond_types, ccd_bond_chiral, update_topology=False
            )

        # Atoms creation is required to be last
        self.atoms = AtomSelection(self, self.mask, self._bond_mask)
        self.topology = Topology(self, bonds, bond_types) if bonds is not None else None


[docs]
    @classmethod
    def from_pdb(cls, file_name, sort_atoms=False, **kwargs):
        """
        Reads a pdb file and returns a MolSys object

        Parameters
        ----------
        file_name : str, Path
            Path to the PDB file to read.
        sort_atoms : bool
            Presort atoms for consisted internal coordinate definitions
        kwargs: dict
            Additional arguments to be passed to the :class:`MolSys` constructor.
        Returns
        -------
        cls : MolSys
            Molecular system object of the PDB structure.
        """

        pdb_dict = chilife.read_pdb(file_name, sort_atoms=sort_atoms)

        # Add chain IDs if not present.
        chain_data = pdb_dict["chains"]
        used_chains = np.unique(chain_data)
        if "" in used_chains:
            available_chains = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
            for new_chain in available_chains:
                if new_chain not in used_chains:
                    break
            chain_data[chain_data == ""] = new_chain

        return cls(**pdb_dict, **kwargs)



[docs]
    @classmethod
    def from_cif(cls, file_name):
        """
        Reads a cif file and returns a MolSys object

        Parameters
        ----------
        file_name : str, Path
            Path to the CIF file to read.
        Returns
        -------
        cls : MolSys
            Molecular system object of the CIF structure.
        """
        cif_data = chilife.io.read_cif(file_name)

        if len(cif_data) > 1:
            raise RuntimeError(
                "More than one data entry found in the cif file. Please seperate into multiple files or join as "
                "separate chains in one entry"
            )

        name = list(cif_data.keys())[0]
        cif_data = cif_data[name]
        atom_site_data = cif_data["atom_site"]

        model_id = np.array([atom_site_data["pdbx_PDB_model_num"]]).flatten()
        model_ids = np.unique(model_id)
        n_models = len(model_ids)
        n_atoms = np.sum(model_id == model_ids[0])
        anames = np.array(atom_site_data["label_atom_id"][:n_atoms])

        for i in model_ids:
            if np.sum(model_id == i) != n_atoms:
                raise RuntimeError(
                    "This CIF file contains multiple structure models with different numbers of atoms. chiLife "
                    "currently only supports reading CIF files where all models have the exact same number of atoms."
                )
            model_anames = np.array(atom_site_data["label_atom_id"])[model_id == i]
            if not np.all(model_anames == anames):
                raise RuntimeError(
                    "This CIF file contains multiple structure models with different atom orders. chiLife currently "
                    "only supports reading CIF files where all models have the exact same atom orders."
                )
        record_types = np.array(atom_site_data["group_PDB"][:n_atoms])
        atomids = np.array(atom_site_data["id"][:n_atoms])
        altlocs = np.array(
            ["" if a == "." else a for a in atom_site_data["label_alt_id"][:n_atoms]]
        )

        atypes = np.array(atom_site_data["type_symbol"][:n_atoms])
        resnames = np.array(atom_site_data["label_comp_id"][:n_atoms])
        resnums = np.array(atom_site_data["auth_seq_id"][:n_atoms], dtype=int)
        icodes = np.array(atom_site_data["pdbx_PDB_ins_code"][:n_atoms])
        icodes[icodes == "?"] = ""

        segs = np.char.array(atom_site_data["label_asym_id"][:n_atoms]) + np.char.array(
            atom_site_data["label_entity_id"][:n_atoms]
        )

        occupancies = np.array(atom_site_data["occupancy"][:n_atoms], dtype=float)
        bs = np.array(atom_site_data["B_iso_or_equiv"][:n_atoms], dtype=float)
        chains = np.array(atom_site_data["auth_asym_id"][:n_atoms])
        charges = np.array(atom_site_data["pdbx_formal_charge"][:n_atoms], dtype="U3")
        charges[charges == "?"] = "nan"
        charges = charges.astype(float)

        traj = np.empty((n_models, n_atoms, 3))
        for i, id in enumerate(model_ids):
            sl = slice(i * n_atoms, (i + 1) * n_atoms)
            xs = np.array(atom_site_data["Cartn_x"][sl], dtype=float)
            ys = np.array(atom_site_data["Cartn_y"][sl], dtype=float)
            zs = np.array(atom_site_data["Cartn_z"][sl], dtype=float)
            traj[i] = np.dstack([xs, ys, zs])

        # Get ccd
        ccd_data = chilife.io.create_ccd_dicts(cif_data)

        msys = cls(
            record_types,
            atomids,
            anames,
            altlocs,
            resnames,
            resnums,
            icodes,
            chains,
            traj,
            occupancies,
            bs,
            segs,
            atypes,
            charges,
            name=name,
            use_ccd=ccd_data,
        )
        msys._cif_data = cif_data

        return msys



[docs]
    @classmethod
    def from_sdf(cls, file_name):
        """
        Reads a sdf file and returns a MolSys object

        Parameters
        ----------
        file_name : str, Path
            Path to the sdf file to read.
        Returns
        -------
        cls : MolSys
            Molecular system object of the sdf structure.
        """
        sdf_data = chilife.io.read_sdf(file_name)
        mol0 = sdf_data[0]
        name = mol0["name"] or "LIG"
        atom_elements_0 = np.array([a["element"] for a in mol0["atoms"]])
        n_atoms = len(atom_elements_0)
        res_name = mol0["name"][:3] if mol0["name"] != "" else "LIG"

        record_types = np.array(["HETATM"] * n_atoms)
        atomids = np.array([a["index"] for a in mol0["atoms"]])
        anames = np.array(assign_atom_names(mol0))
        altlocs = np.array([""] * n_atoms)
        resnames = np.array([res_name] * n_atoms)
        resnums = np.ones(n_atoms, dtype=int)
        icodes = np.array([""] * n_atoms)
        chains = np.array(["A"] * n_atoms)
        occupancies = np.ones(n_atoms)
        bs = np.zeros(n_atoms)
        segs = np.array(["A"] * n_atoms)
        atypes = atom_elements_0

        # CHG block takes precidence over ATOM block charges
        if "CHG" in mol0["properties"]:
            charges = np.zeros(n_atoms)
            chg_idxs = mol0["properties"]["CHG"]["atom_ids"]
            chg_values = mol0["properties"]["CHG"]["charges"]
            charges[chg_idxs] = chg_values
        else:
            charges = np.array(
                [
                    chilife.io.SDF_ATOM_CHARGE_TO_INT[a["charge"]]
                    for a in sdf_data[0]["atoms"]
                ]
            )

        bonds, bond_types = [], []
        for bond in mol0["bonds"]:
            bonds.append([bond["idx1"], bond["idx2"]])
            bond_types.append(chilife.io.SDF_BOND_TO_XL[bond["type"]])
        bonds, bond_types = np.array(bonds), np.array(bond_types)

        trajectory = []
        for struct_dict in sdf_data:
            atom_elements_i = np.array([a["element"] for a in struct_dict["atoms"]])
            if not np.all(atom_elements_i == atom_elements_0):
                raise RuntimeError(
                    "This structures in this sdf file contain different atoms, suggesting they are not all the same "
                    "molecule. chiLife currently only supports reading sdf files as MolSys objects if all molecules in "
                    "the sdf are the same with only atom positions differing."
                )

            coords = [a["xyz"] for a in struct_dict["atoms"]]
            trajectory.append(coords)
        trajectory = np.array(trajectory)

        msys = cls(
            record_types,
            atomids,
            anames,
            altlocs,
            resnames,
            resnums,
            icodes,
            chains,
            trajectory,
            occupancies,
            bs,
            segs,
            atypes,
            charges,
            bonds=bonds,
            bond_types=bond_types,
            name=name,
        )

        msys._sdf_data = sdf_data

        return msys



[docs]
    @classmethod
    def from_arrays(
        cls,
        anames: ArrayLike,
        atypes: ArrayLike,
        resnames: ArrayLike,
        resindices: ArrayLike,
        resnums: ArrayLike,
        segindices: ArrayLike,
        record_types: ArrayLike = None,
        altlocs: ArrayLike = None,
        icodes: ArrayLike = None,
        segids: ArrayLike = None,
        trajectory: ArrayLike = None,
        **kwargs,
    ) -> MolSys:
        """
        Create a MolSys object from a minimal set of arrays.

        Parameters
        ----------
        anames : ArrayLike
            Array of atom names.
        atypes : ArrayLike
            Array of atom types/elements.
        resnames : ArrayLike
            Array of residue names for each atom.
        resindices : ArrayLike
            Array of residue indices for each atom.
        resnums : ArrayLike
            Array of residue numbers for each atom.
        segindices : ArrayLike
            Array of segment indices for each atom.
        record_types : ArrayLike | None
            Array of record types for each atom, e.g. ATOM or HETATM
        altlocs: ArrayLike | None
            Array of alternate location specifies for each atom.
        icodes: ArrayLike | None
            Array of insertion codes for each atom.
        segids : ArrayLike
            Array of segment IDs for each atom.
        trajectory : ArrayLike
            Array of cartesian coordinates for each atom and each state of an ensemble or frame of a trajectory.
        kwargs : dict
            Dictionary of additional arguments to pass the the :class:`MolSys` constructor.
        Returns
        -------
        cls : MolSys
            Molecular system object of the PDB structure.
        """
        anames = np.asarray(anames)
        atypes = np.asarray(atypes)
        resindices = np.asarray(resindices)
        segindices = np.asarray(segindices)

        n_atoms = len(anames)
        atomids = np.arange(n_atoms)
        altlocs = np.array([""] * n_atoms) if altlocs is None else np.array(altlocs)
        icodes = np.array([""] * n_atoms) if icodes is None else np.array(icodes)

        if len(resnums) != n_atoms:
            resnums = np.array([resnums[residx] for residx in resindices])
        else:
            resnums = np.asarray(resnums)

        if len(resnames) != n_atoms:
            resnames = np.array([resnames[residx] for residx in resindices])
        else:
            resnames = np.asarray(resnames)

        if len(icodes) != n_atoms:
            icodes = np.array([icodes[residx] for residx in resindices])
        else:
            icodes = np.asarray(icodes)

        if len(segindices) != n_atoms:
            segindices = np.array([segindices[x] for x in resindices])

        if segids is None:
            segids = np.array(["ABCDEFGHIJKLMNOPQRSTUVWXYZ"[i] for i in segindices])
        elif len(segids) != n_atoms:
            segids = np.array([segids[i] for i in segindices])
        else:
            segids = np.asarray(segids)

        chains = segids

        if trajectory is None:
            trajectory = np.empty((1, n_atoms, 3))
        elif len(trajectory.shape) == 2:
            trajectory = trajectory[None, ...]

        if record_types is None:
            record_types = np.array(
                [
                    "ATOM" if rnme in chilife.SUPPORTED_RESIDUES else "HETATM"
                    for rnme in resnames
                ]
            )

        occupancies = np.ones(n_atoms)
        bs = np.ones(n_atoms)
        charges = kwargs.pop("charges", np.zeros(n_atoms))

        return cls(
            record_types,
            atomids,
            anames,
            altlocs,
            resnames,
            resnums,
            icodes,
            chains,
            trajectory,
            occupancies,
            bs,
            segids,
            atypes,
            charges,
            **kwargs,
        )



[docs]
    @classmethod
    def from_atomsel(cls, atomsel, frames=None):
        """
        Create a new MolSys object from an existing :class:`~AtomSelection` or
        `MDAnalysis.AtomGroup <https://userguide.mdanalysis.org/stable/atomgroup.html>`_ object.

        Parameters
        ----------
        atomsel : :class:`~AtomSelection`, MDAnalysis.AtomGroup
            The :class:`~AtomSelection` or MDAnalysis.AtomGroup from which to create the new MolSys object.
        frames : int, Slice, ArrayLike
            Frame index, slice or array of frame indices corresponding to the frames of the atom selection you wish to
            extract into a new :class:`~MolSys` object.

        Returns
        -------
        cls : MolSys
            Molecular system object of the PDB structure.
        """

        if isinstance(atomsel, (MDAnalysis.AtomGroup, AtomSelection)):
            U = atomsel.universe
        elif isinstance(atomsel, MDAnalysis.Universe):
            U = atomsel
            atomsel = U.atoms

        if frames is None:
            frames = slice(0, len(U.trajectory))

        record_types = (
            atomsel.record_types if hasattr(atomsel, "record_types") else None
        )
        icodes = atomsel.icodes if hasattr(atomsel, "icodes") else None
        anames = atomsel.names
        atypes = atomsel.types
        resnames = atomsel.resnames
        resnums = atomsel.resnums
        ridx_map = {num: i for i, num in enumerate(np.unique(atomsel.resindices))}
        resindices = np.array([ridx_map[num] for num in atomsel.resindices])
        segids = atomsel.segids
        sidx_map = {num: i for i, num in enumerate(np.unique(atomsel.segindices))}
        segindices = np.array([sidx_map[num] for num in atomsel.segindices])

        if (
            isinstance(atomsel, MDAnalysis.AtomGroup)
            and hasattr(atomsel, "bonds")
            and len(atomsel.bonds) > 0
        ):
            bonds = atomsel.bonds.indices
            bond_types = np.array([BondType.UNSPECIFIED] * len(bonds))
        elif isinstance(atomsel, MolecularSystemBase) and atomsel.bonds is not None:
            bonds = atomsel.bonds
            bond_types = atomsel.bond_types
        else:
            bonds = None
            bond_types = None

        # remap bond indices
        if bonds is not None:
            k = {idx: i for i, idx in enumerate(atomsel.ix)}
            bonds = np.array(
                [[k[a1], k[a2]] for a1, a2 in bonds if a1 in k and a2 in k], dtype=int
            )

        if hasattr(U.trajectory, "coordinate_array"):
            trajectory = U.trajectory.coordinate_array[frames][:, atomsel.ix, :]
        else:
            trajectory = []
            for ts in U.trajectory[frames]:
                trajectory.append(atomsel.positions)
            trajectory = np.array(trajectory)

        return cls.from_arrays(
            anames,
            atypes,
            resnames,
            resindices,
            resnums,
            segindices,
            segids=segids,
            trajectory=trajectory,
            record_types=record_types,
            icodes=icodes,
            bonds=bonds,
            bond_types=bond_types,
        )



[docs]
    @classmethod
    def from_rdkit(cls, mol):
        """
        Create a MolSys from an rdkit Mol object with embedded conformers.

        Parameters
        ----------
        mol : rdkit.Chem.rdchem.Mol
            An rdkit Mol object.

        Returns
        -------
        cls: MolSys
            Molecular system object of the PDB structure.
        """
        atypes = np.array([a.GetSymbol() for a in mol.GetAtoms()])
        anames = np.array([a + str(i) for i, a in enumerate(atypes)])
        resnames = np.array(["UNK" for _ in anames])
        resindices = np.array([0] * len(anames))
        resnums = np.array([1] * len(anames))
        segindices = np.array([0] * len(anames))
        segids = np.array(["A"] * len(anames))
        trajectory = np.array([conf.GetPositions() for conf in mol.GetConformers()])
        bonds = np.array(
            [[b.GetBeginAtomIdx(), b.GetEndAtomIdx()] for b in mol.GetBonds()]
        )
        bond_types = np.array([BondType(b.GetBondType()) for b in mol.GetBonds()])
        return cls.from_arrays(
            anames,
            atypes,
            resnames,
            resindices,
            resnums,
            segindices,
            segids=segids,
            trajectory=trajectory,
            bonds=bonds,
            bond_types=bond_types,
        )



[docs]
    @classmethod
    def from_openMM(cls, simulation):
        """
        Create a MolSys from an openMM Simulation object.

        Parameters
        ----------
        simulation : openmm.simulation.Simulation
            The simulation object to create the MolSys object from.

        Returns
        -------
        sys : chilife.MolSys
            chiLife molecular system as a clone from
        """
        try:
            from openmm.unit import angstrom
        except:
            raise RuntimeError(
                "You must have the optional openMM dependency installed to use from_openMM"
            )

        top = simulation.topology
        atypes = np.array([a.element.symbol for a in top.atoms()])
        anames = np.array([a.name for a in top.atoms()])
        resnames = np.array([a.residue.name for a in top.atoms()])
        resindices = np.array([a.residue.index for a in top.atoms()])
        resnums = np.array([int(a.residue.id) for a in top.atoms()])
        segindices = np.array([a.residue.chain.index for a in top.atoms()])
        segids = np.array([a.residue.chain.id for a in top.atoms()])
        trajectory = (
            simulation.context.getState(getPositions=True)
            .getPositions(asNumpy=True)
            .value_in_unit(angstrom)
        )
        bonds = np.array([[bond.atom1.index, bond.atom2.index] for bond in top.bonds()])
        # bond_types = np.array([  for bond in top.bonds()])

        sys = cls.from_arrays(
            anames,
            atypes,
            resnames,
            resindices,
            resnums,
            segindices,
            segids=segids,
            trajectory=trajectory,
            bonds=bonds,
            # bond_types=bond_types,
        )
        return sys



[docs]
    def copy(self):
        """Create a deep copy of the MolSys."""

        if hasattr(self, "bonds"):
            bonds = self.bonds
            bond_types = self.bond_types
        else:
            bonds = None
            bond_types = None

        return MolSys(
            record_types=self.record_types,
            atomids=self.atomids,
            names=self.names,
            altlocs=self.altlocs,
            resnames=self.resnames,
            resnums=self.resnums,
            icodes=self.icodes,
            chains=self.chains,
            trajectory=self.trajectory.coords,
            occupancies=self.occupancies,
            bs=self.bs,
            segs=self.segs,
            atypes=self.atypes,
            charges=self.charges,
            bonds=bonds,
            bond_types=bond_types,
            name=self.names,
        )



[docs]
    def load_new(self, coordinates):
        """
        Load a new set of 3-dimensional coordinates into the MolSys.

        Parameters
        ----------
        coordinates : ArrayLike
            Set of new coordinates to load into the MolSys
        """
        self.trajectory.load_new(coordinates=coordinates)


    @property
    def _atoms(self):
        if not hasattr(self, "_Atoms"):
            self._Atoms = np.array(
                [Atom(weakref.proxy(self), i) for i in range(self.n_atoms)]
            )
        return self._Atoms

    @property
    def _residues(self):
        if not hasattr(self, "_residue_list"):
            _, first_res_ix = np.unique(self.resixs, return_index=True)
            self._residue_list = [
                Residue(weakref.proxy(self), [ix]) for ix in first_res_ix
            ]

        return self._residue_list

    @property
    def _segments(self):
        if not hasattr(self, "_segments_list"):
            _, first_seg_ix = np.unique(self.segixs, return_index=True)
            self._segments_list = [
                Segment(weakref.proxy(self), ix) for ix in first_seg_ix
            ]
        return self._segments_list

    @property
    def molsys(self):
        return self




[docs]
class Trajectory:
    """
    Object containing and managing 3-dimensional coordinates of :class:`~MolSys` objects, particularly when there are
    multiple states or time steps.

    Parameters
    ----------
    coordinates : np.ndarray
        3-dimensional coordinates for all atoms and all states/frames of the associated :class:`~MolSys` object.
    molsys : MolSys
        The associated :class:`~MolSys` object.
    timestep : float
        The time step or change in time between frames/states of the trajectory/ensemble.
    """

    def __init__(self, coordinates: np.ndarray, molsys: MolSys, timestep: float = 1):
        self.molsys = molsys
        self.timestep = timestep
        self.coords = coordinates
        self.coordinate_array = coordinates
        self._frame = 0
        self.time = np.arange(0, len(self.coords)) * self.timestep


[docs]
    def load_new(self, coordinates):
        """
        Load a new set (trajectory or ensemble) of 3-dimensional coordinates into the Trajectory.

        Parameters
        ----------
        coordinates : ArrayLike
            Set of new coordinates to load into the MolSys
        """
        self.coords = coordinates
        self.coordinate_array = coordinates
        self.time = np.arange(0, len(self.coords)) * self.timestep


    def __getitem__(self, item):
        if isinstance(item, (slice, list, np.ndarray)):
            return TrajectoryIterator(self, item)

        elif isinstance(item, numbers.Integral):
            self._frame = item
            return self.time[self._frame]

    def __iter__(self):
        return iter(TrajectoryIterator(self))

    def __len__(self):
        return len(self.time)

    @property
    def frame(self):
        """The frame number the trajectory is currently on"""
        return self._frame

    @frame.setter
    def frame(self, value):
        self._frame = value



class TrajectoryIterator:
    """An iterator object for looping over :class:`~Trajectory` objects."""

    def __init__(self, trajectory, arg=None):
        self.trajectory = trajectory
        if arg is None:
            self.indices = range(len(self.trajectory.time))
        elif isinstance(arg, slice):
            self.indices = range(*arg.indices(len(self.trajectory.time)))
        elif isinstance(arg, (np.ndarray, list, tuple)):
            if np.issubdtype(arg.dtype, np.integer):
                self.indices = arg
            elif np.dtype == bool:
                self.indices = np.argwhere(arg)
            else:
                raise TypeError(f"{arg.dtype} is not a valid indexor for a trajectory")
        else:
            raise TypeError(f"{arg} is not a valid indexor for a trajectory")

    def __iter__(self):
        for frame in self.indices:
            yield self.trajectory[frame]

    def __len__(self):
        return len(self.indices)


def process_statement(statement, logickws, subjectkws):
    """
    Parse a selection string to identify subsets of atoms that satisfy the given conditions.

    Parameters
    ----------
    statement : str
        The selection string to parse.
    logickws : dict
        Dictionary of logic keywords (strings) mapped to the associated logic function.
    subjectkws : dict
        Dictionary of subject keywords (strings) mapped to the associated subject attributes.

    Returns
    -------
    mask : np.ndarray
        Array of subject indices that satisfy the given conditions.
    """
    sub_statements = parse_paren(statement)

    mask = np.ones(subjectkws["_len"], dtype=bool)
    operation = None

    for stat in sub_statements:
        if stat.startswith("("):
            tmp = process_statement(stat, logickws, subjectkws)
            mask = operation(mask, tmp) if operation else tmp
            continue

        stat_split = stat.split()
        if stat_split[0] not in logickws and stat_split[0] not in subjectkws:
            raise ValueError(
                f"Invalid selection keyword: {stat_split[0]}. All selections statements and substatements must start "
                f"with a valid selection keyword"
            )

        subject = None
        values = []
        while len(stat_split) > 0:
            if stat_split[0] in subjectkws:
                subject = subjectkws[stat_split.pop(0)]
                values = []

            elif stat_split[0] in logickws:
                if subject is not None:
                    values = np.array(values, dtype=subject.dtype)
                    tmp = process_sub_statement(subject, values)
                    subject, values = None, []
                    mask = operation(mask, tmp) if operation else tmp

                # Get next operation
                operation, stat_split = build_operator(stat_split, logickws)

            else:
                values += parse_value(stat_split.pop(0))

        if subject is not None:
            values = np.array(values, dtype=subject.dtype)
            tmp = process_sub_statement(subject, values)
            mask = operation(mask, tmp) if operation else tmp
            operation = None

        elif not values and hasattr(operation, "novals"):
            mask = operation(mask, np.ones_like(mask, dtype=bool))

    return mask


def parse_paren(string):
    """
    Break down a given string to parenthetically defined subcomponents

    Parameters
    ----------
    string : str
        The string to break down.

    Returns
    -------
    results : List[str]
        List of parenthetically defined subcomponents.
    """
    stack = 0
    start_idx = 0
    end_idx = 0
    results = []

    for i, c in enumerate(string):
        if c == "(":
            if stack == 0:
                start_idx = i + 1
                if i != 0:
                    results.append(string[end_idx:i].strip())
            stack += 1
        elif c == ")":
            # pop stack
            stack -= 1

            if stack == 0:
                results.append(f"({string[start_idx:i].strip()})")
                start_idx = end_idx
                end_idx = i + 1

    if end_idx != len(string) - 1:
        results.append(string[end_idx:].strip())

    results = [res for res in results if res != ""]
    if len(results) == 1 and results[0].startswith("("):
        results = parse_paren(results[0][1:-1])

    if stack != 0:
        raise RuntimeError(
            "The provided statement is missing a parenthesis or has an extra one."
        )
    return results


def check_operation(operation, stat_split, logickws):
    """
    Given an advanced operation, identify the additional arguments being passed to the advanced operation and create
    a simple operation that defined specifically for the provided arguments.
    Parameters
    ----------
    operation : callable
        A function or callable object that operates on molecular system attributes.
    stat_split : List[str]
        the arguments associated with the operation
    logickws : dict
        Dictionary of logic keywords (strings) mapped to the associated logic function.

    Returns
    -------
    operation : callable
        A simplified version of the provided operation now accounting for the user provided parameters.
    """
    advanced_operators = (
        logickws["within"],
        logickws["around"],
        logickws["byres"],
        logickws["point"],
    )
    if operation in advanced_operators:
        outer_operation = logickws["and"]
        args = [stat_split.pop(i) for i in range(1, 1 + operation.nargs)]
        operation = partial(operation, *args)

        def toperation(a, b, outer_operation, _io):
            return outer_operation(a, _io(b))

        operation = partial(toperation, outer_operation=outer_operation, _io=operation)

    return operation


def build_operator(stat_split, logickws):
    """
    A function to combine multiple sequential operators into one.

    Parameters
    ----------
    stat_split : List[str]
        The list of string arguments passed by the user to define the sequence of operations
    logickws : dict
        Dictionary of logic keywords (strings) mapped to the associated logic function.

    Returns
    -------
    operation : callable
        A compression of sequential operations combined into one.
    split_stat : List[str]
        Any remaining arguments passed by the user that were not used to define ``operation``
    """
    operation = logickws["and"]
    unary_operators = (logickws["not"], logickws["byres"])
    binary_operators = (logickws["and"], logickws["or"])
    advanced_operators = (logickws["within"], logickws["around"], logickws["point"])

    while _io := logickws.get(stat_split[0], False):
        if _io in unary_operators:

            def toperation(a, b, operation, _io):
                return operation(a, _io(b))

            operation = partial(toperation, operation=operation, _io=_io)
            stat_split = stat_split[1:]

        elif _io in advanced_operators:
            stat_split = stat_split[1:]
            args = [stat_split.pop(0) for i in range(_io.nargs)]
            _io = partial(_io, *args)

            def toperation(a, b, operation, _io):
                return operation(a, _io(b))

            operation = partial(toperation, operation=operation, _io=_io)

            if hasattr(_io.func, "novals"):
                operation.novals = _io.func.novals

        elif _io in binary_operators:
            if operation != logickws["and"]:
                raise RuntimeError(
                    "Cannot have two binary logical operators in succession"
                )
            operation = _io
            stat_split = stat_split[1:]

        if len(stat_split) == 0:
            break

    return operation, stat_split


def parse_value(value):
    """
    Helper function to parse values that may have slices.

    Parameters
    ----------
    value : str, int, ArrayLike[int]
        Values to parse.

    Returns
    -------
    return_value : List
        A list of values that satisfy the input.

    """
    return_value = []
    if "-" in value:
        start, stop = [int(x) for x in value.split("-")]
        return_value += list(range(start, stop + 1))
    elif ":" in value:
        start, stop = [int(x) for x in value.split(":")]
        return_value += list(range(start, stop + 1))
    else:
        return_value.append(value)

    return return_value


def process_sub_statement(subject, values):
    if subject.dtype == bool:
        return subject

    return np.isin(subject, values)



[docs]
class AtomSelection(MolecularSystemBase):
    """
    An object containing a group of atoms from a :class:`~MolSys` object

    Parameters
    ----------
    molsys : :class:`~MolSys`
        The :class:`~MolSys` object from which the AtomSelection belongs to.
    mask : np.ndarray
        An array of atom indices that defines the atoms of ``molsys`` that make up the atom selection.
    """

    def __new__(cls, *args):
        if len(args) == 2:
            molsys, mask = args
        else:
            return object.__new__(cls)

        if isinstance(mask, int):
            return Atom(molsys, mask)
        else:
            return object.__new__(cls)

    def __init__(self, molsys, mask, bond_mask=None):
        self.molsys = molsys
        self.mask = mask
        self._update_bond_mask(bond_mask)

    def __getitem__(self, item):
        if np.issubdtype(type(item), np.integer):
            relidx = self.molsys.ix[self.mask][item]
            return self.molsys._atoms[relidx]

        if isinstance(item, slice):
            return AtomSelection(self.molsys, self.mask[item])

        elif isinstance(item, (np.ndarray, list, tuple)):
            item = np.asarray(item)
            if len(item) == 1 or item.sum() == 1:
                return self.molsys._atoms[self.mask[item][0]]
            elif item.dtype == bool:
                item = np.argwhere(item).T[0]
                return AtomSelection(self.molsys, item)
            else:
                return AtomSelection(self.molsys, self.mask[item])

        elif hasattr(item, "__iter__"):
            if all([np.issubdtype(type(x), int) for x in item]):
                return AtomSelection(self.molsys, self.mask[item])

        raise TypeError(
            "Only integer, slice type, and boolean mask arguments are supported at this time"
        )

    def __len__(self):
        return len(self.mask)




[docs]
class ResidueSelection(MolecularSystemBase):
    """
    An object containing a group of residues and all their atoms from a :class:`~MolSys` object. Note that if one atom of
    a residue is present in the AtomSelection used to create the group, all atoms of that residue will be present in
    the ResidueSelection object.

    .. note::
        Unlike a regular :class:`~AtomSelection` object, the ``resnames``, ``resnums``, ``segids`` and ``chains``
        attributes of this object are tracked with respect to residue not atom and changes made to these attributes
        on this object will not be present in the parent object and vice versa.

    Parameters
    ----------
    molsys : :class:`~MolSys`
        The :class:`~MolSys` object from which the AtomSelection belongs to.
    mask : np.ndarray
        An array of atom indices that defines the atoms of ``molsys`` that make up the atom selection.
    bond_mask : np.ndarray
        boolean array specifying which bonds belong to the ``ResidueSelection``.
    """

    def __init__(self, molsys, mask, bond_mask=None):
        resixs = np.unique(molsys.resixs[mask])
        self.mask = np.argwhere(np.isin(molsys.resixs, resixs)).flatten()
        self.molsys = molsys

        _, self.first_ix = np.unique(molsys.resixs[self.mask], return_index=True)

        if len(self.mask) > 1:
            self.resixs = self.resixs[self.first_ix].flatten()
            self.resnames = self.resnames[self.first_ix].flatten()
            self.resnums = self.resnums[self.first_ix].flatten()
            self.segids = self.segids[self.first_ix].flatten()
            self.chains = self.chains[self.first_ix].flatten()

        self._update_bond_mask(bond_mask)

    def __setattr__(self, key, value):
        self.__dict__[key] = value

    def __getitem__(self, item):
        resixs = np.unique(self.molsys.resixs[self.mask])
        new_resixs = resixs[item]
        new_mask = np.argwhere(np.isin(self.molsys.resixs, new_resixs)).flatten()

        if np.issubdtype(type(item), int):
            return self.molsys._residues[new_resixs]
        elif isinstance(item, slice):
            return ResidueSelection(self.molsys, new_mask)
        elif hasattr(item, "__iter__"):
            if all([np.issubdtype(type(x), int) for x in item]):
                return ResidueSelection(self.molsys, new_mask)

        raise TypeError(
            "Only integer and slice type arguments are supported at this time"
        )

    def __len__(self):
        return len(self.first_ix)

    def __iter__(self):
        for resix in self.resixs:
            yield self.molsys._residues[resix]




[docs]
class SegmentSelection(MolecularSystemBase):
    """
    An object containing a group of segments and all their atoms from a :class:`~MolSys` object. Note that if one atom
    of a segment is present in the AtomSelection used to create the group, all atoms of that segment will be present in
    the SegmentSelection object.

    .. note::
        Unlike a regular :class:`~AtomSelection` object, the ``segids`` and ``chains``
        attributes of this object are tracked with respect to segments not atoms and changes made to these attributes
        on this object will not be present in the parent object and vice versa.

    Parameters
    ----------
    molsys : :class:`~MolSys`
        The :class:`~MolSys` object from which the AtomSelection belongs to.
    mask : np.ndarray
        An array of atom indices that defines the atoms of ``molsys`` that make up the atom selection.
    bond_mask : np.ndarray
        boolean array specifying which bonds belong to the ``SegmentSelection``.
    """

    def __init__(self, molsys, mask, bond_mask=None):
        self.seg_ixs = np.unique(molsys.segixs[mask])
        self.mask = np.argwhere(np.isin(molsys.segixs, self.seg_ixs)).flatten()
        self.molsys = molsys

        _, self.first_ix = np.unique(molsys.segixs[self.mask], return_index=True)

        self.segids = self.segids[self.first_ix].flatten()
        self.chains = self.chains[self.first_ix].flatten()
        self._update_bond_mask(bond_mask)

    def __setattr__(self, key, value):
        self.__dict__[key] = value

    def __getitem__(self, item):
        segixs = np.unique(self.molsys.segixs[self.mask])
        new_segixs = segixs[item]
        new_mask = np.argwhere(np.isin(self.molsys.segixs, new_segixs))

        if np.issubdtype(type(item), int):
            return self.molsys._segments[new_segixs]
        elif isinstance(item, slice):
            return SegmentSelection(self.molsys, new_mask)
        elif hasattr(item, "__iter__"):
            if all([np.issubdtype(type(x), int) for x in item]):
                return SegmentSelection(self.molsys, new_mask)

        raise TypeError(
            "Only integer and slice type arguments are supported at this time"
        )

    def __len__(self):
        return len(self.first_ix)

    def __iter__(self):
        for new_segix in self.seg_ixs:
            yield self.molsys._segments[new_segix]




[docs]
class Atom(MolecularSystemBase):
    """
    An object containing information for a single atom.

    Parameters
    ----------
    molsys : :class:`~MolSys`
        The :class:`~MolSys` object from which the Atom belongs to.
    mask : np.ndarray
        The index that defines the atoms of ``molsys`` that make up the atom selection.
    """

    def __init__(self, molsys, mask):
        self.molsys = molsys
        self.index = mask
        self.mask = mask

    @property
    def position(self):
        """Cartesian coordinates of the atom in angstroms."""
        return self.molsys.coords[self.index]

    @property
    def bonds(self):
        """list of :class:`AtomSelection` objects containing the atoms for each bond that this atom is involved in"""
        idxs = self.topology.bonds_any_atom[self.mask]
        return [AtomSelection(self.molsys, idx) for idx in idxs]

    @property
    def angles(self):
        """list of :class:`AtomSelection` objects containing the atoms for each angle that this atom is involved in"""
        idxs = self.topology.angles_any_atom[self.mask]
        return [AtomSelection(self.molsys, idx) for idx in idxs]

    @property
    def dihedrals(self):
        """list of :class:`AtomSelection` objects containing the atoms for each dihedral that this atom is involved
        in"""
        idxs = self.topology.dihedrals_any_atom[self.mask]
        return [AtomSelection(self.molsys, idx) for idx in idxs]

    @property
    def bonded_atoms(self):
        """:class:`AtomSelection` object containing all atoms that this atom is bonded to. Does not contain this atom"""
        all_idxs = np.unique(np.concatenate(self.topology.bonds_any_atom[self.mask]))
        all_idxs = np.array([idx for idx in all_idxs if idx != self.mask])
        return AtomSelection(self.molsys, all_idxs)




[docs]
class Residue(MolecularSystemBase):
    """
    An object representing a single residue.

    Parameters
    ----------
    molsys : :class:`~MolSys`
        The :class:`~MolSys` object from which the Residue belongs to.
    mask : int
        An array of atom indices that defines the atoms of the residue
    bond_mask : np.ndarray
        boolean array specifying which bonds belong to the ``Residue``.
    """

    def __init__(self, molsys, mask, bond_mask=None):
        resix = np.unique(molsys.resixs[mask])[0]
        self.mask = np.where(molsys.resixs == resix)[0]
        self.molsys = molsys
        self._update_bond_mask(bond_mask)
        self.resindex = resix
        self.segindex = molsys.segixs[self.mask][0]

    def __len__(self):
        return len(self.mask)


[docs]
    def phi_selection(self):
        """
        Get an :class:`~AtomSelection` of the atoms defining the Phi backbone dihedral angle of the residue.

        Returns
        -------
        sel : AtomSelection
            An :class:`~AtomSelection` object containing the atoms that make up the Phi backbone dihedral angle of
            the selected residue.
        """

        prev = self.previous_residue()
        if prev is None:
            return None

        mask_phi = [
            prev.ix[prev.names == "C"].flat[0],
            self.ix[self.names == "N"].flat[0],
            self.ix[self.names == "CA"].flat[0],
            self.ix[self.names == "C"].flat[0],
        ]

        sel = self.molsys.atoms[mask_phi]
        return sel if len(sel) == 4 else None



[docs]
    def psi_selection(self):
        """
        Get an :class:`~AtomSelection` of the atoms defining the Psi backbone dihedral angle of the residue.

        Returns
        -------
        sel : AtomSelection
            An :class:`~AtomSelection` object containing the atoms that make up the Psi backbone dihedral angle of
            the selected residue.
        """

        nex = self.next_residue()
        if nex is None:
            return None

        mask_psi = [
            self.ix[self.names == "N"].flat[0],
            self.ix[self.names == "CA"].flat[0],
            self.ix[self.names == "C"].flat[0],
            nex.ix[nex.names == "N"].flat[0],
        ]
        sel = self.molsys.atoms[mask_psi]
        return sel if len(sel) == 4 else None


    def previous_residue(self, ignore_chain_breaks=False):
        resix = self.resix - 1
        res = self.molsys._residues[resix] if resix >= 0 else None

        if ignore_chain_breaks or res is None:
            return res

        elif self.resnum - 1 == res.resnum and self.segix == res.segix:
            return res
        else:
            return None

    def next_residue(self, ignore_chain_breaks=False):
        resix = self.resix + 1
        res = (
            self.molsys._residues[resix] if resix <= self.molsys.resixs.max() else None
        )

        if ignore_chain_breaks or res is None:
            return res

        elif self.resnum + 1 == res.resnum and self.segix == res.segix:
            return res
        else:
            return None




[docs]
class Segment(MolecularSystemBase):
    """
    An object representing a single segment of a molecular system.

    Parameters
    ----------
    molsys : MolSys
        The :class:`~MolSys` object from which the segment belongs to.
    mask : int
        An array of atom indices that defines the atoms of the segment.
    """

    def __init__(self, molsys, mask):
        segix = np.unique(molsys.segixs[mask])[0]
        self.mask = np.where(molsys.segixs == segix)[0]
        self.molsys = molsys
        self.segindex = segix

        self._update_bond_mask(mask)



def byres(mask, molsys):
    """Advanced operator to select atoms by residue"""
    residues = np.unique(molsys.resixs[mask])
    mask = np.isin(molsys.resixs, residues)
    return mask


def unot(mask):
    """Unitary `not` operator"""
    return ~mask


def within(distance, mask, molsys):
    """
    Advanced logic operator to identify atoms within a user defined distance of another selection.

    Parameters
    ----------
    distance : float
        The distance window defining which atoms will be included in the selection.
    mask : np.ndarray
        A boolean array defining a subset of  atoms of a :class:`~MolSys` from which the distance cutoff will be
        measured by.
    molsys : :class:`~MolSys`
        A chiLife :class:`~MolSys` from which to select atoms from .

    Returns
    -------
    out_mask : np.ndarray
        A boolean array defining a subset of  atoms of a :class:`~MolSys` that are within the user defined distance
        of the user defined selection.
    """
    distance = float(distance)
    tree1 = cKDTree(molsys.coords)
    tree2 = cKDTree(molsys.molsys.coords[mask])
    results = np.concatenate(tree2.query_ball_tree(tree1, distance))
    results = np.unique(results)
    out_mask = np.zeros_like(mask, dtype=bool)
    out_mask[results] = True
    out_mask = out_mask * ~mask
    return out_mask


within.nargs = 1


def point(x, y, z, distance, mask, molsys):
    """
    Advanced logic operator to identify atoms within a user defined distance of a point in cartesian space.

    Parameters
    ----------
    x : float
        The x coordinate of the point in cartesian space.
    y : float
        The y coordinate of the point in cartesian space.
    z : float
        The z coordinate of the point in cartesian space.
    distance : float
        The distance window defining which atoms will be included in the selection.
    mask : np.ndarray
        A boolean array defining a subset of  atoms of a :class:`~MolSys` from which the distance cutoff will be
        measured by.
    molsys : :class:`~MolSys`
        A chiLife :class:`~MolSys` from which to select atoms from .

    Returns
    -------
    out_mask : np.ndarray
        A boolean array defining a subset of  atoms of a :class:`~MolSys` that are within the user defined distance
        of the user defined selection.
    """

    tree1 = cKDTree(molsys.coords[mask])
    args = tree1.query_ball_point(np.array([x, y, z], dtype=float), distance)
    out_mask = np.zeros_like(mask, dtype=bool)
    out_mask[args] = True

    return out_mask


point.nargs = 4
point.novals = True


def concat_molsys(*systems):
    """
    Function to concatenate two or more :class:`~MolSys` objects into a single :class:`~MolSys` object. Atoms will be
    concatenated in the order they are placed in the list.

    Parameters
    ----------
    systems : List[MolSys]
        List of :class:`~MolSys` objects to concatenate.

    Returns
    -------
    mol : MolSys
        The concatenated `~MolSys` object.
    """
    for sys in systems:
        if not isinstance(sys, MolecularSystemBase):
            raise TypeError("Can only concatenate MolecularSystemBase instances")

    atom_ids = []
    record_types = []
    anames = []
    altlocs = []
    atypes = []
    resnames = []
    resnums = []
    resindices = []
    icodes = []
    chains = []
    occupancies = []
    bs = []
    segs = []
    trajectory = []
    segindices = []
    segids = []
    charges = []
    chiral = []
    bonds = []
    bond_types = []
    bond_chiral = []
    names = []
    n_atoms = 0
    n_residues = 0
    n_chains = 0

    traj_len = len(systems[0].trajectory)

    has_bonds = any(sys.bonds is not None for sys in systems)

    for sys in systems:
        record_types.append(sys.record_types)
        atom_ids.append(sys.atomids)
        anames.append(sys.names)
        altlocs.append(sys.altlocs)
        resnames.append(sys.resnames)
        resnums.append(sys.resnums)
        icodes.append(sys.icodes)
        chains.append(sys.chains)
        occupancies.append(sys.occupancies)
        bs.append(sys.bs)
        segs.append(sys.segids)
        atypes.append(sys.atypes)
        charges.append(sys.charges)
        chiral.append(sys.chiral)

        if len(sys.trajectory) != traj_len:
            raise AttributeError(
                "Atom selections must have the same trajectory lengths to be combined"
            )

        trajectory.append(sys.trajectory.coordinate_array[:, sys.mask])

        # New bonds
        if not has_bonds:
            bonds = None
            bond_types = None
            bond_chiral = None
        elif sys.bonds is None:
            bonds.append(np.array([[]]))
            bond_types.append(np.array([]))
            bond_chiral.append(np.array([]))
        else:
            old_atom_ixs = sys.ix
            atom_ix_map = {old: new + n_atoms for new, old in enumerate(old_atom_ixs)}
            b_mask = np.all(np.isin(sys.bonds, old_atom_ixs), axis=1)

            sys_i_bonds = sys.bonds[b_mask]
            new_bonds = [[atom_ix_map[a], atom_ix_map[b]] for a, b in sys_i_bonds]
            bonds.append(new_bonds)
            bond_types.append(sys.bond_types[b_mask])
            bond_chiral.append(sys.bond_chiral[b_mask])

        # New resinidces starting from 0
        new_resindices = np.zeros_like(sys.resindices)
        for i, res in enumerate(sys.residues):
            mask = sys.resindices == res.resindex
            new_resindices[mask] = i + n_residues
        resindices.append(new_resindices)

        # New segindices starting from 0
        new_segindices = np.zeros_like(sys.segindices)
        for i, seg in enumerate(sys.segments):
            mask = sys.segindices == seg.segindex
            new_segindices[mask] = i + n_chains
        segindices.append(new_segindices)

        n_atoms += len(sys.atoms)
        n_residues += len(sys.residues)
        n_chains += len(sys.segments)
        names.append(sys.name)

    record_types = np.concatenate(record_types)
    atom_ids = np.concatenate(atom_ids)
    anames = np.concatenate(anames)
    altlocs = np.concatenate(altlocs)
    resnames = np.concatenate(resnames)
    resnums = np.concatenate(resnums)
    icodes = np.concatenate(icodes)
    chains = np.concatenate(chains)
    trajectory = np.concatenate(trajectory, axis=1)
    occupancies = np.concatenate(occupancies)
    bs = np.concatenate(bs)
    segs = np.concatenate(segs)
    atypes = np.concatenate(atypes)
    charges = np.concatenate(charges)
    chiral = np.concatenate(chiral) if np.any([x is not None for x in chiral]) else None
    resindices = np.concatenate(resindices)
    segindices = np.concatenate(segindices)

    if has_bonds:
        bonds = np.concatenate(bonds)
        bond_types = np.concatenate(bond_types)
        bond_chiral = np.concatenate(bond_chiral)

    if len(names) <= 3:
        name = "_and_".join(names)
    else:
        name = f"concatenation of {len(systems)} atom selections"

    return MolSys(
        record_types,
        atom_ids,
        anames,
        altlocs,
        resnames,
        resnums,
        icodes,
        chains,
        trajectory,
        occupancies,
        bs,
        segs,
        atypes,
        charges,
        chiral,
        resindices,
        segindices,
        bonds,
        bond_types,
        bond_chiral,
        name,
    )


def res_to_ccd(res):
    """
    Create a CCD entry for a :class:`Residue`.

    Parameters
    ----------
    res : Residue
        The residue to create the CCD entry for.

    Returns
    -------
    ccd_data : dict
        Dictionary containing the ``chem_comp_atom``, ``chem_comp_bond`` data derived from the atoms ond bonds of the
        :class:`Residue`.
    """

    chem_comp_atom = {
        "comp_id": [str(res.resname)] * len(res.atoms),
        "atom_id": res.names.tolist(),
        "type_symbol": res.elements.tolist(),
        "pdbx_aromatic_flag": ["?"] * len(res.atoms),
        "pdbx_stereo_config": res.chiral.tolist(),
    }

    bond_atom_names = res.bond_atom_names
    bond_types = [chilife.io.XL_BOND_TO_CIF[btype] for btype in res.bond_types]
    if len(bond_atom_names) > 0:
        chem_comp_bond = {
            "comp_id": [str(res.resname)] * len(res.bonds),
            "atom_id_1": bond_atom_names[:, 0].tolist(),
            "atom_id_2": bond_atom_names[:, 1].tolist(),
            "value_order": bond_types,
            "pdbx_aromatic_flag": ["?"] * len(res.bonds),
            "pdbx_stereo_config": res.bond_chiral.tolist(),
        }
    else:
        chem_comp_bond = {
            "comp_id": [],
            "atom_id_1": [],
            "atom_id_2": [],
            "value_order": [],
            "pdbx_aromatic_flag": [],
            "pdbx_stereo_config": [],
        }

    ccd_data = {
        "chem_comp_atom": chem_comp_atom,
        "chem_comp_bond": chem_comp_bond,
    }

    return ccd_data


def struct_to_cif_atom_site(struct):
    """
    Helper function to generate the cif ``_atom_site`` entry for a cif file.

    Parameters
    ----------
    struct : MolecularSystemBase
        The structure or structure to be converted into the ``_atom_site`` entry.

    Returns
    -------
    atom_site_dict : dict
        Dictionary containing the ``_atom_site`` entry for a cif file.
    """
    n_frames = len(struct.trajectory)
    atom_group = np.tile(struct.record_types, n_frames).tolist()
    atom_id = np.tile(struct.atomids.astype(str), n_frames).tolist()
    type_symbol = np.tile(struct.atypes, n_frames).tolist()
    label_atom_id = np.tile(struct.names, n_frames).tolist()
    label_alt_id = np.tile(
        [x if x.strip() else "." for x in struct.altlocs], n_frames
    ).tolist()
    label_comp_id = np.tile(struct.resnames, n_frames).tolist()
    label_asym_id = np.tile(struct.segids, n_frames).tolist()
    label_entity_id = np.tile((struct.segindices + 1).astype(str), n_frames).tolist()
    label_seq_id = np.tile((struct.resindices + 1).astype(str), n_frames).tolist()
    pdbx_PDB_ins_code = np.tile(
        [x if x.strip() else "?" for x in struct.icodes], n_frames
    ).tolist()
    occupancy = np.tile(struct.occupancies.astype(str), n_frames).tolist()
    B_iso_or_equiv = np.tile(struct.bs.astype(str), n_frames).tolist()
    auth_seq_id = np.tile(struct.resnums.astype(str), n_frames).tolist()
    auth_asym_id = np.tile(struct.chains, n_frames).tolist()
    pdbx_PDB_model_num = np.concatenate(
        [[str(i + 1)] * struct.n_atoms for i in range(n_frames)]
    ).tolist()

    cartn_x, cartn_y, cartn_z = [], [], []

    for i, ts in enumerate(struct.trajectory):
        cartn_x.extend(struct.positions[:, 0].astype(str).tolist())
        cartn_y.extend(struct.positions[:, 1].astype(str).tolist())
        cartn_z.extend(struct.positions[:, 2].astype(str).tolist())

    atom_site_dict = {
        "group_PDB": atom_group,
        "id": atom_id,
        "type_symbol": type_symbol,
        "label_atom_id": label_atom_id,
        "label_alt_id": label_alt_id,
        "label_comp_id": label_comp_id,
        "label_asym_id": label_asym_id,
        "label_entity_id": label_entity_id,
        "label_seq_id": label_seq_id,
        "pdbx_PDB_ins_code": pdbx_PDB_ins_code,
        "cartn_x": cartn_x,
        "cartn_y": cartn_y,
        "cartn_z": cartn_z,
        "occupancy": occupancy,
        "B_iso_or_equiv": B_iso_or_equiv,
        "auth_seq_id": auth_seq_id,
        "auth_asym_id": auth_asym_id,
        "pdbx_PDB_model_num": pdbx_PDB_model_num,
    }

    return atom_site_dict