OpenMM Score Function for Amino Acid Repacking

This example demonstrates how openMM can be used with chilife to perform side chain repacking. While this example currently only works with native amino acids it will ideally lead to a set of methods to integrate with non-canonical amino acids. Because this example integrates with OpenMM it does require it as a dependency. OpenMM is best installed via cond-forge.

[1]:
import numpy as np
from scipy.optimize import linear_sum_assignment
import matplotlib.pyplot as plt

import chilife as xl
from tempfile import TemporaryDirectory

from openmm import *
from openmm.app import *
C:\Users\mhtes\micromamba\envs\openMM\Lib\site-packages\tqdm\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html
  from .autonotebook import tqdm as notebook_tqdm
[2]:
def match_atoms(reference, subject):
    """ Helper function to match openMM atoms with chilife RotamerEnsemble atoms since they will likely be in different orders and have different names.
    This works by looping through all rotamers and finding atom-pair matching that minimizes the RMSD between the RotamerEnsemble atoms and the OpenMM atoms.
    This operation is performed on every rotamer in the library and the best scoring pairing is used."""

    # Initialize storage variables
    dmin_score = np.inf
    dmap = None

    # Loop through rotamers
    for i, sub in enumerate(subject.coords):

        # Create pairwise distance  metrix between atoms
        diff = reference.positions[:,None, ...] - sub[None, ...]
        diff = diff * diff
        diff = np.sum(diff, axis=-1)

        # Identify best pairing
        idx1, idx2 =linear_sum_assignment(diff)
        args = [(a, b) for a, b in zip(idx1, idx2)]
        score = np.sum([diff[arg] for arg in args])

        # Compare with previous best and store if necessary
        if score < dmin_score:
            dmap = idx2
            dmin_score = score

    return dmap


class OpenMMEnergyFunc:
    """Score function for repacking. Note that this function uses the amber14 forcefield which does not parameterize any spin labels or most other NCAAs used by chilife. This examples uses only Natrual amino acids to demonstrate
    How chiLife can be integrated with OpenMM. """

    def __init__(self, protein):
        """This object will take in a single protein. This protein should have all atoms present (including hydrogen atoms) with no heterogens or other abnormalities. A structure like this can be prepared with something like PDBFixer.
        For each site the structure should already be mutated to the appropriate amino acid you wish to model. This can be done using the ``xl.mutate`` function. During construction an OpenMM.Simulation object will be created
        which can be re-used for all RotamerEnsembles being attached to this protein"""

        protein = protein.atoms
        self.protein = protein

        # Write protein to temporary file to load in with OpenMM PDBFile for quick and easy topology and parameterization
        with TemporaryDirectory() as tempdir:
            xl.save(f'{tempdir}/tmp.pdb', protein)
            pdb = PDBFile(f'{tempdir}/tmp.pdb')

        # Use Amber14 forcefield with GBN2 implicit solvent model
        ff = ForceField('amber14-all.xml', 'implicit/gbn2.xml')

        # Use only non-bonded forces since bonded forces of the protein will not change and bonded forces of the rotamers should be accounted for in the library.
        ff._forces = [force for force in ff._forces if isinstance(force, forcefield.NonbondedGenerator)]

        # Create and store simulation object for energy calculations
        omm_system = ff.createSystem(pdb.topology, nonbondedMethod=CutoffNonPeriodic, nonbondedCutoff=1.0*unit.nanometer)
        integrator = LangevinMiddleIntegrator(300*unit.kelvin, 1/unit.picosecond, 0.004*unit.picoseconds)
        self.simulation = Simulation(pdb.topology, omm_system, integrator)



    def prepare_system(self, system):

        # Identify and store system indices that correspond to the rotamer ensemble
        psel = self.protein.select_atoms(system.selstr)
        system.efunc_protein_ix = psel.ix
        system.efunc_residue_ix = match_atoms(psel, system)


    def __call__(self, system, **kwargs):
        """Only requirement for a chilife energy  function is to accept a ``system`` and return an energy score in kcal/mol for each rotamer in that system."""
        if not hasattr(system,  'efunc_protein_ix'):
            self.prepare_system(system)

        pose_start = self.protein.positions

        Es = []
        for rot in system.coords:
            pose_start[system.efunc_protein_ix] = rot[system.efunc_residue_ix]
            self.simulation.context.setPositions(pose_start * unit.angstrom)
            energy = self.simulation.context.getState(getEnergy=True).getPotentialEnergy().value_in_unit(unit.kilocalorie_per_mole)
            Es.append(energy)

        Es = np.array(Es)
        return Es
[3]:
# Load in protein object
protein = xl.load_protein('system.pdb')

# Create Energy function unique to this protein
omm_efunc = OpenMMEnergyFunc(protein)

# Create a rotamer ensemble of at lysine 48 using ORS and the new energy function
RL = xl.RotamerEnsemble('LYS', 48, protein, sample=5000, use_H=True, energy_func=omm_efunc)

[4]:
# Perform repacking around the new rotamer library using the same energy function
traj, de = xl.repack(protein, RL, energy_func=omm_efunc)
100%|████████████████████████████████████████████████████████████████████████████████| 200/200 [00:03<00:00, 52.54it/s]
[5]:
fig, ax = plt.subplots()
ax.plot(np.cumsum(de))

ax.set_xlabel('Step')
ax.set_ylabel('Change in energy (kcal/mol)')
plt.show()
../_images/gallery_13-OpenMM_Score_Function_5_0.png