Source code for chilife.numba_utils

from typing import Tuple
import math as m
import numpy as np
from memoization import cached
from numba import njit, prange


[docs] @njit(cache=True) def compute_bin(x: float, bin_edges: np.ndarray) -> int: """Compute determine bin for a given observation, x Parameters ---------- x : float Observation. bin_edges : np.ndarray Available bins. Returns ------- bin : int Index of bin to add observation to. """ # Assuming uniform bins for now n = bin_edges.shape[0] - 1 a_min = bin_edges[0] a_max = bin_edges[-1] bin = int(n * (x - a_min) / (a_max - a_min)) return bin
[docs] @njit(cache=True) def dirichlet(x: np.ndarray) -> np.ndarray: """Dirichlet distribution sampler Parameters ---------- x : np.ndarray Parameters of dirichlet distribution to sample from. Returns ------- dist : np.ndarray Random sample from dirichlet distribution with parameters x. """ dist = np.zeros_like(x) for i, xi in enumerate(x): dist[i] = np.random.gamma(xi) dist /= dist.sum() return dist
[docs] @njit(cache=True) def get_delta_r(r: np.ndarray) -> float: """Calculates increment of a sorted array with evenly distributed floats Parameters ---------- r : np.ndarray Array of equally spaced consecutive numbers. Returns ------- delta_r : float Distance between numbers in r. """ delta_r = (r[-1] - r[0]) / len(r) return delta_r
[docs] @njit(cache=True) def histogram(a: np.ndarray, weights: np.ndarray, r: np.ndarray) -> np.ndarray: """Calculate histogram for observations, ``a`` , with weights over the domain, r, as precursor for KDE of distance distribution. Parameters ---------- a : np.ndarray Array of relevant pairwise distances between NO midpoint libraries. weights : np.ndarray Weights array corresponding to distances in ``a`` . r : np.ndarray Domain of distance distribution. Returns ------- hist : np.ndarray Weighted histogram of pairwise distances. """ # Calculate bin size delta_r = get_delta_r(r) # Preallocate histogram array hist = np.zeros((len(r),), dtype=np.float64) # Determine edges bin_edges = r # Compute histogram for x, weight in zip(a, weights): _bin = compute_bin(x, bin_edges) # If density is over the upper distance limit reallocate it to 0 (impossible distance). if _bin >= len(hist): hist[0] += weight continue hist[_bin] += weight return hist
[docs] @njit(cache=True) def jaccard(p: np.ndarray, q: np.ndarray) -> float: """Calculate the jaccard index between the two provided vectors (distance distributions) Parameters ---------- p, q : np.ndarray Sets to calculate the jaccard index between. Returns ------- jac : float Jaccard index between ``q`` and ``p`` . """ overlap = np.minimum(p, q).sum() union = np.maximum(p, q).sum() jac = overlap / union return jac
[docs] @njit(cache=True) def kl_divergence(p: np.ndarray, q: np.ndarray) -> float: """Compute the Kullback–Leibler divergence (KLD) of ``p`` and ``q`` . Parameters ---------- p, q : np.ndarray The distributions to calculate the KLD between. Returns ------- kld: float The Kullback–Leibler divergence between ``p`` and ``q`` . """ kld = np.sum(np.where(p != 0, p * np.log(p / q), 0)) return kld
[docs] @njit(cache=True) def normdist(delta_r: float, mu: float = 0.0, sigma: float = 1.0) -> Tuple[np.ndarray, np.ndarray]: """Calculate normal distribution for convolution with histogram of distances between two spin label ensembles Parameters ---------- delta_r : float Space between points in distance domain. mu : float Mean for normal distribution. sigma : float Standard deviation of normal distribution. Returns ------- x : np.ndarray Domain of the distribution. y : np.ndarray PDF values of the distribution. """ # Calculate normal distribution x = np.arange(mu - 3.5 * sigma, mu + 3.5 * sigma + delta_r, delta_r) kernel_domain = (x - mu) ** 2 coef = 1 / (np.sqrt(2 * np.pi * sigma**2)) denom = 2 * sigma**2 y = coef * np.exp(-(kernel_domain / denom)) return x, y
[docs] @njit(cache=True) def pairwise_dist(x: np.ndarray, y: np.ndarray) -> np.ndarray: """ Calculate the pairwise (Euclidean) distance between the coordinate sets. Parameters ---------- x, y : np.ndarray Coordinate sets to calculate the distance between Returns ------- distances : np.ndarray Pairwise distance matrix between coordinates of x and y. """ M = x.shape[0] N = y.shape[0] L = x.shape[1] distances = np.empty((M, N), dtype=np.float64) for i in range(M): for j in range(N): d = 0 for l in range(L): tmp = x[i, l] - y[j, l] tmp = tmp * tmp d += tmp distances[i, j] = m.sqrt(d) return distances
@njit(cache=True) def _ic_to_cart(IC_idx_Array: np.ndarray, ICArray: np.ndarray) -> np.ndarray: """Convert internal coordinates into cartesian coordinates. Parameters ---------- IC_idx_Array : np.ndarray Array of indexes corresponding to the atoms defining the internal coordinate. IC_idx_Array[i, 0] # index of the atom that atom ``i`` is bonded to. IC_idx_Array[i, 1] # index of the atom that atom ``i`` creates an angle with. IC_idx_Array[i, 2] # index of the atom that atom ``i`` creates a dihedral angle with. A value of -1 indicates that there is no precursor atom to define the bond, angle or dihedral and that atom ``i`` is one of the coordinate system defining atoms. ICArray : Internal coordinate values IC_idx_Array[i, 0] # bond distance of the atom that atom ``i`` and one of its precursor atoms. IC_idx_Array[i, 1] # bond angle between atom ``i`` two of its precursor atoms. IC_idx_Array[i, 2] # index of the atom that atom ``i`` creates a dihedral angle with three of it's precursor atoms. Returns ------- coords : np.ndarray Array of cartesian coordinates corresponding to ICAtom list atoms """ coords = np.zeros((len(ICArray), 3)) M = np.zeros((3, 3)) N = np.zeros((3, 3)) for i in np.arange(len(ICArray)): if IC_idx_Array[i, 0] < 0: continue elif IC_idx_Array[i, 1] < 0: coords[i, 0] = ICArray[i, 0] elif IC_idx_Array[i, 2] < 0: x = ICArray[i, 0] * np.cos(np.pi - ICArray[i, 1]) y = ICArray[i, 0] * np.sin(np.pi - ICArray[i, 1]) coords[i, 0] = coords[IC_idx_Array[i, 0], 0] + x coords[i, 1] = y else: sinAngle, sinDihedral = np.sin(ICArray[i, 1:]) cosAngle, cosDihedral = np.cos(ICArray[i, 1:]) x = ICArray[i, 0] * cosAngle y = ICArray[i, 0] * cosDihedral * sinAngle z = ICArray[i, 0] * sinDihedral * sinAngle a = coords[IC_idx_Array[i, 2]] b = coords[IC_idx_Array[i, 1]] c = coords[IC_idx_Array[i, 0]] ab = b - a bc = c - b bc /= np.linalg.norm(bc) n = np.cross(ab, bc) n /= np.linalg.norm(n) ncbc = np.cross(n, bc) for j in np.arange(3): M[0, j] = bc[j] M[1, j] = ncbc[j] M[2, j] = n[j] N = np.array([-x, y, z]) coords[i] = M.T @ N + coords[IC_idx_Array[i, 0]] return coords
[docs] @njit(parallel=True, cache=True) def batch_ic2cart(IC_idx_Array: np.ndarray, ICArray: np.ndarray): """ Vectorization of ic2cart along the ICArray argument to calculate cartesian coordinates for multiple conformations. Parameters ---------- IC_idx_Array : np.ndarray Array of indexes corresponding to the atoms defining the internal coordinate. IC_idx_Array[i, 0] # index of the atom that atom ``i`` is bonded to. IC_idx_Array[i, 1] # index of the atom that atom ``i`` creates an angle with. IC_idx_Array[i, 2] # index of the atom that atom ``i`` creates a dihedral angle with. A value of -1 indicates that there is no precursor atom to define the bond, angle or dihedral and that atom ``i`` is one of the coordinate system defining atoms. ICArray : Internal coordinate values for ``n`` conformers IC_idx_Array[n, i, 0] # bond distance of the atom that atom ``i`` and one of its precursor atoms. IC_idx_Array[n, i, 1] # bond angle between atom ``i`` two of its precursor atoms. IC_idx_Array[n, i, 2] # index of the atom that atom ``i`` creates a dihedral angle with three of it's precursor atoms. Returns ------- coords : np.ndarray Array of cartesian coordinates corresponding to ICAtom list atoms """ coords = np.zeros_like(ICArray) for i in prange(len(ICArray)): coords[i] = _ic_to_cart(IC_idx_Array, ICArray[i]) return coords
[docs] @njit(cache=True) def np_all_axis1(x: np.ndarray) -> np.ndarray: """Numba compatible version of np.all(x, axis=1). Parameters ---------- x : (M, N) np.ndarray Boolean input array. Returns ------- out : (M,) np.ndarray Boolean array indicating if all values in the second axis of ``x`` are true. """ out = np.ones(x.shape[0], dtype=np.bool8) for i in range(x.shape[1]): out = np.logical_and(out, x[:, i]) return out
# @njit(cache=True, parallel=True) def _get_sasas( atom_coords: np.ndarray, atom_radii: np.ndarray, env_coords: np.ndarray, all_radii: np.ndarray, grid_points: np.ndarray ) -> np.ndarray: """ Get the solvent accessible surface areas (SASA) over all frames and aotms of ``atom_coords`` in the provided environment. Parameters ---------- atom_coords : (M, N, 3) np.ndarray M frames of the 3D cartesian coordinates of the N atoms to calculate the SASA of. atom_radii : (N,) np.ndarray vdW radii + solvent radii of the atoms in ``atom_coords`` env_coords : (L, 3) np.ndarray 3D cartesian coordinates of the atoms nearby the atoms for which the SASA is desired. all_radii : (L,) np.ndarray vdW radii + solvent radii of the atoms in ``env_coords`` grid_points : (K,) np.ndarray Spherical points to use for SASA calculation. Returns ------- all_sasa: (M, N) np.ndarray Calculated SASA of each atom in each frame. """ all_sasa = np.zeros(atom_coords.shape[:2], dtype=np.float64) for staten in range(len(atom_coords)): atc = atom_coords[staten] alc = np.vstack((atc, env_coords)) all_sasa[staten] = _get_sasa(atc, atom_radii, alc, all_radii, grid_points) return all_sasa @cached @njit(cache=True) def _get_sasa( atom_coords: np.ndarray, atom_radii: np.ndarray, all_coords: np.ndarray, all_radii: np.ndarray, grid_points: np.ndarray ) -> np.ndarray: """ Get the solvent accessible surface area of ``atom_coords`` in the provided environment. Parameters ---------- atom_coords : (N, 3) np.ndarray 3D cartesian coordinates of the N atoms to calculate the SASA of. atom_radii : (N,) np.ndarray vdW radii + solvent radii of the atoms in ``atom_coords`` all_coords : (M, 3), np.ndarray 3D cartesian coordinates of the atoms of ``atom_coords`` and the nearby atoms. all_radii : (M,) np.ndarray vdW radii + solvent radii of the atoms in ``all_coords`` grid_points : (K,) np.ndarray Spherical points to use for SASA calculation. Returns ------- atom_sasa: (N,) np.ndarray Array of SASAs for each atom in atom_coords """ atom_sasa = np.zeros(len(atom_coords), dtype=np.int64) for i in range(len(atom_radii)): posi, radi = atom_coords[i], atom_radii[i] neighbor_idxs = [] for j, (posj, radj) in enumerate(zip(all_coords, all_radii)): if j == i: continue diff = posi - posj dist_squared = diff @ diff if dist_squared < (radi + radj) ** 2: neighbor_idxs.append(j) sphere = grid_points * radi + posi for j in neighbor_idxs: posj, radj = all_coords[j], all_radii[j] left_overs = [] for k, posk in enumerate(sphere): diff = posj - posk dist_squared = diff @ diff if dist_squared > radj ** 2: left_overs.append(posk) sphere = np.zeros((len(left_overs), 3), dtype=np.float64) for k in range(len(left_overs)): sphere[k] = left_overs[k] atom_sasa[i] = sphere.shape[0] atom_sasa = atom_sasa * (4.0 * np.pi / len(grid_points)) * atom_radii ** 2 return atom_sasa
[docs] def get_sasa( atom_coords: np.ndarray, atom_radii: np.ndarray, environment_coords: np.ndarray = None, environment_radii: np.ndarray = None, probe_radius: float = 1.4, npoints: int = 1024, by_atom: bool = False ) -> np.ndarray: """ Parameters ---------- atom_coords : (M, N, 3) np.ndarray M frames of the 3D cartesian coordinates of the N atoms to calculate the SASA of. atom_radii : (N,) np.ndarray vdW radii + solvent radii of the atoms in ``atom_coords`` environment_coords : (L, 3) np.ndarray 3D cartesian coordinates of the atoms nearby the atoms for which the SASA is desired. environment_radii : (L,) np.ndarray vdW radii + solvent radii of the atoms in ``env_coords`` probe_radius : flaot Radius of the solvent probe. npoints : int Number of points to use in sphere used for SASA calculation by_atom : bool Return SASA of each atom rather than the set of atoms Returns ------- sasa : np.ndarray: Solvent accessible surface areas of the provided coords in the provided environment. """ grid_points = fibonacci_points(npoints) if environment_coords is None: environment_coords = np.empty(shape=(0, 3), dtype=np.float64) environment_radii = np.empty(shape=0, dtype=np.float64) atom_radii = atom_radii.copy() environment_radii = environment_radii.copy() atom_radii += probe_radius environment_radii += probe_radius atom_coords = atom_coords[None, ...] if len(atom_coords.shape) == 2 else atom_coords.copy() all_radii = np.hstack((atom_radii, environment_radii)) atom_sasa = _get_sasas(atom_coords, atom_radii, environment_coords, all_radii, grid_points) if by_atom: return atom_sasa else: return np.sum(atom_sasa, axis=1)
[docs] def fibonacci_points(n: int) -> np.ndarray: """ Get ``n`` evenly spaced points on the unit sphere using the fibonacci method. Parameters ---------- n : int Number of points to return. Returns ------- coords: np.ndarray 3D coordinates of the points on the unit sphere. """ phi = (3 - m.sqrt(5)) * np.pi * np.arange(n) z = np.linspace(1 - 1.0/n, 1.0/n - 1, n) radius = np.sqrt(1 - z*z) coords = np.empty((n, 3)) coords[:, 0] = radius * np.cos(phi) coords[:, 1] = radius * np.sin(phi) coords[:, 2] = z return coords