Coverage for encodermap/loading/features.py: 67%
467 statements
« prev ^ index » next coverage.py v7.1.0, created at 2023-02-07 11:05 +0000
« prev ^ index » next coverage.py v7.1.0, created at 2023-02-07 11:05 +0000
1# -*- coding: utf-8 -*-
2# encodermap/loading/features.py
3################################################################################
4# Encodermap: A python library for dimensionality reduction.
5#
6# Copyright 2019-2022 University of Konstanz and the Authors
7#
8# Authors:
9# Kevin Sawade, Patricia Schwarz
10#
11# Encodermap is free software: you can redistribute it and/or modify
12# it under the terms of the GNU Lesser General Public License as
13# published by the Free Software Foundation, either version 2.1
14# of the License, or (at your option) any later version.
15# This package is distributed in the hope that it will be useful to other
16# researches. IT DOES NOT COME WITH ANY WARRANTY WHATSOEVER; without even the
17# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
18# See the GNU Lesser General Public License for more details.
19#
20# See <http://www.gnu.org/licenses/>.
21################################################################################
22"""Classes to be used as custom features with pyemma add_custom_feature
24ToDo:
25 * Write tests
26 * Put the describe_last_feats function into utils.
27 * Add Nan feature.
28 * Write Examples.
30"""
32##############################################################################
33# Imports
34##############################################################################
37from __future__ import annotations
39import copy
40import itertools
41from typing import TYPE_CHECKING, Callable
43import numpy as np
45import encodermap
47from .._optional_imports import _optional_import
49##############################################################################
50# Typing
51##############################################################################
54if TYPE_CHECKING: 54 ↛ 55line 54 didn't jump to line 55, because the condition on line 54 was never true
55 from mdtraj.core.residue_names import _AMINO_ACID_CODES
56 from pyemma.coordinates.data.featurization.angles import (
57 AngleFeature,
58 DihedralFeature,
59 )
60 from pyemma.coordinates.data.featurization.distances import DistanceFeature
61 from pyemma.coordinates.data.featurization.misc import (
62 CustomFeature,
63 SelectionFeature,
64 )
66 from encodermap._typing import AnyFeature
69##############################################################################
70# Optional Imports
71##############################################################################
74CustomFeature: CustomFeature = _optional_import(
75 "pyemma", "coordinates.data.featurization.misc.CustomFeature"
76)
77SelectionFeature: SelectionFeature = _optional_import(
78 "pyemma", "coordinates.data.featurization.misc.SelectionFeature"
79)
80DihedralFeature: DihedralFeature = _optional_import(
81 "pyemma", "coordinates.data.featurization.angles.DihedralFeature"
82)
83AngleFeature: AngleFeature = _optional_import(
84 "pyemma", "coordinates.data.featurization.angles.AngleFeature"
85)
86DistanceFeature: DistanceFeature = _optional_import(
87 "pyemma", "coordinates.data.featurization.distances.DistanceFeature"
88)
89_AMINO_ACID_CODES: dict = _optional_import(
90 "mdtraj", "core.residue_names._AMINO_ACID_CODES"
91)
92indices_phi: Callable = _optional_import("mdtraj", "geometry.dihedral.indices_phi")
93indices_psi: Callable = _optional_import("mdtraj", "geometry.dihedral.indices_psi")
94indices_omega: Callable = _optional_import("mdtraj", "geometry.dihedral.indices_omega")
95indices_chi1: Callable = _optional_import("mdtraj", "geometry.dihedral.indices_chi1")
96indices_chi2: Callable = _optional_import("mdtraj", "geometry.dihedral.indices_chi2")
97indices_chi3: Callable = _optional_import("mdtraj", "geometry.dihedral.indices_chi3")
98indices_chi4: Callable = _optional_import("mdtraj", "geometry.dihedral.indices_chi4")
99indices_chi5: Callable = _optional_import("mdtraj", "geometry.dihedral.indices_chi5")
102##############################################################################
103# Globals
104##############################################################################
106__all__ = [
107 "AllCartesians",
108 "AllBondDistances",
109 "CentralCartesians",
110 "CentralBondDistances",
111 "CentralAngles",
112 "CentralDihedrals",
113 "SideChainCartesians",
114 "SideChainBondDistances",
115 "SideChainAngles",
116 "SideChainDihedrals",
117]
119##############################################################################
120# Functions
121##############################################################################
124def describe_last_feats(feat: AnyFeature, n: int = 5) -> None:
125 """Prints the description of the last `n` features.
127 Args:
128 feat (encodermap.Featurizer): An instance of a featurizer.
129 n (Optional[int]): The number of last features to decribe. Defaults to 5.
131 """
132 for i, lbl in enumerate(feat.describe()[-n:]):
133 print(lbl)
136def add_KAC_backbone_bonds(top):
137 """Adds acetylated Lysine specific backbone bonds to mdtraj.Topology.
139 Args:
140 top (mdtraj.Topology): The topology to be extended.
142 Returns:
143 mdtraj.Topology: The new topology with added bonds.
145 Note:
146 The bonds are currently not at the correct index, i.e. they are
147 at the very end of top.bonds and not at the correct position.
149 """
150 # for index, bond in enumerate(top.bonds):
151 # if any([a.residue.name == 'KAC' for a in bond]):
152 # break
153 resid_KAC = top.atom(top.select("resname KAC")[0]).residue.index
154 # add C - N(KAC)
155 bond = (
156 top.select(f"name C and resid {resid_KAC - 1}")[0],
157 top.select(f"name N and resid {resid_KAC}")[0],
158 )
159 bond = [top.atom(b) for b in bond]
160 top.add_bond(*bond)
161 # add N (KAC) - CA(KAC)
162 bond = (
163 top.select(f"name N and resid {resid_KAC}")[0],
164 top.select(f"name CA and resid {resid_KAC}")[0],
165 )
166 bond = [top.atom(b) for b in bond]
167 top.add_bond(*bond)
168 # add CA (KAC) - C(KAC)
169 bond = (
170 top.select(f"name CA and resid {resid_KAC}")[0],
171 top.select(f"name C and resid {resid_KAC}")[0],
172 )
173 bond = [top.atom(b) for b in bond]
174 top.add_bond(*bond)
175 return top
178def add_KAC_sidechain_bonds(top):
179 """Adds acetylated Lysine specific side chain bonds to mdtraj.Topology. Bonds between
180 indented atoms are added:
181 KAC11-N 102
182 KAC11-H 103
183 KAC11-CA 104
184 KAC11-CB 105
185 KAC11-CG 106
186 KAC11-CD 107
187 KAC11-CE 108
188 KAC11-NZ 109
189 KAC11-HZ 110
190 KAC11-CH 111
191 KAC11-OI2 112
192 KAC11-CI1 113
193 KAC11-C 114
194 KAC11-O 115
196 Args:
197 top (mdtraj.Topology): The topology to be extended.
199 Returns:
200 mdtraj.Topology: The new topology with added bonds.
202 Note:
203 The bonds are currently not at the correct index, i.e. they are
204 at the very end of top.bonds and not at the correct position.
205 """
206 # for r in top.residues:
207 # if r.name == 'KAC':
208 # for a in r.atoms:
209 # print(a, a.index)
210 # print(a.__dir__())
211 resid_KAC = top.atom(top.select("resname KAC")[0]).residue.index
212 # add CA - CB(KAC)
213 bond = (
214 top.select(f"name CA and resid {resid_KAC - 1}")[0],
215 top.select(f"name CB and resid {resid_KAC}")[0],
216 )
217 bond = [top.atom(b) for b in bond]
218 top.add_bond(*bond)
219 # add CB (KAC) - CG(KAC)
220 bond = (
221 top.select(f"name CB and resid {resid_KAC}")[0],
222 top.select(f"name CG and resid {resid_KAC}")[0],
223 )
224 bond = [top.atom(b) for b in bond]
225 top.add_bond(*bond)
226 # add CG (KAC) - CE(KAC)
227 bond = (
228 top.select(f"name CG and resid {resid_KAC}")[0],
229 top.select(f"name CE and resid {resid_KAC}")[0],
230 )
231 bond = [top.atom(b) for b in bond]
232 top.add_bond(*bond)
233 # add CE (KAC) - NZ(KAC)
234 bond = (
235 top.select(f"name CE and resid {resid_KAC}")[0],
236 top.select(f"name NZ and resid {resid_KAC}")[0],
237 )
238 bond = [top.atom(b) for b in bond]
239 top.add_bond(*bond)
240 return top
243##############################################################################
244# Classes
245##############################################################################
248class CentralDihedrals(DihedralFeature):
249 """Feature that collects all dihedrals in the backbone of a topology.
251 Attributes:
252 top (mdtraj.Topology): Topology of this feature.
253 indexes (np.ndarray): The numpy array returned from `top.select('all')`.
255 """
257 __serialize_version = 0
258 __serialize_fields = ("selstr", "_phi_inds", "_psi_inds", "_omega_inds")
260 def __init__(
261 self,
262 topology,
263 selstr=None,
264 deg=False,
265 cossin=False,
266 periodic=True,
267 omega=True,
268 generic_labels=False,
269 ):
270 """Instantiate this feature class.
272 Args:
273 topology (mdtraj.Topology): A topology to build features from.
274 selstr (Optional[str]): A string, that limits the selection of dihedral angles.
275 Only dihedral angles which atoms are represented by the `selstr` argument
276 are considered. This selection string follows MDTraj's atom selection
277 language: https://mdtraj.org/1.9.3/atom_selection.html. Can also
278 be None, in which case all backbone dihedrals (also omega) are
279 considered. Defaults to None.
280 deg (bool): Whether to return the result in degree (`deg=True`) or in
281 radians (`deg=False`). Defaults to radions.
282 cossin (bool): If True, each angle will be returned as a pair of
283 (sin(x), cos(x)). This is useful, if you calculate the mean
284 (e.g TICA/PCA, clustering) in that space. Defaults to False.
285 periodic (bool): Whether to recognize periodic boundary conditions and
286 work under the minimum image convention. Defaults to True.
288 """
289 self.top = topology
290 self.selstr = selstr
292 indices = indices_psi(self.top)
293 if not selstr: 293 ↛ 296line 293 didn't jump to line 296, because the condition on line 293 was never false
294 self._psi_inds = indices
295 else:
296 self._psi_inds = indices[
297 np.in1d(indices[:, 1], self.top.select(selstr), assume_unique=True)
298 ]
300 self.omega = omega
301 if self.omega: 301 ↛ 310line 301 didn't jump to line 310, because the condition on line 301 was never false
302 indices = indices_omega(self.top)
303 if not selstr: 303 ↛ 306line 303 didn't jump to line 306, because the condition on line 303 was never false
304 self._omega_inds = indices
305 else:
306 self._omega_inds = indices[
307 np.in1d(indices[:, 1], self.top.select(selstr), assume_unique=True)
308 ]
310 indices = indices_phi(self.top)
311 if not selstr: 311 ↛ 314line 311 didn't jump to line 314, because the condition on line 311 was never false
312 self._phi_inds = indices
313 else:
314 self._phi_inds = indices[
315 np.in1d(indices[:, 1], self.top.select(selstr), assume_unique=True)
316 ]
318 if self.omega: 318 ↛ 321line 318 didn't jump to line 321, because the condition on line 318 was never false
319 zipped = zip(self._psi_inds, self._omega_inds, self._phi_inds)
320 else:
321 zipped = zip(self._psi_inds, self._phi_inds)
323 # alternate phi, psi , omega pairs (phi_1, psi_1, omega_1..., phi_n, psi_n, omega_n)
324 dih_indexes = np.array(list(psi_omega_phi for psi_omega_phi in zipped)).reshape(
325 -1, 4
326 )
328 # set generic_labels for xarray
329 if generic_labels:
330 self.describe = self.generic_describe
332 super(CentralDihedrals, self).__init__(
333 self.top, dih_indexes, deg=deg, cossin=cossin, periodic=periodic
334 )
336 @property
337 def name(self):
338 """str: The name of the class: "CentralDihedrals"."""
339 return "CentralDihedrals"
341 @property
342 def indexes(self):
343 """np.ndarray: A (n_angles, 4) shaped numpy array giving the atom indices
344 of the dihedral angles to be calculated."""
345 return self.angle_indexes.astype("int32")
347 def generic_describe(self):
348 """Returns a list of generic labels, not containing residue names.
349 These can be used to stack tops of different topology.
351 Returns:
352 list[str]: A list of labels.
354 """
355 if self.cossin: 355 ↛ 356line 355 didn't jump to line 356, because the condition on line 355 was never true
356 sin_cos = ("COS(PSI %s)", "SIN(PSI %s)")
357 labels_psi = [
358 (
359 sin_cos[0] % i,
360 sin_cos[1] % i,
361 )
362 for i in range(len(self._psi_inds))
363 ]
364 if self.omega:
365 sin_cos = ("COS(OMEGA %s)", "SIN(OMEGA %s)")
366 labels_omega = [
367 (
368 sin_cos[0] % i,
369 sin_cos[1] % i,
370 )
371 for i in range(len(self._omega_inds))
372 ]
373 sin_cos = ("COS(PHI %s)", "SIN(PHI %s)")
374 labels_phi = [
375 (
376 sin_cos[0] % i,
377 sin_cos[1] % i,
378 )
379 for i in range(len(self._phi_inds))
380 ]
381 # produce the same ordering as the given indices (phi_1, psi_1, ..., phi_n, psi_n)
382 # or (cos(phi_1), sin(phi_1), cos(psi_1), sin(psi_1), ..., cos(phi_n), sin(phi_n), cos(psi_n), sin(psi_n))
383 if self.omega:
384 zipped = zip(labels_psi, labels_omega, labels_phi)
385 else:
386 zip(labels_psi, labels_phi)
388 res = list(
389 itertools.chain.from_iterable(itertools.chain.from_iterable(zipped))
390 )
391 else:
392 labels_psi = [
393 f"CENTERDIH PSI %s" % i for i in range(len(self._psi_inds))
394 ]
395 if self.omega: 395 ↛ 399line 395 didn't jump to line 399, because the condition on line 395 was never false
396 labels_omega = [
397 "CENTERDIH OMEGA %s" % i for i in range(len(self._omega_inds))
398 ]
399 labels_phi = ["CENTERDIH PHI %s" % i for i in range(len(self._phi_inds))]
400 if self.omega: 400 ↛ 403line 400 didn't jump to line 403, because the condition on line 400 was never false
401 zipped = zip(labels_psi, labels_omega, labels_phi)
402 else:
403 zipped = zip(labels_psi, labels_phi)
404 res = list(itertools.chain.from_iterable(zipped))
405 return res
407 def describe(self):
408 """Returns a list of labels, that can be used to unambiguously define
409 atoms in the protein topology.
411 Returns:
412 list[str]: A list of labels. This list has as many entries as atoms in `self.top`.
414 """
415 top = self.top
416 getlbl = (
417 lambda at: f"RESID {at.residue.name}:{at.residue.resSeq:>4} CHAIN {at.residue.chain.index}"
418 )
420 if self.cossin: 420 ↛ 421line 420 didn't jump to line 421, because the condition on line 420 was never true
421 sin_cos = ("COS(PSI %s)", "SIN(PSI %s)")
422 labels_psi = [
423 (
424 sin_cos[0] % getlbl(top.atom(ires[1])),
425 sin_cos[1] % getlbl(top.atom(ires[1])),
426 )
427 for ires in self._psi_inds
428 ]
429 if self.omega:
430 sin_cos = ("COS(OMEGA %s)", "SIN(OMEGA %s)")
431 labels_omega = [
432 (
433 sin_cos[0] % getlbl(top.atom(ires[1])),
434 sin_cos[1] % getlbl(top.atom(ires[1])),
435 )
436 for ires in self._omega_inds
437 ]
438 sin_cos = ("COS(PHI %s)", "SIN(PHI %s)")
439 labels_phi = [
440 (
441 sin_cos[0] % getlbl(top.atom(ires[1])),
442 sin_cos[1] % getlbl(top.atom(ires[1])),
443 )
444 for ires in self._phi_inds
445 ]
446 # produce the same ordering as the given indices (phi_1, psi_1, ..., phi_n, psi_n)
447 # or (cos(phi_1), sin(phi_1), cos(psi_1), sin(psi_1), ..., cos(phi_n), sin(phi_n), cos(psi_n), sin(psi_n))
448 if self.omega:
449 zipped = zip(labels_psi, labels_omega, labels_phi)
450 else:
451 zip(labels_psi, labels_phi)
453 res = list(
454 itertools.chain.from_iterable(itertools.chain.from_iterable(zipped))
455 )
456 else:
457 labels_psi = [
458 f"CENTERDIH PSI " + getlbl(top.atom(ires[1]))
459 for ires in self._psi_inds
460 ]
461 if self.omega: 461 ↛ 466line 461 didn't jump to line 466, because the condition on line 461 was never false
462 labels_omega = [
463 "CENTERDIH OMEGA " + getlbl(top.atom(ires[1]))
464 for ires in self._omega_inds
465 ]
466 labels_phi = [
467 "CENTERDIH PHI " + getlbl(top.atom(ires[1]))
468 for ires in self._phi_inds
469 ]
470 if self.omega: 470 ↛ 473line 470 didn't jump to line 473, because the condition on line 470 was never false
471 zipped = zip(labels_psi, labels_omega, labels_phi)
472 else:
473 zipped = zip(labels_psi, labels_phi)
474 res = list(itertools.chain.from_iterable(zipped))
475 return res
477 @property
478 def dask_transform(self):
479 return "dihedral"
482class SideChainDihedrals(DihedralFeature):
483 """Feature that collects all dihedrals in the backbone of a topology.
485 Attributes:
486 top (mdtraj.Topology): Topology of this feature.
487 indexes (np.ndarray): The numpy array returned from `top.select('all')`.
488 options (list[str]): A list of possible sidechain angles ['chi1' to 'chi5'].
490 """
492 __serialize_version: int = 0
493 __serialize_fields: tuple[str] = ("_prefix_label_lengths",)
494 options: list[str] = ["chi1", "chi2", "chi3", "chi4", "chi5"]
496 def __init__(
497 self,
498 top,
499 selstr=None,
500 deg=False,
501 cossin=False,
502 periodic=True,
503 generic_labels=False,
504 ):
505 which = self.options
506 # get all dihedral index pairs
507 from mdtraj.geometry import dihedral
509 indices_dict = {k: getattr(dihedral, "indices_%s" % k)(top) for k in which}
510 if selstr: 510 ↛ 511line 510 didn't jump to line 511, because the condition on line 510 was never true
511 selection = top.select(selstr)
512 truncated_indices_dict = {}
513 for k, inds in indices_dict.items():
514 mask = np.in1d(inds[:, 1], selection, assume_unique=True)
515 truncated_indices_dict[k] = inds[mask]
516 indices_dict = truncated_indices_dict
518 valid = {k: indices_dict[k] for k in indices_dict if indices_dict[k].size > 0}
519 if not valid:
520 raise ValueError(
521 "Could not determine any side chain dihedrals for your topology!"
522 )
524 # for key in indices_dict:
525 # print(key, indices_dict[key])
526 # for proteins that don't have some chi angles we filter which
527 which = list(
528 filter(
529 lambda x: True if len(indices_dict[x]) > 0 else False,
530 indices_dict.keys(),
531 )
532 )
534 # change the sorting to be per-residue and not all chi1 and then all chi2 angles
535 self.per_res_dict = {}
536 for r in top.residues:
537 arrs = []
538 bools = []
539 for k in which:
540 if np.any(np.in1d(valid[k], np.array([a.index for a in r.atoms]))):
541 where = np.where(
542 np.in1d(
543 valid[k].flatten(), np.array([a.index for a in r.atoms])
544 )
545 )[0]
546 arr = valid[k].flatten()[where]
547 bools.append(True)
548 arrs.append(arr)
549 else:
550 bools.append(False)
551 if any(bools):
552 self.per_res_dict[str(r)] = np.vstack(arrs)
554 self._prefix_label_lengths = np.array(
555 [len(indices_dict[k]) if k in which else 0 for k in self.options]
556 )
557 indices = np.vstack([v for v in self.per_res_dict.values()])
559 super(SideChainDihedrals, self).__init__(
560 top=top, dih_indexes=indices, deg=deg, cossin=cossin, periodic=periodic
561 )
563 if generic_labels:
564 self.describe = self.generic_describe
566 @property
567 def name(self):
568 """str: The name of the class: "SideChainDihedrals"."""
569 return "SideChainDihedrals"
571 @property
572 def indexes(self):
573 """np.ndarray: A (n_angles, 4) shaped numpy array giving the atom indices
574 of the dihedral angles to be calculated."""
575 return self.angle_indexes
577 def generic_describe(self):
578 top = self.top
579 getlbl = (
580 lambda at: f"RESID {at.residue.name}:{at.residue.resSeq:>4} CHAIN {at.residue.chain.index}"
581 )
582 prefixes = []
583 for lengths, label in zip(self._prefix_label_lengths, self.options):
584 if self.cossin: 584 ↛ 585line 584 didn't jump to line 585, because the condition on line 584 was never true
585 lengths *= 2
586 prefixes.extend([label.upper()] * lengths)
587 prefixes = []
588 for key, value in self.per_res_dict.items():
589 if self.cossin: 589 ↛ 590line 589 didn't jump to line 590, because the condition on line 589 was never true
590 prefixes.extend(
591 [opt.upper() for opt in self.options[: value.shape[0]]] * 2
592 )
593 else:
594 prefixes.extend([opt.upper() for opt in self.options[: value.shape[0]]])
596 if self.cossin: 596 ↛ 597line 596 didn't jump to line 597, because the condition on line 596 was never true
597 cossin = ("COS({dih} {res})", "SIN({dih} {res})")
598 labels = [
599 s.format(
600 dih=prefixes[j + len(cossin) * i],
601 res=getlbl(self.top.atom(ires[1])),
602 )
603 for i, ires in enumerate(self.angle_indexes)
604 for j, s in enumerate(cossin)
605 ]
606 else:
607 labels = [
608 "SIDECHDIH {dih} {res}".format(
609 dih=prefixes[i], res=getlbl(self.top.atom(ires[1]))
610 )
611 for i, ires in enumerate(self.angle_indexes)
612 ]
613 labels = list(map(lambda x: x[:14] + x[27:31], labels))
614 return labels
616 def describe(self):
617 """Returns a list of labels, that can be used to unambiguously define
618 atoms in the protein topology.
620 Returns:
621 list[str]: A list of labels. This list has as many entries as atoms in `self.top`.
623 """
624 top = self.top
625 getlbl = (
626 lambda at: f"RESID {at.residue.name}:{at.residue.resSeq:>4} CHAIN {at.residue.chain.index}"
627 )
628 prefixes = []
629 for lengths, label in zip(self._prefix_label_lengths, self.options):
630 if self.cossin: 630 ↛ 631line 630 didn't jump to line 631, because the condition on line 630 was never true
631 lengths *= 2
632 prefixes.extend([label.upper()] * lengths)
633 prefixes = []
634 for key, value in self.per_res_dict.items():
635 if self.cossin: 635 ↛ 636line 635 didn't jump to line 636, because the condition on line 635 was never true
636 prefixes.extend(
637 [opt.upper() for opt in self.options[: value.shape[0]]] * 2
638 )
639 else:
640 prefixes.extend([opt.upper() for opt in self.options[: value.shape[0]]])
642 if self.cossin: 642 ↛ 643line 642 didn't jump to line 643, because the condition on line 642 was never true
643 cossin = ("COS({dih} {res})", "SIN({dih} {res})")
644 labels = [
645 s.format(
646 dih=prefixes[j + len(cossin) * i],
647 res=getlbl(self.top.atom(ires[1])),
648 )
649 for i, ires in enumerate(self.angle_indexes)
650 for j, s in enumerate(cossin)
651 ]
652 else:
653 labels = [
654 "SIDECHDIH {dih} {res}".format(
655 dih=prefixes[i], res=getlbl(self.top.atom(ires[1]))
656 )
657 for i, ires in enumerate(self.angle_indexes)
658 ]
660 return labels
663class AllCartesians(SelectionFeature):
664 """Feature that collects all cartesian position of all atoms in the trajectory.
666 Attributes:
667 top (mdtraj.Topology): Topology of this feature.
668 indexes (np.ndarray): The numpy array returned from `top.select('all')`.
669 prefix_label (str): A prefix for the labels. In this case it is 'POSITION'.
671 """
673 __serialize_version = 0
674 __serialize_fields = ("indexes",)
675 prefix_label = "POSITION "
677 def __init__(self, top):
678 """Instantiate the AllCartesians class.
680 Args:
681 top (mdtraj.Topology): A mdtraj topology.
683 """
684 self.top = top
685 self.indexes = self.top.select("all")
686 super().__init__(top, self.indexes)
688 @property
689 def name(self):
690 """str: The name of this class: 'AllCartesians'"""
691 return "AllCartesians"
693 def describe(self):
694 """Returns a list of labels, that can be used to unambiguously define
695 atoms in the protein topology.
697 Returns:
698 list[str]: A list of labels. This list has as many entries as atoms in `self.top`.
700 """
701 getlbl = (
702 lambda at: f"ATOM {at.name:>4}:{at.index:5} {at.residue.name}:{at.residue.resSeq:>4} CHAIN {at.residue.chain.index}"
703 )
704 labels = []
705 for i in self.indexes:
706 for pos in ["X", "Y", "Z"]:
707 labels.append(
708 f"{self.prefix_label} {pos} {getlbl(self.top.atom(i))}"
709 )
710 return labels
713class CentralCartesians(AllCartesians):
714 """Feature that collects all cartesian position of the backbone atoms.
716 Attributes:
717 top (mdtraj.Topology): Topology of this feature.
718 indexes (np.ndarray): The numpy array returned from `top.select('all')`.
719 prefix_label (str): A prefix for the labels. In this case it is 'CENTERPOS'.
721 """
723 __serialize_version = 0
724 __serialize_fields = ("indexes",)
725 prefix_label = "CENTERPOS"
727 def __init__(self, top, generic_labels=False):
728 self.top = top
729 super().__init__(self.top)
730 self.central_indexes = self.top.select("name CA or name C or name N")
731 assert len(self.central_indexes) < len(self.indexes)
732 self.indexes = self.central_indexes
733 self.dimension = 3 * len(self.indexes)
735 if generic_labels:
736 self.describe = self.generic_describe
738 def generic_describe(self):
739 labels = []
740 for i in range(len(self.central_indexes)):
741 for pos in ["X", "Y", "Z"]:
742 labels.append(f"{self.prefix_label} {pos} {i}")
743 return labels
745 def describe(self):
746 """Returns a list of labels, that can be used to unambiguously define
747 atoms in the protein topology.
749 Returns:
750 list[str]: A list of labels. This list has as manyu entries as atoms in `self.top`.
752 """
753 getlbl = (
754 lambda at: f"ATOM {at.name:>4}:{at.index:5} {at.residue.name}:{at.residue.resSeq:>4} CHAIN {at.residue.chain.index}"
755 )
756 labels = []
757 for i in self.central_indexes:
758 for pos in ["X", "Y", "Z"]:
759 labels.append(
760 f"{self.prefix_label} {pos} {getlbl(self.top.atom(i))}"
761 )
762 return labels
764 @property
765 def name(self):
766 """str: The name of the class: "CentralCartesians"."""
767 return "CentralCartesians"
769 # def transform(self, traj):
770 # newshape = (traj.xyz.shape[0], 3 * self.central_indexes.shape[0])
771 # return np.reshape(traj.xyz[:, self.central_indexes, :], newshape)
774class SideChainCartesians(AllCartesians):
775 """Feature that collects all cartesian position of all non-backbone atoms.
777 Attributes:
778 top (mdtraj.Topology): Topology of this feature.
779 indexes (np.ndarray): The numpy array returned from `top.select('all')`.
780 prefix_label (str): A prefix for the labels. In this case it is 'SIDECHPOS'.
782 """
784 __serialize_version = 0
785 __serialize_fields = ("indexes",)
786 prefix_label = "SIDECHPOS"
788 def __init__(self, top):
789 self.top = top
790 super().__init__(self.top)
791 central_indexes = self.top.select("not backbone")
792 assert len(central_indexes) < len(self.indexes)
793 self.indexes = central_indexes
794 self.dimension = 3 * len(self.indexes)
796 @property
797 def name(self):
798 """str: The name of the class: "SideChainCartesians"."""
799 return "SideChainCartesians"
802class AllBondDistances(DistanceFeature):
803 """Feature that collects all bonds in a topology.
805 Attributes:
806 top (mdtraj.Topology): Topology of this feature.
807 indexes (np.ndarray): The numpy array returned from `top.select('all')`.
808 prefix_label (str): A prefix for the labels. In this case it is 'DISTANCE'.
810 """
812 __serialize_version = 0
813 __serialize_fields = ("distance_indexes", "periodic")
814 prefix_label = "DISTANCE "
816 def __init__(self, top, distance_indexes=None, periodic=True, check_aas=True):
817 self.distance_indexes = distance_indexes
818 if any([r.name not in _AMINO_ACID_CODES for r in top.residues]) and check_aas: 818 ↛ 819line 818 didn't jump to line 819, because the condition on line 818 was never true
819 raise Exception("Unkown amino acid in top.")
820 if self.distance_indexes is None:
821 self.top = top
822 self.distance_indexes = np.vstack(
823 [[b[0].index, b[1].index] for b in self.top.bonds]
824 )
825 # print(self.distance_indexes, len(self.distance_indexes))
826 super().__init__(self.top, self.distance_indexes, periodic)
827 else:
828 super().__init__(self.top, self.distance_indexes, periodic)
829 # print(self.distance_indexes, len(self.distance_indexes))
831 def generic_describe(self):
832 labels = []
833 for i in range(len(self.distance_indexes)):
834 labels.append(f"{self.prefix_label}{i}")
835 return labels
837 def describe(self):
838 """Returns a list of labels, that can be used to unambiguously define
839 atoms in the protein topology.
841 Returns:
842 list[str]: A list of labels. This list has as many entries as atoms in `self.top`.
844 """
845 getlbl = (
846 lambda at: f"ATOM {at.name:>4}:{at.index:5} {at.residue.name}:{at.residue.resSeq:>4}"
847 )
848 labels = []
849 for i, j in self.distance_indexes:
850 i, j = self.top.atom(i), self.top.atom(j)
851 labels.append(
852 f"{self.prefix_label}{getlbl(i)} DIST {getlbl(j)} CHAIN {int(np.unique([a.residue.chain.index for a in [i, j]]))}"
853 )
854 return labels
856 @property
857 def name(self):
858 """str: The name of the class: "AllBondDistances"."""
859 return "AllBondDistances"
861 @property
862 def indexes(self):
863 """np.ndarray: A (n_angles, 2) shaped numpy array giving the atom indices
864 of the distances to be calculated."""
865 return self.distance_indexes
868class CentralBondDistances(AllBondDistances):
869 """Feature that collects all bonds in the backbone of a topology.
871 Attributes:
872 top (mdtraj.Topology): Topology of this feature.
873 indexes (np.ndarray): The numpy array returned from `top.select('all')`.
874 prefix_label (str): A prefix for the labels. In this case it is 'CENTERDISTANCE'.
876 """
878 __serialize_version = 0
879 __serialize_fields = ("distance_indexes", "periodic")
880 prefix_label = "CENTERDISTANCE "
882 def __init__(
883 self,
884 top,
885 distance_indexes=None,
886 periodic=True,
887 check_aas=True,
888 generic_labels=False,
889 ):
890 self.top = copy.deepcopy(top)
891 if any([r.name == "KAC" for r in top.residues]): 891 ↛ 892line 891 didn't jump to line 892, because the condition on line 891 was never true
892 self.top = add_KAC_backbone_bonds(self.top)
893 check_aas = False
894 select = self.top.select("name CA or name C or name N")
896 # temp_list = []
897 # for i in range(len(select)-1):
898 # temp_list.append([select[i], select[i+1]])
899 # temp_array = np.array(temp_list)
900 # print("this is the array of the selected atoms:", temp_array)
901 # print(len(temp_array))
903 if distance_indexes is None: 903 ↛ 906line 903 didn't jump to line 906, because the condition on line 903 was never false
904 distance_indexes = []
906 for b in self.top.bonds:
907 # print(b)
908 if np.all([np.isin(x.index, select) for x in b]):
909 distance_indexes.append([x.index for x in b])
910 distance_indexes = np.sort(distance_indexes, axis=0)
912 if generic_labels:
913 self.describe = self.generic_describe
915 super().__init__(
916 self.top, distance_indexes, periodic, check_aas=check_aas
917 ) # distance_indexes
919 @property
920 def name(self):
921 """str: The name of the class: "CentralBondDistances"."""
922 return "CentralBondDistances"
924 @property
925 def indexes(self):
926 """np.ndarray: A (n_angles, 2) shaped numpy array giving the atom indices
927 of the distances to be calculated."""
928 return self.distance_indexes
931class SideChainBondDistances(AllBondDistances):
932 """Feature that collects all bonds not in the backbone of a topology.
934 Attributes:
935 top (mdtraj.Topology): Topology of this feature.
936 indexes (np.ndarray): The numpy array returned from `top.select('all')`.
937 prefix_label (str): A prefix for the labels. In this case it is 'SIDECHDISTANCE'.
939 """
941 __serialize_version = 0
942 __serialize_fields = ("distance_indexes", "periodic")
943 prefix_label = "SIDECHDISTANCE "
945 def __init__(self, top, periodic=True):
946 self.top = top
947 from mdtraj.geometry import dihedral
949 which = ["chi1", "chi2", "chi3", "chi4", "chi5"]
950 indices_dict = {k: getattr(dihedral, "indices_%s" % k)(top) for k in which}
951 flat_list = [
952 item
953 for sublist in indices_dict.values()
954 for item in sublist.flatten().tolist()
955 ]
956 atoms_in_sidechain_dihedrals = set(flat_list)
958 distance_indexes = []
959 for angle, indices in indices_dict.items():
960 for index in indices:
961 if angle == "chi1":
962 distance_indexes.append([index[1], index[2]])
963 distance_indexes.append([index[2], index[3]])
964 else:
965 distance_indexes.append([index[2], index[3]])
966 distance_indexes = np.sort(distance_indexes, axis=0)
967 super().__init__(self.top, distance_indexes, periodic)
969 @property
970 def name(self):
971 """str: The name of the class: "SideChainBondDistances"."""
972 return "SideChainBondDistances"
974 @property
975 def indexes(self):
976 """np.ndarray: A (n_angles, 2) shaped numpy array giving the atom indices
977 of the distances to be calculated."""
978 return self.distance_indexes
981class CentralAngles(AngleFeature):
982 """Feature that collects all angles in the backbone of a topology.
984 Attributes:
985 top (mdtraj.Topology): Topology of this feature.
986 indexes (np.ndarray): The numpy array returned from `top.select('all')`.
987 prefix_label (str): A prefix for the labels. In this case it is 'CENTERANGLE'.
989 """
991 __serialize_version = 0
992 __serialize_fields = ("angle_indexes", "deg", "cossin", "periodic")
993 prefix_label = "CENTERANGLE "
995 def __init__(
996 self, top, deg=False, cossin=False, periodic=True, generic_labels=False
997 ):
998 self.top = copy.deepcopy(top)
999 select = self.top.select("name CA or name C or name N")
1000 # add 4 bonds in KAC
1001 if any([r.name == "KAC" for r in top.residues]): 1001 ↛ 1002line 1001 didn't jump to line 1002, because the condition on line 1001 was never true
1002 self.top = add_KAC_backbone_bonds(self.top)
1003 bonds = np.vstack([[x.index for x in b] for b in self.top.bonds])
1004 angle_indexes = []
1005 for a in select:
1006 where = np.where(bonds == a)
1007 possible_bonds = bonds[where[0], :]
1008 where = np.isin(possible_bonds, select)
1009 hits = np.count_nonzero(np.all(where, axis=1))
1010 if hits <= 1:
1011 continue
1012 elif hits == 2: 1012 ↛ 1018line 1012 didn't jump to line 1018, because the condition on line 1012 was never false
1013 where = np.all(where, axis=1)
1014 these = np.unique(
1015 [self.top.atom(i).index for i in possible_bonds[where, :].flatten()]
1016 )
1017 angle_indexes.append(these)
1018 elif hits == 3:
1019 raise Exception(
1020 f"Can't deal with these angles. One atom is part of three possible angles"
1021 )
1022 elif hits == 4:
1023 raise Exception(
1024 f"Can't deal with these angles. One atom is part of four possible angles"
1025 )
1026 else:
1027 raise Exception(
1028 f"Can't deal with these angles. One atom is part of three possible angles"
1029 )
1031 angle_indexes = np.vstack(angle_indexes)
1032 angle_indexes = np.unique(angle_indexes, axis=0)
1033 if generic_labels:
1034 self.describe = self.generic_describe
1035 super().__init__(self.top, angle_indexes, deg, cossin, periodic)
1037 def generic_describe(self):
1038 labels = []
1039 for i in range(len(self.angle_indexes)):
1040 labels.append(f"{self.prefix_label}{i}")
1041 return labels
1043 def describe(self):
1044 """Returns a list of labels, that can be used to unambiguously define
1045 atoms in the protein topology.
1047 Returns:
1048 list[str]: A list of labels. This list has as many entries as atoms in `self.top`.
1050 """
1051 getlbl = (
1052 lambda at: f"ATOM {at.name:>4}:{at.index:5} {at.residue.name}:{at.residue.resSeq:>4}"
1053 )
1054 labels = []
1055 for i, j, k in self.angle_indexes:
1056 i, j, k = self.top.atom(i), self.top.atom(j), self.top.atom(k)
1057 labels.append(
1058 f"{self.prefix_label}{getlbl(i)} ANGLE {getlbl(j)} ANGLE {getlbl(k)} CHAIN {int(np.unique([a.residue.chain.index for a in [i, j, k]]))}"
1059 )
1060 return labels
1062 @property
1063 def name(self):
1064 """str: The name of the class: "CentralAngles"."""
1065 return "CentralAngles"
1067 @property
1068 def indexes(self):
1069 """np.ndarray: A (n_angles, 3) shaped numpy array giving the atom indices
1070 of the angles to be calculated."""
1071 return self.angle_indexes
1074class SideChainAngles(AngleFeature):
1075 """Feature that collects all angles not in the backbone of a topology.
1077 Attributes:
1078 top (mdtraj.Topology): Topology of this feature.
1079 indexes (np.ndarray): The numpy array returned from `top.select('all')`.
1080 prefix_label (str): A prefix for the labels. In this case it is 'SIDECHANGLE'.
1082 """
1084 __serialize_version = 0
1085 __serialize_fields = ("angle_indexes", "deg", "cossin", "periodic")
1086 prefix_label = "SIDECHANGLE "
1088 def __init__(self, top, deg=False, cossin=False, periodic=True):
1089 self.top = copy.deepcopy(top)
1090 select = self.top.select(
1091 "not backbone and (type C or type N or type S or type O) and not type H"
1092 )
1093 # add 4 bonds in KAC
1094 if any([r.name == "KAC" for r in top.residues]):
1095 self.top = add_KAC_sidechain_bonds(self.top)
1096 bonds = np.vstack([[x.index for x in b] for b in self.top.bonds])
1097 angle_indexes = []
1098 for a in select:
1099 where = np.where(bonds == a)
1100 possible_bonds = bonds[where[0], :]
1101 where = np.isin(possible_bonds, select)
1102 possible_bonds = possible_bonds[
1103 np.where(np.all(where, axis=1))[0], :
1104 ] # remove atoms not in selection (like hydrogen)
1105 where = where[
1106 np.where(np.all(where, axis=1))[0], :
1107 ] # remove atoms not in selection (like hydrogen)
1108 hits = np.count_nonzero(np.all(where, axis=1))
1109 if hits <= 1:
1110 continue
1111 elif hits == 2:
1112 where_ax = np.all(where, axis=1)
1113 angle_atoms = np.unique(
1114 [
1115 self.top.atom(i).index
1116 for i in possible_bonds[where_ax, :].flatten()
1117 ]
1118 )
1119 assert len(angle_atoms) == 3, print(
1120 [(i, self.top.atom(i)) for i in angle_atoms]
1121 )
1122 angle_indexes.append(angle_atoms)
1123 elif hits == 3:
1124 where_ax = np.vstack([where[:-1], [False, False]])
1125 where_ax = np.all(where_ax, axis=1)
1126 angle_atoms = np.unique(
1127 [
1128 self.top.atom(i).index
1129 for i in possible_bonds[where_ax, :].flatten()
1130 ]
1131 )
1132 assert len(angle_atoms) == 3, print(
1133 [(i, self.top.atom(i)) for i in angle_atoms]
1134 )
1135 angle_indexes.append(angle_atoms)
1136 where_ax = np.vstack([[False, False], where[1:]])
1137 where_ax = np.all(where_ax, axis=1)
1138 angle_atoms = np.unique(
1139 [
1140 self.top.atom(i).index
1141 for i in possible_bonds[where_ax, :].flatten()
1142 ]
1143 )
1144 assert len(angle_atoms) == 3, print(
1145 [(i, self.top.atom(i)) for i in angle_atoms]
1146 )
1147 angle_indexes.append(angle_atoms)
1148 elif hits == 4:
1149 raise Exception(
1150 f"Can't deal with these angles. One atom is part of four possible angles"
1151 )
1152 else:
1153 raise Exception(
1154 f"Can't deal with these angles. One atom is part of three possible angles"
1155 )
1156 angle_indexes = np.vstack(angle_indexes)
1157 super().__init__(self.top, angle_indexes, deg, cossin, periodic)
1159 def describe(self):
1160 """Returns a list of labels, that can be used to unambiguously define
1161 atoms in the protein topology.
1163 Retruns:
1164 list[str]: A list of labels. This list has as many entries as atoms in `self.top`.
1166 """
1167 getlbl = (
1168 lambda at: f"ATOM {at.name:>4}:{at.index:5} {at.residue.name}:{at.residue.resSeq:>4}"
1169 )
1170 labels = []
1171 for i, j, k in self.angle_indexes:
1172 i, j, k = self.top.atom(i), self.top.atom(j), self.top.atom(k)
1173 labels.append(
1174 f"{self.prefix_label}{getlbl(i)} ANGLE {getlbl(j)} ANGLE {getlbl(k)} CHAIN {int(np.unique([a.residue.chain.index for a in [i, j, k]]))}"
1175 )
1176 return labels
1178 @property
1179 def name(self):
1180 """str: The name of the class: "SideChainAngles"."""
1181 return "SideChainAngles"
1183 @property
1184 def indexes(self):
1185 """np.ndarray: A (n_angles, 3) shaped numpy array giving the atom indices
1186 of the angles to be calculated."""
1187 return self.angle_indexes