Coverage for encodermap/loading/features.py: 15%
1293 statements
« prev ^ index » next coverage.py v7.4.1, created at 2024-12-31 16:54 +0100
« prev ^ index » next coverage.py v7.4.1, created at 2024-12-31 16:54 +0100
1# -*- coding: utf-8 -*-
2# encodermap/loading/features.py
3################################################################################
4# EncoderMap: A python library for dimensionality reduction.
5#
6# Copyright 2019-2024 University of Konstanz and the Authors
7#
8# Authors:
9# Kevin Sawade, Patricia Schwarz
10#
11# Encodermap is free software: you can redistribute it and/or modify
12# it under the terms of the GNU Lesser General Public License as
13# published by the Free Software Foundation, either version 2.1
14# of the License, or (at your option) any later version.
15# This package is distributed in the hope that it will be useful to other
16# researches. IT DOES NOT COME WITH ANY WARRANTY WHATSOEVER; without even the
17# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
18# See the GNU Lesser General Public License for more details.
19#
20# See <http://www.gnu.org/licenses/>.
21################################################################################
22"""Features contain topological information of proteins and other biomolecules.
24These topological information can be calculated once and then provided with
25input coordinates to calculate frame-wise collective variables of MD simulations.
27The features in this module used to inherit from PyEMMA's features
28(https://github.com/markovmodel/PyEMMA), but PyEMMA has since been archived.
30If using EncoderMap's featurization make sure to also cite PyEMMA, from which
31a lot of this code was adopted::
33 @article{scherer_pyemma_2015,
34 author = {Scherer, Martin K. and Trendelkamp-Schroer, Benjamin
35 and Paul, Fabian and Pérez-Hernández, Guillermo and Hoffmann, Moritz and
36 Plattner, Nuria and Wehmeyer, Christoph and Prinz, Jan-Hendrik and Noé, Frank},
37 title = {{PyEMMA} 2: {A} {Software} {Package} for {Estimation},
38 {Validation}, and {Analysis} of {Markov} {Models}},
39 journal = {Journal of Chemical Theory and Computation},
40 volume = {11},
41 pages = {5525-5542},
42 year = {2015},
43 issn = {1549-9618},
44 shorttitle = {{PyEMMA} 2},
45 url = {http://dx.doi.org/10.1021/acs.jctc.5b00743},
46 doi = {10.1021/acs.jctc.5b00743},
47 urldate = {2015-10-19},
48 month = oct,
49 }
51"""
53################################################################################
54# Imports
55################################################################################
58# Future Imports at the top
59from __future__ import annotations
61# Standard Library Imports
62import inspect
63import itertools
64import warnings
65from collections import deque
66from collections.abc import Callable, Sequence
67from copy import deepcopy
68from typing import TYPE_CHECKING, Any, Final, Literal, Optional, TypeVar, Union
70# Third Party Imports
71import numpy as np
72from optional_imports import _optional_import
75################################################################################
76# Typing
77################################################################################
80if TYPE_CHECKING:
81 # Third Party Imports
82 import dask
83 import mdtraj as md
85 # Encodermap imports
86 from encodermap.trajinfo.info_all import TrajEnsemble
87 from encodermap.trajinfo.info_single import SingleTraj
88 from encodermap.trajinfo.trajinfo_utils import _AMINO_ACID_CODES
91AllCartesiansType = TypeVar("AllCartesians", bound="Parent")
92AllBondDistancesType = TypeVar("AllBondDistances", bound="Parent")
93CentralCartesiansType = TypeVar("CentralCartesians", bound="Parent")
94CentralBondDistancesType = TypeVar("CentralBondDistances", bound="Parent")
95CentralAnglesType = TypeVar("CentralAngles", bound="Parent")
96CentralDihedralsType = TypeVar("CentralDihedrals", bound="Parent")
97SideChainCartesiansType = TypeVar("SideChainCartesians", bound="Parent")
98SideChainBondDistancesType = TypeVar("SideChainBondDistances", bound="Parent")
99SideChainAnglesType = TypeVar("SideChainAngles", bound="Parent")
100SideChainDihedralsType = TypeVar("SideChainDihedrals", bound="Parent")
101CustomFeatureType = TypeVar("CustomFeature", bound="Parent")
102SelectionFeatureType = TypeVar("SelectionFeature", bound="Parent")
103AngleFeatureType = TypeVar("AngleFeature", bound="Parent")
104DihedralFeatureType = TypeVar("DihedralFeature", bound="Parent")
105DistanceFeatureType = TypeVar("DistanceFeature", bound="Parent")
106AlignFeatureType = TypeVar("AlignFeature", bound="Parent")
107InverseDistanceFeatureType = TypeVar("InverseDistanceFeature", bound="Parent")
108ContactFeatureType = TypeVar("ContactFeature", bound="Parent")
109BackboneTorsionFeatureType = TypeVar("BackboneTorsionFeature", bound="Parent")
110ResidueMinDistanceFeatureType = TypeVar("ResidueMinDistanceFeature", bound="Parent")
111GroupCOMFeatureType = TypeVar("GroupCOMFeature", bound="Parent")
112ResidueCOMFeatureType = TypeVar("ResidueCOMFeature", bound="Parent")
113SideChainTorsionsType = TypeVar("SideChainTorsions", bound="Parent")
114MinRmsdFeatureType = TypeVar("MinRmsdFeature", bound="Parent")
117AnyFeature = Union[
118 AllCartesiansType,
119 AllBondDistancesType,
120 CentralCartesiansType,
121 CentralBondDistancesType,
122 CentralAnglesType,
123 CentralDihedralsType,
124 SideChainCartesiansType,
125 SideChainBondDistancesType,
126 SideChainAnglesType,
127 SideChainDihedralsType,
128 CustomFeatureType,
129 SelectionFeatureType,
130 AngleFeatureType,
131 DihedralFeatureType,
132 DistanceFeatureType,
133 AlignFeatureType,
134 InverseDistanceFeatureType,
135 ContactFeatureType,
136 BackboneTorsionFeatureType,
137 ResidueMinDistanceFeatureType,
138 GroupCOMFeatureType,
139 ResidueCOMFeatureType,
140 SideChainTorsionsType,
141 MinRmsdFeatureType,
142]
145################################################################################
146# Optional Imports
147################################################################################
150md = _optional_import("mdtraj")
151_dist_mic = _optional_import("mdtraj", "geometry._geometry._dist_mic")
152_dist = _optional_import("mdtraj", "geometry._geometry._dist")
153_dihedral_mic = _optional_import("mdtraj", "geometry._geometry._dihedral_mic")
154_dihedral = _optional_import("mdtraj", "geometry._geometry._dihedral")
155_angle_mic = _optional_import("mdtraj", "geometry._geometry._angle_mic")
156_angle = _optional_import("mdtraj", "geometry._geometry._angle")
157box_vectors_to_lengths_and_angles = _optional_import(
158 "mdtraj", "utils.unitcell.box_vectors_to_lengths_and_angles"
159)
160dask = _optional_import("dask")
163################################################################################
164# Globals
165################################################################################
167__all__: list[str] = [
168 "AllCartesians",
169 "AllBondDistances",
170 "CentralCartesians",
171 "CentralBondDistances",
172 "CentralAngles",
173 "CentralDihedrals",
174 "SideChainCartesians",
175 "SideChainBondDistances",
176 "SideChainAngles",
177 "SideChainDihedrals",
178 "CustomFeature",
179 "SelectionFeature",
180 "AngleFeature",
181 "DihedralFeature",
182 "DistanceFeature",
183 "AlignFeature",
184 "InverseDistanceFeature",
185 "ContactFeature",
186 "BackboneTorsionFeature",
187 "ResidueMinDistanceFeature",
188 "GroupCOMFeature",
189 "ResidueCOMFeature",
190 "SideChainTorsions",
191 "MinRmsdFeature",
192]
195PERIODIC_WARNING: bool = False
196PYEMMA_CITATION_WARNING: bool = False
197PYEMMA_FEATURES: list[str] = [
198 "SelectionFeature",
199 "AngleFeature",
200 "DihedralFeature",
201 "DistanceFeature",
202 "AlignFeature",
203 "InverseDistanceFeature",
204 "ContactFeature",
205 "BackboneTorsionFeature",
206 "ResidueMinDistanceFeature",
207 "GroupCOMFeature",
208 "ResidueCOMFeature",
209 "SideChainTorsions",
210 "MinRmsdFeature",
211]
214################################################################################
215# Functions
216################################################################################
219def pair(*numbers: int) -> int:
220 """ConvertGroup's (https://convertgroup.com/) implementation of
221 Matthew Szudzik's pairing function (http://szudzik.com/ElegantPairing.pdf)
223 Maps a pair of non-negative integers to a uniquely associated single non-negative integer.
224 Pairing also generalizes for `n` non-negative integers, by recursively mapping the first pair.
225 For example, to map the following tuple:
227 Args:
228 *numbers (int): Variable length integers.
230 Returns:
231 int: The paired integer.
233 """
234 if len(numbers) < 2:
235 raise ValueError("Szudzik pairing function needs at least 2 numbers as input")
236 elif any((n < 0) or (not isinstance(n, int)) for n in numbers):
237 raise ValueError(
238 f"Szudzik pairing function maps only non-negative integers. In your "
239 f"input, there seems to be negative or non-integer values: {numbers=}"
240 )
242 numbers = deque(numbers)
244 # fetch the first two numbers
245 n1 = numbers.popleft()
246 n2 = numbers.popleft()
248 if n1 != max(n1, n2):
249 mapping = pow(n2, 2) + n1
250 else:
251 mapping = pow(n1, 2) + n1 + n2
253 mapping = int(mapping)
255 if not numbers:
256 # recursion concludes
257 return mapping
258 else:
259 numbers.appendleft(mapping)
260 return pair(*numbers)
263def unpair(number: int, n: int = 2) -> list[int]:
264 """ConvertGroup's (https://convertgroup.com/) implementation of
265 Matthew Szudzik's pairing function (http://szudzik.com/ElegantPairing.pdf)
267 The inverse function outputs the pair associated with a non-negative integer.
268 Unpairing also generalizes by recursively unpairing a non-negative integer to
269 `n` non-negative integers.
271 For example, to associate a `number` with three non-negative
272 integers n_1, n_2, n_3, such that:
274 pairing(n_1, n_2, n_3) = `number`
276 the `number` will first be unpaired to n_p, n_3, then the n_p will be unpaired to n_1, n_2,
277 producing the desired n_1, n_2 and n_3.
279 Args:
280 number(int): The paired integer.
281 n (int): How many integers are paired in `number`?
283 Returns:
284 list[int]: A list of length `n` with the constituting ints.
286 """
287 if (number < 0) or (not isinstance(number, int)):
288 raise ValueError("Szudzik unpairing function requires a non-negative integer")
290 if number - pow(np.floor(np.floor(number)), 2) < np.floor(np.floor(number)):
292 n1 = number - pow(np.floor(np.floor(number)), 2)
293 n2 = np.floor(np.floor(number))
295 else:
296 n1 = np.floor(np.floor(number))
297 n2 = number - pow(np.floor(np.floor(number)), 2) - np.floor(np.floor(number))
299 n1, n2 = int(n1), int(n2)
301 if n > 2:
302 return [unpair(n1, n - 1) + (n2,)]
303 else:
304 # recursion concludes
305 return [n1, n2]
308def _check_aas(traj: SingleTraj) -> None:
309 r = set([r.name for r in traj.top.residues])
310 diff = r - set(traj._custom_top.amino_acid_codes.keys())
311 if diff:
312 raise Exception(
313 f"I don't recognize these residues: {diff}. "
314 f"Either add them to the `SingleTraj` or `TrajEnsemble` via "
315 f"`traj.load_custom_topology(custom_aas)` or "
316 f"`trajs.load_custom_topology(custom_aas)` "
317 f"Or remove them from your trajectory. See the documentation of the "
318 f"`em.CustomTopology` class. Here are the recognized residues:\n\n"
319 f"{traj._custom_top.amino_acid_codes.keys()}"
320 )
323def describe_last_feats(feat: AnyFeature, n: int = 5) -> None:
324 """Prints the description of the last `n` features.
326 Args:
327 feat (encodermap.Featurizer): An instance of a featurizer.
328 n (int): The number of last features to describe. Default is 5.
330 """
331 for i, lbl in enumerate(feat.describe()[-n:]):
332 print(lbl)
335def _describe_atom(
336 topology: md.Topology,
337 index: int,
338) -> str:
339 """
340 Returns a string describing the given atom.
342 Args:
343 topology (md.Topology): An MDTraj Topology.
344 index (str): The index of the atom.
346 Return:
347 str: A description of the atom.
349 """
350 atom = topology.atom(index)
351 if topology.n_chains > 1:
352 return f"{atom.residue.name} {atom.residue.resSeq} {atom.name} {atom.index} {atom.residue.chain.index}"
353 else:
354 return f"{atom.residue.name} {atom.residue.resSeq} {atom.name} {atom.index}"
357################################################################################
358# Parent Classes
359################################################################################
362class CitePYEMMAWarning(UserWarning):
363 pass
366class FeatureMeta(type):
367 """Inspects the __init__ of classes and adds attributes to them based on
368 their call signature.
370 If a feature uses the arguments `deg` or `omega` in
371 its call signature, the instance will have the CLASS attributes `_use_angle` and
372 `_use_omega` set to True. Otherwise, the instance will have them set as False.
374 This allows other functions that use these features to easily discern whether
375 they need these arguments before instantiating the classes.
377 Example:
378 >>> from encodermap.loading import features
379 >>> f_class = getattr(features, "SideChainDihedrals")
380 >>> f_class._use_angle
381 True
382 >>> f_class._use_omega
383 False
385 """
387 def __new__(cls, name, bases, dct):
388 x = super().__new__(cls, name, bases, dct)
389 args = inspect.getfullargspec(x.__init__)
390 if args.varargs is not None: 390 ↛ 391line 390 didn't jump to line 391, because the condition on line 390 was never true
391 raise Exception(f"{x.__init__=} {x=} {args=} {args=}")
392 args = args.args
393 if "deg" in args:
394 x._use_angle = True
395 else:
396 x._use_angle = False
397 if "omega" in args:
398 x._use_omega = True
399 else:
400 x._use_omega = False
401 if "periodic" in args:
402 x._use_periodic = True
403 else:
404 x._use_periodic = False
405 x.atom_feature = False
406 x._raise_on_unitcell = False
407 return x
410class Feature(metaclass=FeatureMeta):
411 """Parent class to all feature classes. Implements the FeatureMeta,
412 the transform method, and checks for unknown amino acids..
414 This class implements functionality, that holds true for all features.
415 The `transform()` method can be used by subclasses in two ways:
416 * Provide all args with None. In this case, the traj in `self.traj`
417 will be used to calculate the transformation.
418 * Provide custom `xyz`, `unitcell_vectors`, and `unitcell_info`. In this
419 case,
421 """
423 def __init__(
424 self,
425 traj: Union[SingleTraj, TrajEnsemble],
426 check_aas: bool = True,
427 periodic: Optional[bool] = None,
428 delayed: bool = False,
429 ) -> None:
430 self.traj = traj
431 self._raise_on_unitcell = False
432 if isinstance(self.traj.top, list):
433 assert len(self.traj.top) == 1, (
434 f"The trajs in the features seem to have multiple toplogies: "
435 f"{self.traj.top_files}. Features can only work with single "
436 f"topologies."
437 )
438 self.top = self.traj.top[0]
439 else:
440 self.top = self.traj.top
441 if check_aas:
442 _check_aas(traj)
444 self.delayed = delayed
446 if periodic is not None:
447 if periodic and self.traj._have_unitcell:
448 self.periodic = True
449 elif periodic and not self.traj._have_unitcell:
450 self.periodic = False
451 self._raise_on_unitcell
452 global PERIODIC_WARNING
453 if not PERIODIC_WARNING:
454 warnings.warn(
455 f"You requested a `em.loading.features.Feature` to calculate "
456 f"features in a periodic box, using the minimum image convention, "
457 f"but the trajectory you provided does not have "
458 f"unitcell information. If this feature will later be supplied "
459 f"with trajectories with unitcell information, an Exception "
460 f"will be raised, to make sure distances/angles are calculated "
461 f"correctly.",
462 stacklevel=2,
463 )
464 PERIODIC_WARNING = True
465 else:
466 self.periodic = False
468 global PYEMMA_CITATION_WARNING
469 if not PYEMMA_CITATION_WARNING and self.__class__.__name__ in PYEMMA_FEATURES:
470 warnings.warn(
471 message=(
472 "EncoderMap's featurization uses code from the now deprecated "
473 "python package PyEMMA (https://github.com/markovmodel/PyEMMA). "
474 "Please make sure to also cite them, when using EncoderMap."
475 ),
476 category=CitePYEMMAWarning,
477 )
478 PYEMMA_CITATION_WARNING = True
480 @property
481 def dimension(self) -> int:
482 """int: The dimension of the feature."""
483 return self._dim
485 @dimension.setter
486 def dimension(self, val: Union[float, int]) -> None:
487 self._dim = int(val)
489 def __eq__(self, other: AnyFeature) -> bool:
490 if not issubclass(other.__class__, Feature):
491 return False
492 if not isinstance(other, self.__class__):
493 return False
494 if self.dimension != other.dimension:
495 return False
496 if self.traj is not None:
497 if self.traj.top != other.traj.top:
498 return False
499 if hasattr(self, "ref"):
500 if not np.allclose(self.ref.xyz, other.ref.xyz, rtol=1e-4):
501 return False
502 if hasattr(self, "scheme"):
503 if self.scheme != other.scheme:
504 return False
505 if hasattr(self, "ignore_nonprotein"):
506 if self.ignore_nonprotein != other.ignore_nonprotein:
507 return False
508 if hasattr(self, "periodic"):
509 if self.periodic != other.periodic:
510 return False
511 if hasattr(self, "threshold"):
512 if self.threshold != other.threshold:
513 return False
514 if hasattr(self, "group_definitions"):
515 for self_group_def, other_group_def in zip(
516 self.group_definitions, other.group_definitions
517 ):
518 if not np.array_equal(self_group_def, other_group_def):
519 return False
520 if hasattr(self, "group_pairs"):
521 if not np.array_equal(self.group_pairs, other.group_pairs):
522 return False
523 if hasattr(self, "count_contacts"):
524 if self.count_contacts != other.count_contacts:
525 return False
526 # Encodermap imports
527 from encodermap.misc.xarray import _get_indexes_from_feat
529 try:
530 self_index = _get_indexes_from_feat(self, self.traj)
531 other_index = _get_indexes_from_feat(other, other.traj)
532 if not np.array_equal(self_index, other_index):
533 return False
534 except AttributeError:
535 pass
536 return True
538 def transform(
539 self,
540 xyz: Optional[np.ndarray] = None,
541 unitcell_vectors: Optional[np.ndarray] = None,
542 unitcell_info: Optional[np.ndarray] = None,
543 ) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
544 """Carries out the computation of the CVs.
546 For featurization of single trajs, all arguments can be left None,
547 and the values of the `traj` at class instantiation will be
548 returned by this method. For ensembles with a single topology, but
549 multiple trajectories, the xyz, unitcell_vectors, and unitcell_info
550 should be provided accordingly. This parent class' `transform` then
551 carries out checks (do all arguments provide the same number of frames,
552 does the xyz array have the same number of atoms as the `traj` at
553 instantiation, do the unitcell_angles coincide with the one of the
554 parent traj, ...). Thus, it is generally advised to call this method
555 with super() to run these checks.
557 Args:
558 xyz (Optional[np.ndarray]): If None, the coordinates of the
559 trajectory in provided as `traj`, when the feature was instantiated
560 will be used.
561 unitcell_vectors (Optional[np.ndarray]): If None, the unitcell vectors
562 of the trajectory in provided as `traj`, when the feature was instantiated
563 will be used. Unitcell_vectors are arrays with shape (n_frames, 3, 3),
564 where the rows are the bravais vectors a, b, c.
565 unitcell_info (Optional[np.ndarray]): If None, the unitcell info of
566 the trajectory in provided as `traj`, when the feature was
567 instantiated will be used. The unitcell_info is an array with
568 shape (n_frames, 6), where the first three columns are the unitcell
569 lengths in nm, the remaining columns are the unitcell angles in deg.
571 Returns:
572 tuple: A tuple containing three np.ndarrays:
573 - The xyz coordinates.
574 - The unitcell_vectors
575 - The unitcell_info
577 """
578 if self._raise_on_unitcell and (
579 unitcell_info is not None or unitcell_vectors is not None
580 ):
581 raise Exception(
582 f"This feature was instantiated with the keyword argument `periodic=True`, "
583 f"but the `SingleTraj` used for instantiation did not contain any unitcell "
584 f"information. Now, unitcell_infos are fed into the `transform` "
585 f"method of this feature. This behavior is not allowed. Make sure to "
586 f"either specifically set `periodic=False` or fix the unitcells in "
587 f"your trajectory files."
588 )
589 if xyz is not None:
590 input_atoms = xyz.shape[1]
591 try:
592 self_atoms = self.traj.xyz.shape[1]
593 except AttributeError as e:
594 raise Exception(f"{self=}") from e
595 if hasattr(self, "periodic"):
596 if self.periodic:
597 assert unitcell_vectors is not None and unitcell_info is not None, (
598 f"When providing a `feature.transform` function with xyz "
599 f"data, and setting {self.periodic=} to True, please "
600 f"also provide `unitcell_vectors` and `unitcell_info` "
601 f"to calculate distances/angles/dihedrals in periodic space."
602 )
603 assert input_atoms == self_atoms, (
604 f"The shape of the input xyz coordinates is off from the expected "
605 f"shape. The topology {self.top} defines {self_atoms} atoms. The "
606 f"provided array has {xyz.shaope[1]=} atoms."
607 )
608 else:
609 xyz = self.traj.xyz.copy()
610 if unitcell_vectors is not None:
611 assert len(unitcell_vectors) == len(xyz), (
612 f"The shape of the provided `unitcell_vectors` is off from the "
613 f"expected shape. The xyz data contains {len(xyz)=} frames, while "
614 f"the `unitcell_vectors` contains {len(unitcell_vectors)=} frames."
615 )
616 else:
617 if self.traj._have_unitcell:
618 unitcell_vectors = self.traj.unitcell_vectors.copy()
619 else:
620 unitcell_vectors = None
621 if unitcell_info is not None:
622 assert len(unitcell_info) == len(xyz), (
623 f"The shape of the provided `unitcell_info` is off from the "
624 f"expected shape. The xyz data contains {len(xyz)=} frames, while "
625 f"the `unitcell_info` contains {len(unitcell_info)=} frames."
626 )
627 provided_orthogonal = np.allclose(unitcell_info[:, 3:], 90)
628 self_orthogonal = np.allclose(self.traj.unitcell_angles, 90)
629 assert provided_orthogonal == self_orthogonal, (
630 f"The trajectory you provided to `transform` and the one "
631 f"this feature was instantiated with have different crystal "
632 f"systems in their unitcells: {provided_orthogonal=} {self_orthogonal=}"
633 )
634 else:
635 if self.traj._have_unitcell:
636 unitcell_info = np.hstack(
637 [
638 self.traj.unitcell_lengths.copy(),
639 self.traj.unitcell_angles.copy(),
640 ]
641 )
642 else:
643 unitcell_info = None
644 return xyz, unitcell_vectors, unitcell_info
647class CustomFeature(Feature):
648 delayed: bool = False
649 _nonstandard_transform_args: list[str] = [
650 "top",
651 "indexes",
652 "delayed_call",
653 "_fun",
654 "_args",
655 "_kwargs",
656 ]
657 _is_custom: Final[True] = True
658 traj: Optional[SingleTraj] = None
659 top: Optional[md.Topology] = None
660 indexes: Optional[np.ndarray] = None
661 _fun: Optional[Callable] = None
662 _args: Optional[tuple[Any, ...]] = None
663 _kwargs: Optional[dict[str, Any]] = None
665 def __init__(
666 self,
667 fun: Callable,
668 dim: int,
669 traj: Optional[SingleTraj] = None,
670 description: Optional[str] = None,
671 fun_args: tuple[Any, ...] = tuple(),
672 fun_kwargs: dict[str, Any] = None,
673 delayed: bool = False,
674 ) -> None:
675 self.id = None
676 self.traj = traj
677 self.indexes = None
678 if fun_kwargs is None:
679 fun_kwargs = {}
680 self._fun = fun
681 self._args = fun_args
682 self._kwargs = fun_kwargs
683 self._dim = dim
684 self.desc = description
685 self.delayed = delayed
686 assert self._dim > 0, f"Feature dimensions need to be greater than 0."
688 def describe(self) -> list[str]:
689 """Gives a list of strings describing this feature's feature-axis.
691 A feature computes a collective variable (CV). A CV is aligned with an MD
692 trajectory on the time/frame-axis. The feature axis is unique for every
693 feature. A feature describing the backbone torsions (phi, omega, psi) would
694 have a feature axis with the size 3*n-3, where n is the number of residues.
695 The end-to-end distance of a linear protein in contrast would just have
696 a feature axis with length 1. This `describe()` method will label these
697 values unambiguously. A backbone torsion feature's `describe()` could be
698 ['phi_1', 'omega_1', 'psi_1', 'phi_2', 'omega_2', ..., 'psi_n-1'].
699 The end-to-end distance feature could be described by
700 ['distance_between_MET1_and_LYS80'].
702 Returns:
703 list[str]: The labels of this feature.
705 """
706 if isinstance(self.desc, str):
707 desc = [self.desc]
708 elif self.desc is None:
709 arg_str = (
710 f"{self._args}, {self._kwargs}" if self._kwargs else f"{self._args}"
711 )
712 desc = [f"CustomFeature_{self.id} calling {self._fun} with args {arg_str}"]
713 elif self.desc and not (len(self.desc) == self._dim or len(self.desc) == 1):
714 raise ValueError(
715 f"to avoid confusion, ensure the lengths of 'description' "
716 f"list matches dimension - or give a single element which will be repeated."
717 f"Input was {self.desc}"
718 )
720 if len(desc) == 1 and self.dimension > 0:
721 desc *= self.dimension
723 return desc
725 @property
726 def dask_indices(self):
727 """str: The name of the delayed transformation to carry out with this feature."""
728 return "indexes"
730 @staticmethod
731 @dask.delayed
732 def dask_transform(
733 top: md.Topology,
734 indexes: np.ndarray,
735 delayed_call: Optional[Callable] = None,
736 _fun: Optional[Callable] = None,
737 _args: Optional[Sequence[Any]] = None,
738 _kwargs: Optional[dict[str, Any]] = None,
739 xyz: Optional[np.ndarray] = None,
740 unitcell_vectors: Optional[np.ndarray] = None,
741 unitcell_info: Optional[np.ndarray] = None,
742 ) -> np.ndarray:
743 """The CustomFeature dask transfrom is still under development."""
744 if unitcell_info is not None:
745 traj = md.Trajectory(
746 xyz=xyz,
747 topology=top,
748 unitcell_lengths=unitcell_info[:, :3],
749 unitcell_angles=unitcell_info[:, 3:],
750 )
751 else:
752 traj = md.Trajectory(
753 xyz=xyz,
754 topology=top,
755 )
757 if _kwargs is None:
758 _kwargs = {}
760 if delayed_call is not None:
761 return delayed_call(traj, indexes, **_kwargs)
762 else:
763 if _args is None:
764 _args = tuple()
765 return _fun(traj, *_args, **_kwargs)
767 def transform(
768 self,
769 traj: Optional[md.Trajectory] = None,
770 xyz: Optional[np.ndarray] = None,
771 unitcell_vectors: Optional[np.ndarray] = None,
772 unitcell_info: Optional[np.ndarray] = None,
773 ) -> np.ndarray:
774 """Takes xyz and unitcell information to apply the topological calculations on.
776 When this method is not provided with any input, it will take the
777 traj_container provided as `traj` in the `__init__()` method and transforms
778 this trajectory. The argument `xyz` can be the xyz coordinates in nanometer
779 of a trajectory with identical topology as `self.traj`. If `periodic` was
780 set to True, `unitcell_vectors` and `unitcell_info` should also be provided.
782 Args:
783 xyz (Optional[np.ndarray]): A numpy array with shape (n_frames, n_atoms, 3)
784 in nanometer. If None is provided, the coordinates of `self.traj`
785 will be used. Otherwise, the topology of this set of xyz
786 coordinates should match the topology of `self.atom`.
787 Defaults to None.
788 unitcell_vectors (Optional[np.ndarray]): When periodic is set to
789 True, the `unitcell_vectors` are needed to calculate the
790 minimum image convention in a periodic space. This numpy
791 array should have the shape (n_frames, 3, 3). The rows of this
792 array correlate to the Bravais vectors a, b, and c.
793 unitcell_info (Optional[np.ndarray]): Basically identical to
794 `unitcell_vectors`. A numpy array of shape (n_frames, 6), where
795 the first three columns are the unitcell_lengths in nanometer.
796 The other three columns are the unitcell_angles in degrees.
798 Returns:
799 np.ndarray: The result of the computation with shape (n_frames, n_indexes).
801 """
802 if xyz is not None:
803 self.traj = traj
804 xyz, unitcell_vectors, unitcell_info = super().transform(
805 xyz, unitcell_vectors, unitcell_info
806 )
807 if unitcell_info is not None:
808 traj = md.Trajectory(
809 xyz=xyz,
810 topology=self.traj.top,
811 unitcell_lengths=unitcell_info[:, :3],
812 unitcell_angles=unitcell_info[:, 3:],
813 )
814 else:
815 traj = md.Trajectory(
816 xyz=xyz,
817 topology=self.traj.top,
818 )
819 if hasattr(self, "call"):
820 if xyz is None:
821 traj = md.Trajectory(
822 xyz=self.traj.xyz.copy(),
823 topology=self.traj.top,
824 unitcell_lengths=deepcopy(traj.traj.unitcell_lengths),
825 unitcell_angles=deepcopy(traj.traj.unitcell_angles),
826 )
827 return self.call(traj)
828 feature = self._fun(traj, *self._args, **self._kwargs)
829 if not isinstance(feature, np.ndarray):
830 raise ValueError("Your function should return a NumPy array!")
831 return feature
834class SelectionFeature(Feature):
835 prefix_label: str = "ATOM:"
837 def __init__(
838 self,
839 traj: Union[SingleTraj, TrajEnsemble],
840 indexes: Sequence[int],
841 check_aas: bool = True,
842 delayed: bool = False,
843 ) -> None:
844 super().__init__(traj, check_aas, delayed=delayed)
845 self.indexes = np.asarray(indexes).astype("int32")
846 if len(self.indexes) == 0:
847 raise ValueError(f"Empty indices in {self.__class__.__name__}.")
848 self.dimension = 3 * len(self.indexes)
850 def describe(self) -> list[str]:
851 """Gives a list of strings describing this feature's feature-axis.
853 A feature computes a collective variable (CV). A CV is aligned with an MD
854 trajectory on the time/frame-axis. The feature axis is unique for every
855 feature. A feature describing the backbone torsions (phi, omega, psi) would
856 have a feature axis with the size 3*n-3, where n is the number of residues.
857 The end-to-end distance of a linear protein in contrast would just have
858 a feature axis with length 1. This `describe()` method will label these
859 values unambiguously. A backbone torsion feature's `describe()` could be
860 ['phi_1', 'omega_1', 'psi_1', 'phi_2', 'omega_2', ..., 'psi_n-1'].
861 The end-to-end distance feature could be described by
862 ['distance_between_MET1_and_LYS80'].
864 Returns:
865 list[str]: The labels of this feature.
867 """
868 labels = []
869 for i in self.indexes:
870 labels.append(f"{self.prefix_label}{_describe_atom(self.top, i)} x")
871 labels.append(f"{self.prefix_label}{_describe_atom(self.top, i)} y")
872 labels.append(f"{self.prefix_label}{_describe_atom(self.top, i)} z")
873 return labels
875 @property
876 def dask_indices(self) -> str:
877 """str: The name of the delayed transformation to carry out with this feature."""
878 return "indexes"
880 @staticmethod
881 @dask.delayed
882 def dask_transform(
883 indexes: np.ndarray,
884 xyz: Optional[np.ndarray] = None,
885 unitcell_vectors: Optional[np.ndarray] = None,
886 unitcell_info: Optional[np.ndarray] = None,
887 ) -> dask.delayed:
888 """The same as `transform()` but without the need to pickle `traj`.
890 When dask delayed concurrencies are distributed, required python objects
891 are pickled. Thus, every feature needs to have its own pickled traj.
892 That defeats the purpose of dask distributed. Thus, this method implements
893 the same calculations as `transform` as a more barebones approach.
894 It foregoes the checks for periodicity and unit-cell shape and just
895 takes xyz, unitcell vectors, and unitcell info. Furthermore, it is a
896 staticmethod, so it doesn't require `self` to function. However, it
897 needs the indexes in `self.indexes`. That's why the `dask_indices`
898 property informs the scheduler to also pickle and pass this object to
899 the workers.
901 Args:
902 indexes (np.ndarray): A numpy array with shape (n, ) giving the
903 0-based index of the atoms which positions should be returned.
904 xyz (Optional[np.ndarray]): A numpy array with shape (n_frames, n_atoms, 3)
905 in nanometer. If None is provided, the coordinates of `self.traj`
906 will be used. Otherwise, the topology of this set of xyz
907 coordinates should match the topology of `self.atom`.
908 Defaults to None.
909 unitcell_vectors (Optional[np.ndarray]): When periodic is set to
910 True, the `unitcell_vectors` are needed to calculate the
911 minimum image convention in a periodic space. This numpy
912 array should have the shape (n_frames, 3, 3). The rows of this
913 array correlate to the Bravais vectors a, b, and c.
914 unitcell_info (Optional[np.ndarray]): Basically identical to
915 `unitcell_vectors`. A numpy array of shape (n_frames, 6), where
916 the first three columns are the unitcell_lengths in nanometer.
917 The other three columns are the unitcell_angles in degrees.
919 """
920 newshape = (xyz.shape[0], 3 * indexes.shape[0])
921 result = np.reshape(xyz[:, indexes, :], newshape)
922 return result
924 def transform(
925 self,
926 xyz: Optional[np.ndarray] = None,
927 unitcell_vectors: Optional[np.ndarray] = None,
928 unitcell_info: Optional[np.ndarray] = None,
929 ) -> np.ndarray:
930 """Takes xyz and unitcell information to apply the topological calculations on.
932 When this method is not provided with any input, it will take the
933 traj_container provided as `traj` in the `__init__()` method and transforms
934 this trajectory. The argument `xyz` can be the xyz coordinates in nanometer
935 of a trajectory with identical topology as `self.traj`. If `periodic` was
936 set to True, `unitcell_vectors` and `unitcell_info` should also be provided.
938 Args:
939 xyz (Optional[np.ndarray]): A numpy array with shape (n_frames, n_atoms, 3)
940 in nanometer. If None is provided, the coordinates of `self.traj`
941 will be used. Otherwise, the topology of this set of xyz
942 coordinates should match the topology of `self.atom`.
943 Defaults to None.
944 unitcell_vectors (Optional[np.ndarray]): When periodic is set to
945 True, the `unitcell_vectors` are needed to calculate the
946 minimum image convention in a periodic space. This numpy
947 array should have the shape (n_frames, 3, 3). The rows of this
948 array correlate to the Bravais vectors a, b, and c.
949 unitcell_info (Optional[np.ndarray]): Basically identical to
950 `unitcell_vectors`. A numpy array of shape (n_frames, 6), where
951 the first three columns are the unitcell_lengths in nanometer.
952 The other three columns are the unitcell_angles in degrees.
954 Returns:
955 np.ndarray: The result of the computation with shape (n_frames, n_indexes).
957 """
958 xyz, unitcell_vectors, unitcell_info = super().transform(
959 xyz, unitcell_vectors, unitcell_info
960 )
961 newshape = (xyz.shape[0], 3 * self.indexes.shape[0])
962 result = np.reshape(xyz[:, self.indexes, :], newshape)
963 return result
966class AngleFeature(Feature):
967 def __init__(
968 self,
969 traj: Union[SingleTraj, TrajEnsemble],
970 angle_indexes: np.ndarray,
971 deg: bool = False,
972 cossin: bool = False,
973 periodic: bool = True,
974 check_aas: bool = True,
975 delayed: bool = False,
976 ) -> None:
977 """Instantiate the `AngleFeature` class.
979 Args:
980 traj (Union[SingleTraj, TrajEnsemble]): The trajectory container
981 which topological information will be used to build the angles.
982 angle_indexes (np.ndarray): A numpy array with shape (n_dihedrals, 4),
983 that indexes the 3-tuples of atoms that will be used for
984 the angle calculation.
985 deg (bool): Whether to return the dihedrals in degree (True) or
986 in radian (False). Defaults to False.
987 cossin (bool): Whether to return the angles (False) or tuples of their
988 cos and sin values (True). Defaults to False.
989 periodic (bool): Whether to observe the minimum image convention
990 and respect proteins breaking over the periodic boundary
991 condition as a whole (True). In this case, the trajectory container
992 in `traj` needs to have unitcell information. Defaults to True.
993 check_aas (bool): Whether to check if all aas in `traj.top` are
994 recognized. Defaults to True.
996 """
997 self.angle_indexes = np.array(angle_indexes).astype("int32")
998 if len(self.angle_indexes) == 0:
999 raise ValueError("empty indices")
1000 self.deg = deg
1001 self.cossin = cossin
1002 self.dimension = len(self.angle_indexes)
1003 if cossin:
1004 self.dimension *= 2
1005 super().__init__(traj, check_aas, periodic=periodic, delayed=delayed)
1007 def describe(self) -> list[str]:
1008 """Gives a list of strings describing this feature's feature-axis.
1010 A feature computes a collective variable (CV). A CV is aligned with an MD
1011 trajectory on the time/frame-axis. The feature axis is unique for every
1012 feature. A feature describing the backbone torsions (phi, omega, psi) would
1013 have a feature axis with the size 3*n-3, where n is the number of residues.
1014 The end-to-end distance of a linear protein in contrast would just have
1015 a feature axis with length 1. This `describe()` method will label these
1016 values unambiguously. A backbone torsion feature's `describe()` could be
1017 ['phi_1', 'omega_1', 'psi_1', 'phi_2', 'omega_2', ..., 'psi_n-1'].
1018 The end-to-end distance feature could be described by
1019 ['distance_between_MET1_and_LYS80'].
1021 Returns:
1022 list[str]: The labels of this feature.
1024 """
1025 if self.cossin:
1026 sin_cos = ("ANGLE: COS(%s - %s - %s)", "ANGLE: SIN(%s - %s - %s)")
1027 labels = [
1028 s
1029 % (
1030 _describe_atom(self.top, triple[0]),
1031 _describe_atom(self.top, triple[1]),
1032 _describe_atom(self.top, triple[2]),
1033 )
1034 for triple in self.angle_indexes
1035 for s in sin_cos
1036 ]
1037 else:
1038 labels = [
1039 "ANGLE: %s - %s - %s "
1040 % (
1041 _describe_atom(self.top, triple[0]),
1042 _describe_atom(self.top, triple[1]),
1043 _describe_atom(self.top, triple[2]),
1044 )
1045 for triple in self.angle_indexes
1046 ]
1047 return labels
1049 @property
1050 def dask_indices(self) -> str:
1051 """str: The name of the delayed transformation to carry out with this feature."""
1052 return "angle_indexes"
1054 @staticmethod
1055 @dask.delayed
1056 def dask_transform(
1057 indexes: np.ndarray,
1058 periodic: bool,
1059 deg: bool,
1060 cossin: bool,
1061 xyz: Optional[np.ndarray] = None,
1062 unitcell_vectors: Optional[np.ndarray] = None,
1063 unitcell_info: Optional[np.ndarray] = None,
1064 ) -> dask.delayed:
1065 """The same as `transform()` but without the need to pickle `traj`.
1067 When dask delayed concurrencies are distributed, required python objects
1068 are pickled. Thus, every feature needs to have its own pickled traj.
1069 That defeats the purpose of dask distributed. Thus, this method implements
1070 the same calculations as `transform` as a more barebones approach.
1071 It foregoes the checks for periodicity and unit-cell shape and just
1072 takes xyz, unitcell vectors, and unitcell info. Furthermore, it is a
1073 staticmethod, so it doesn't require `self` to function. However, it
1074 needs the indexes in `self.indexes`. That's why the `dask_indices`
1075 property informs the scheduler to also pickle and pass this object to
1076 the workers.
1078 Args:
1079 indexes (np.ndarray): A numpy array with shape (n, ) giving the
1080 0-based index of the atoms which positions should be returned.
1081 periodic (bool): Whether to observe the minimum image convention
1082 and respect proteins breaking over the periodic boundary
1083 condition as a whole (True). In this case, the trajectory container
1084 in `traj` needs to have unitcell information. Defaults to True.
1085 deg (bool): Whether to return the result in degree (`deg=True`) or in
1086 radians (`deg=False`). Defaults to False (radians).
1087 cossin (bool): If True, each angle will be returned as a pair of
1088 (sin(x), cos(x)). This is useful, if you calculate the means
1089 (e.g. TICA/PCA, clustering) in that space. Defaults to False.
1090 xyz (Optional[np.ndarray]): A numpy array with shape (n_frames, n_atoms, 3)
1091 in nanometer. If None is provided, the coordinates of `self.traj`
1092 will be used. Otherwise, the topology of this set of xyz
1093 coordinates should match the topology of `self.atom`.
1094 Defaults to None.
1095 unitcell_vectors (Optional[np.ndarray]): When periodic is set to
1096 True, the `unitcell_vectors` are needed to calculate the
1097 minimum image convention in a periodic space. This numpy
1098 array should have the shape (n_frames, 3, 3). The rows of this
1099 array correlate to the Bravais vectors a, b, and c.
1100 unitcell_info (Optional[np.ndarray]): Basically identical to
1101 `unitcell_vectors`. A numpy array of shape (n_frames, 6), where
1102 the first three columns are the unitcell_lengths in nanometer.
1103 The other three columns are the unitcell_angles in degrees.
1105 """
1106 if periodic:
1107 assert unitcell_vectors is not None
1108 if unitcell_info is None:
1109 # convert to angles
1110 unitcell_angles = []
1111 for fr_unitcell_vectors in unitcell_vectors:
1112 _, _, _, alpha, beta, gamma = box_vectors_to_lengths_and_angles(
1113 fr_unitcell_vectors[0],
1114 fr_unitcell_vectors[1],
1115 fr_unitcell_vectors[2],
1116 )
1117 unitcell_angles.append(np.array([alpha, beta, gamma]))
1118 else:
1119 unitcell_angles = unitcell_info[:, 3:]
1120 # check for an orthogonal box
1121 orthogonal = np.allclose(unitcell_angles, 90)
1123 out = np.empty((xyz.shape[0], indexes.shape[0]), dtype="float32", order="C")
1124 _angle_mic(
1125 xyz,
1126 indexes,
1127 unitcell_vectors.transpose(0, 2, 1).copy(),
1128 out,
1129 orthogonal,
1130 )
1131 else:
1132 out = np.empty((xyz.shape[0], indexes.shape[0]), dtype="float32", order="C")
1133 _angle(xyz, indexes, out)
1134 if cossin:
1135 out = np.dstack((np.cos(out), np.sin(out)))
1136 out = out.reshape(out.shape[0], out.shape[1] * out.shape[2])
1137 if deg and not cossin:
1138 out = np.rad2deg(out)
1139 return out
1141 def transform(
1142 self,
1143 xyz: Optional[np.ndarray] = None,
1144 unitcell_vectors: Optional[np.ndarray] = None,
1145 unitcell_info: Optional[np.ndarray] = None,
1146 ) -> np.ndarray:
1147 """Takes xyz and unitcell information to apply the topological calculations on.
1149 When this method is not provided with any input, it will take the
1150 traj_container provided as `traj` in the `__init__()` method and transforms
1151 this trajectory. The argument `xyz` can be the xyz coordinates in nanometer
1152 of a trajectory with identical topology as `self.traj`. If `periodic` was
1153 set to True, `unitcell_vectors` and `unitcell_info` should also be provided.
1155 Args:
1156 xyz (Optional[np.ndarray]): A numpy array with shape (n_frames, n_atoms, 3)
1157 in nanometer. If None is provided, the coordinates of `self.traj`
1158 will be used. Otherwise, the topology of this set of xyz
1159 coordinates should match the topology of `self.atom`.
1160 Defaults to None.
1161 unitcell_vectors (Optional[np.ndarray]): When periodic is set to
1162 True, the `unitcell_vectors` are needed to calculate the
1163 minimum image convention in a periodic space. This numpy
1164 array should have the shape (n_frames, 3, 3). The rows of this
1165 array correlate to the Bravais vectors a, b, and c.
1166 unitcell_info (Optional[np.ndarray]): Basically identical to
1167 `unitcell_vectors`. A numpy array of shape (n_frames, 6), where
1168 the first three columns are the unitcell_lengths in nanometer.
1169 The other three columns are the unitcell_angles in degrees.
1171 Returns:
1172 np.ndarray: The result of the computation with shape (n_frames, n_indexes).
1174 """
1175 if xyz is not None:
1176 periodic = self.periodic
1177 else:
1178 periodic = self.periodic and self.traj._have_unitcell
1179 xyz, unitcell_vectors, unitcell_info = super().transform(
1180 xyz, unitcell_vectors, unitcell_info
1181 )
1182 if periodic:
1183 assert unitcell_vectors is not None
1184 if unitcell_info is None:
1185 # convert to angles
1186 unitcell_angles = []
1187 for fr_unitcell_vectors in unitcell_vectors:
1188 _, _, _, alpha, beta, gamma = box_vectors_to_lengths_and_angles(
1189 fr_unitcell_vectors[0],
1190 fr_unitcell_vectors[1],
1191 fr_unitcell_vectors[2],
1192 )
1193 unitcell_angles.append(np.array([alpha, beta, gamma]))
1194 else:
1195 unitcell_angles = unitcell_info[:, 3:]
1196 # check for an orthogonal box
1197 orthogonal = np.allclose(unitcell_angles, 90)
1199 out = np.empty(
1200 (xyz.shape[0], self.angle_indexes.shape[0]), dtype="float32", order="C"
1201 )
1202 _angle_mic(
1203 xyz,
1204 self.angle_indexes,
1205 unitcell_vectors.transpose(0, 2, 1).copy(),
1206 out,
1207 orthogonal,
1208 )
1209 else:
1210 out = np.empty(
1211 (xyz.shape[0], self.angle_indexes.shape[0]), dtype="float32", order="C"
1212 )
1213 _angle(xyz, self.angle_indexes, out)
1214 if self.cossin:
1215 out = np.dstack((np.cos(out), np.sin(out)))
1216 out = out.reshape(out.shape[0], out.shape[1] * out.shape[2])
1217 if self.deg and not self.cossin:
1218 out = np.rad2deg(out)
1219 return out
1222class DihedralFeature(AngleFeature):
1223 """Dihedrals are torsion angles defined by four atoms."""
1225 def __init__(
1226 self,
1227 traj: Union[SingleTraj, TrajEnsemble],
1228 dih_indexes: np.ndarray,
1229 deg: bool = False,
1230 cossin: bool = False,
1231 periodic: bool = True,
1232 check_aas: bool = True,
1233 delayed: bool = False,
1234 ) -> None:
1235 """Instantiate the `DihedralFeature` class.
1237 Args:
1238 traj (Union[SingleTraj, TrajEnsemble]): The trajectory container
1239 which topological information will be used to build the dihedrals.
1240 dih_indexes (np.ndarray): A numpy array with shape (n_dihedrals, 4),
1241 that indexes the 4-tuples of atoms that will be used for
1242 the dihedral calculation.
1243 deg (bool): Whether to return the dihedrals in degree (True) or
1244 in radian (False). Defaults to False.
1245 cossin (bool): Whether to return the angles (False) or tuples of their
1246 cos and sin values (True). Defaults to False.
1247 periodic (bool): Whether to observe the minimum image convention
1248 and respect proteins breaking over the periodic boundary
1249 condition as a whole (True). In this case, the trajectory container
1250 in `traj` needs to have unitcell information. Defaults to True.
1251 check_aas (bool): Whether to check if all aas in `traj.top` are
1252 recognized. Defaults to True.
1254 """
1255 super().__init__(
1256 traj=traj,
1257 angle_indexes=dih_indexes,
1258 deg=deg,
1259 cossin=cossin,
1260 periodic=periodic,
1261 check_aas=check_aas,
1262 delayed=delayed,
1263 )
1265 def describe(self) -> list[str]:
1266 """Gives a list of strings describing this feature's feature-axis.
1268 A feature computes a collective variable (CV). A CV is aligned with an MD
1269 trajectory on the time/frame-axis. The feature axis is unique for every
1270 feature. A feature describing the backbone torsions (phi, omega, psi) would
1271 have a feature axis with the size 3*n-3, where n is the number of residues.
1272 The end-to-end distance of a linear protein in contrast would just have
1273 a feature axis with length 1. This `describe()` method will label these
1274 values unambiguously. A backbone torsion feature's `describe()` could be
1275 ['phi_1', 'omega_1', 'psi_1', 'phi_2', 'omega_2', ..., 'psi_n-1'].
1276 The end-to-end distance feature could be described by
1277 ['distance_between_MET1_and_LYS80'].
1279 Returns:
1280 list[str]: The labels of this feature. The length
1281 is determined by the `dih_indexes` and the `cossin` argument
1282 in the `__init__()` method. If `cossin` is false, then
1283 `len(describe()) == self.angle_indexes[-1]`, else `len(describe())`
1284 is twice as long.
1286 """
1287 if self.cossin:
1288 sin_cos = ("DIH: COS(%s - %s - %s - %s)", "DIH: SIN(%s - %s - %s - %s)")
1289 labels = [
1290 s
1291 % (
1292 _describe_atom(self.top, quad[0]),
1293 _describe_atom(self.top, quad[1]),
1294 _describe_atom(self.top, quad[2]),
1295 _describe_atom(self.top, quad[3]),
1296 )
1297 for quad in self.angle_indexes
1298 for s in sin_cos
1299 ]
1300 else:
1301 labels = [
1302 "DIH: %s - %s - %s - %s "
1303 % (
1304 _describe_atom(self.top, quad[0]),
1305 _describe_atom(self.top, quad[1]),
1306 _describe_atom(self.top, quad[2]),
1307 _describe_atom(self.top, quad[3]),
1308 )
1309 for quad in self.angle_indexes
1310 ]
1311 return labels
1313 @staticmethod
1314 @dask.delayed
1315 def dask_transform(
1316 indexes: np.ndarray,
1317 periodic: bool,
1318 deg: bool,
1319 cossin: bool,
1320 xyz: Optional[np.ndarray] = None,
1321 unitcell_vectors: Optional[np.ndarray] = None,
1322 unitcell_info: Optional[np.ndarray] = None,
1323 ) -> dask.delayed:
1324 """The same as `transform()` but without the need to pickle `traj`.
1326 When dask delayed concurrencies are distributed, required python objects
1327 are pickled. Thus, every feature needs to have its own pickled traj.
1328 That defeats the purpose of dask distributed. Thus, this method implements
1329 the same calculations as `transform` as a more barebones approach.
1330 It foregoes the checks for periodicity and unit-cell shape and just
1331 takes xyz, unitcell vectors, and unitcell info. Furthermore, it is a
1332 staticmethod, so it doesn't require `self` to function. However, it
1333 needs the indexes in `self.indexes`. That's why the `dask_indices`
1334 property informs the scheduler to also pickle and pass this object to
1335 the workers.
1337 Args:
1338 indexes (np.ndarray): A numpy array with shape (n, ) giving the
1339 0-based index of the atoms which positions should be returned.
1340 periodic (bool): Whether to observe the minimum image convention
1341 and respect proteins breaking over the periodic boundary
1342 condition as a whole (True). In this case, the trajectory container
1343 in `traj` needs to have unitcell information. Defaults to True.
1344 deg (bool): Whether to return the result in degree (`deg=True`) or in
1345 radians (`deg=False`). Defaults to False (radians).
1346 cossin (bool): If True, each angle will be returned as a pair of
1347 (sin(x), cos(x)). This is useful, if you calculate the means
1348 (e.g. TICA/PCA, clustering) in that space. Defaults to False.
1349 xyz (Optional[np.ndarray]): A numpy array with shape (n_frames, n_atoms, 3)
1350 in nanometer. If None is provided, the coordinates of `self.traj`
1351 will be used. Otherwise, the topology of this set of xyz
1352 coordinates should match the topology of `self.atom`.
1353 Defaults to None.
1354 unitcell_vectors (Optional[np.ndarray]): When periodic is set to
1355 True, the `unitcell_vectors` are needed to calculate the
1356 minimum image convention in a periodic space. This numpy
1357 array should have the shape (n_frames, 3, 3). The rows of this
1358 array correlate to the Bravais vectors a, b, and c.
1359 unitcell_info (Optional[np.ndarray]): Basically identical to
1360 `unitcell_vectors`. A numpy array of shape (n_frames, 6), where
1361 the first three columns are the unitcell_lengths in nanometer.
1362 The other three columns are the unitcell_angles in degrees.
1364 """
1365 if periodic:
1366 assert unitcell_vectors is not None
1367 # convert to angles
1368 if unitcell_info is None:
1369 unitcell_angles = []
1370 for fr_unitcell_vectors in unitcell_vectors:
1371 _, _, _, alpha, beta, gamma = box_vectors_to_lengths_and_angles(
1372 fr_unitcell_vectors[0],
1373 fr_unitcell_vectors[1],
1374 fr_unitcell_vectors[2],
1375 )
1376 unitcell_angles.append(np.array([alpha, beta, gamma]))
1377 else:
1378 unitcell_angles = unitcell_info[:, 3:]
1380 # check for an orthogonal box
1381 orthogonal = np.allclose(unitcell_angles, 90)
1383 out = np.empty((xyz.shape[0], indexes.shape[0]), dtype="float32", order="C")
1384 _dihedral_mic(
1385 xyz,
1386 indexes,
1387 unitcell_vectors.transpose(0, 2, 1).copy(),
1388 out,
1389 orthogonal,
1390 )
1392 else:
1393 out = np.empty((xyz.shape[0], indexes.shape[0]), dtype="float32", order="C")
1394 _dihedral(xyz, indexes, out)
1396 if cossin:
1397 out = np.dstack((np.cos(out), np.sin(out)))
1398 out = out.reshape(out.shape[0], out.shape[1] * out.shape[2])
1400 if deg:
1401 out = np.rad2deg(out)
1402 return out
1404 def transform(
1405 self,
1406 xyz: Optional[np.ndarray] = None,
1407 unitcell_vectors: Optional[np.ndarray] = None,
1408 unitcell_info: Optional[np.ndarray] = None,
1409 ) -> np.ndarray:
1410 """Takes xyz and unitcell information to apply the topological calculations on.
1412 When this method is not provided with any input, it will take the
1413 traj_container provided as `traj` in the `__init__()` method and transforms
1414 this trajectory. The argument `xyz` can be the xyz coordinates in nanometer
1415 of a trajectory with identical topology as `self.traj`. If `periodic` was
1416 set to True, `unitcell_vectors` and `unitcell_info` should also be provided.
1418 Args:
1419 xyz (Optional[np.ndarray]): A numpy array with shape (n_frames, n_atoms, 3)
1420 in nanometer. If None is provided, the coordinates of `self.traj`
1421 will be used. Otherwise, the topology of this set of xyz
1422 coordinates should match the topology of `self.atom`.
1423 Defaults to None.
1424 unitcell_vectors (Optional[np.ndarray]): When periodic is set to
1425 True, the `unitcell_vectors` are needed to calculate the
1426 minimum image convention in a periodic space. This numpy
1427 array should have the shape (n_frames, 3, 3). The rows of this
1428 array correlate to the Bravais vectors a, b, and c.
1429 unitcell_info (Optional[np.ndarray]): Basically identical to
1430 `unitcell_vectors`. A numpy array of shape (n_frames, 6), where
1431 the first three columns are the unitcell_lengths in nanometer.
1432 The other three columns are the unitcell_angles in degrees.
1434 Returns:
1435 np.ndarray: The result of the computation with shape (n_frames, n_indexes).
1437 """
1438 if xyz is not None:
1439 periodic = self.periodic
1440 else:
1441 periodic = self.periodic and self.traj._have_unitcell
1442 xyz, unitcell_vectors, unitcell_info = Feature.transform(
1443 self, xyz, unitcell_vectors, unitcell_info
1444 )
1445 if periodic:
1446 assert unitcell_vectors is not None
1448 # convert to angles
1449 if unitcell_info is None:
1450 unitcell_angles = []
1451 for fr_unitcell_vectors in unitcell_vectors:
1452 _, _, _, alpha, beta, gamma = box_vectors_to_lengths_and_angles(
1453 fr_unitcell_vectors[0],
1454 fr_unitcell_vectors[1],
1455 fr_unitcell_vectors[2],
1456 )
1457 unitcell_angles.append(np.array([alpha, beta, gamma]))
1458 else:
1459 unitcell_angles = unitcell_info[:, 3:]
1461 # check for an orthogonal box
1462 orthogonal = np.allclose(unitcell_angles, 90)
1464 out = np.empty(
1465 (xyz.shape[0], self.angle_indexes.shape[0]), dtype="float32", order="C"
1466 )
1467 _dihedral_mic(
1468 xyz,
1469 self.angle_indexes,
1470 unitcell_vectors.transpose(0, 2, 1).copy(),
1471 out,
1472 orthogonal,
1473 )
1475 else:
1476 out = np.empty(
1477 (xyz.shape[0], self.angle_indexes.shape[0]), dtype="float32", order="C"
1478 )
1479 _dihedral(xyz, self.angle_indexes, out)
1481 if self.cossin:
1482 out = np.dstack((np.cos(out), np.sin(out)))
1483 out = out.reshape(out.shape[0], out.shape[1] * out.shape[2])
1485 if self.deg:
1486 out = np.rad2deg(out)
1487 return out
1490class DistanceFeature(Feature):
1491 prefix_label: str = "DIST:"
1493 def __init__(
1494 self,
1495 traj: Union[SingleTraj, TrajEnsemble],
1496 distance_indexes: np.ndarray,
1497 periodic: bool = True,
1498 dim: Optional[int] = None,
1499 check_aas: bool = True,
1500 delayed: bool = False,
1501 ) -> None:
1502 super().__init__(traj, check_aas, periodic=periodic, delayed=delayed)
1503 self.distance_indexes = np.array(distance_indexes)
1504 if len(self.distance_indexes) == 0:
1505 raise ValueError("empty indices")
1506 if dim is None:
1507 self._dim = len(distance_indexes)
1508 else:
1509 self._dim = dim
1511 def describe(self) -> list[str]:
1512 """Gives a list of strings describing this feature's feature-axis.
1514 A feature computes a collective variable (CV). A CV is aligned with an MD
1515 trajectory on the time/frame-axis. The feature axis is unique for every
1516 feature. A feature describing the backbone torsions (phi, omega, psi) would
1517 have a feature axis with the size 3*n-3, where n is the number of residues.
1518 The end-to-end distance of a linear protein in contrast would just have
1519 a feature axis with length 1. This `describe()` method will label these
1520 values unambiguously. A backbone torsion feature's `describe()` could be
1521 ['phi_1', 'omega_1', 'psi_1', 'phi_2', 'omega_2', ..., 'psi_n-1'].
1522 The end-to-end distance feature could be described by
1523 ['distance_between_MET1_and_LYS80'].
1525 Returns:
1526 list[str]: The labels of this feature.
1528 """
1529 labels = [
1530 (
1531 f"{self.prefix_label} {_describe_atom(self.top, pair[0])} "
1532 f"{_describe_atom(self.top, pair[1])}"
1533 )
1534 for pair in self.distance_indexes
1535 ]
1536 return labels
1538 @property
1539 def dask_indices(self) -> str:
1540 """str: The name of the delayed transformation to carry out with this feature."""
1541 return "distance_indexes"
1543 @staticmethod
1544 @dask.delayed
1545 def dask_transform(
1546 indexes: np.ndarray,
1547 periodic: bool,
1548 xyz: Optional[np.ndarray] = None,
1549 unitcell_vectors: Optional[np.ndarray] = None,
1550 unitcell_info: Optional[np.ndarray] = None,
1551 ) -> dask.delayed:
1552 """The same as `transform()` but without the need to pickle `traj`.
1554 When dask delayed concurrencies are distributed, required python objects
1555 are pickled. Thus, every feature needs to have its own pickled traj.
1556 That defeats the purpose of dask distributed. Thus, this method implements
1557 the same calculations as `transform` as a more barebones approach.
1558 It foregoes the checks for periodicity and unit-cell shape and just
1559 takes xyz, unitcell vectors, and unitcell info. Furthermore, it is a
1560 staticmethod, so it doesn't require `self` to function. However, it
1561 needs the indexes in `self.indexes`. That's why the `dask_indices`
1562 property informs the scheduler to also pickle and pass this object to
1563 the workers.
1565 Args:
1566 indexes (np.ndarray): A numpy array with shape (n, ) giving the
1567 0-based index of the atoms which positions should be returned.
1568 periodic (bool): Whether to observe the minimum image convention
1569 and respect proteins breaking over the periodic boundary
1570 condition as a whole (True). In this case, the trajectory container
1571 in `traj` needs to have unitcell information. Defaults to True.
1572 xyz (Optional[np.ndarray]): A numpy array with shape (n_frames, n_atoms, 3)
1573 in nanometer. If None is provided, the coordinates of `self.traj`
1574 will be used. Otherwise, the topology of this set of xyz
1575 coordinates should match the topology of `self.atom`.
1576 Defaults to None.
1577 unitcell_vectors (Optional[np.ndarray]): When periodic is set to
1578 True, the `unitcell_vectors` are needed to calculate the
1579 minimum image convention in a periodic space. This numpy
1580 array should have the shape (n_frames, 3, 3). The rows of this
1581 array correlate to the Bravais vectors a, b, and c.
1582 unitcell_info (Optional[np.ndarray]): Basically identical to
1583 `unitcell_vectors`. A numpy array of shape (n_frames, 6), where
1584 the first three columns are the unitcell_lengths in nanometer.
1585 The other three columns are the unitcell_angles in degrees.
1587 """
1588 if periodic:
1589 assert unitcell_vectors is not None
1590 # check for an orthogonal box
1591 if unitcell_info is None:
1592 # convert to angles
1593 unitcell_angles = []
1594 for fr_unitcell_vectors in unitcell_vectors:
1595 _, _, _, alpha, beta, gamma = box_vectors_to_lengths_and_angles(
1596 fr_unitcell_vectors[0],
1597 fr_unitcell_vectors[1],
1598 fr_unitcell_vectors[2],
1599 )
1600 unitcell_angles.append(np.array([alpha, beta, gamma]))
1601 else:
1602 unitcell_angles = unitcell_info[:, 3:]
1604 # check for an orthogonal box
1605 orthogonal = np.allclose(unitcell_angles, 90)
1607 out = np.empty(
1608 (
1609 xyz.shape[0],
1610 indexes.shape[0],
1611 ),
1612 dtype="float32",
1613 order="C",
1614 )
1615 _dist_mic(
1616 xyz,
1617 indexes.astype("int32"),
1618 unitcell_vectors.transpose(0, 2, 1).copy(),
1619 out,
1620 orthogonal,
1621 )
1622 else:
1623 out = np.empty(
1624 (
1625 xyz.shape[0],
1626 indexes.shape[0],
1627 ),
1628 dtype="float32",
1629 order="C",
1630 )
1631 _dist(xyz, indexes.astype("int32"), out)
1632 return out
1634 def transform(
1635 self,
1636 xyz: Optional[np.ndarray] = None,
1637 unitcell_vectors: Optional[np.ndarray] = None,
1638 unitcell_info: Optional[np.ndarray] = None,
1639 ) -> np.ndarray:
1640 """Takes xyz and unitcell information to apply the topological calculations on.
1642 When this method is not provided with any input, it will take the
1643 traj_container provided as `traj` in the `__init__()` method and transforms
1644 this trajectory. The argument `xyz` can be the xyz coordinates in nanometer
1645 of a trajectory with identical topology as `self.traj`. If `periodic` was
1646 set to True, `unitcell_vectors` and `unitcell_info` should also be provided.
1648 Args:
1649 xyz (Optional[np.ndarray]): A numpy array with shape (n_frames, n_atoms, 3)
1650 in nanometer. If None is provided, the coordinates of `self.traj`
1651 will be used. Otherwise, the topology of this set of xyz
1652 coordinates should match the topology of `self.atom`.
1653 Defaults to None.
1654 unitcell_vectors (Optional[np.ndarray]): When periodic is set to
1655 True, the `unitcell_vectors` are needed to calculate the
1656 minimum image convention in a periodic space. This numpy
1657 array should have the shape (n_frames, 3, 3). The rows of this
1658 array correlate to the Bravais vectors a, b, and c.
1659 unitcell_info (Optional[np.ndarray]): Basically identical to
1660 `unitcell_vectors`. A numpy array of shape (n_frames, 6), where
1661 the first three columns are the unitcell_lengths in nanometer.
1662 The other three columns are the unitcell_angles in degrees.
1664 Returns:
1665 np.ndarray: The result of the computation with shape (n_frames, n_indexes).
1667 """
1668 if xyz is not None:
1669 periodic = self.periodic
1670 else:
1671 periodic = self.periodic and self.traj._have_unitcell
1672 xyz, unitcell_vectors, unitcell_info = super().transform(
1673 xyz, unitcell_vectors, unitcell_info
1674 )
1675 if periodic:
1676 assert unitcell_info is not None
1678 # check for an orthogonal box
1679 if unitcell_info is None:
1680 # convert to angles
1681 unitcell_angles = []
1682 for fr_unitcell_vectors in unitcell_vectors:
1683 _, _, _, alpha, beta, gamma = box_vectors_to_lengths_and_angles(
1684 fr_unitcell_vectors[0],
1685 fr_unitcell_vectors[1],
1686 fr_unitcell_vectors[2],
1687 )
1688 unitcell_angles.append(np.array([alpha, beta, gamma]))
1689 else:
1690 unitcell_angles = unitcell_info[:, 3:]
1691 # check for an orthogonal box
1692 orthogonal = np.allclose(unitcell_angles, 90)
1693 out = np.empty(
1694 (
1695 xyz.shape[0],
1696 self.distance_indexes.shape[0],
1697 ),
1698 dtype="float32",
1699 order="C",
1700 )
1701 _dist_mic(
1702 xyz,
1703 np.ascontiguousarray(self.distance_indexes.astype("int32")),
1704 unitcell_vectors.transpose(0, 2, 1).copy(),
1705 out,
1706 orthogonal,
1707 )
1708 else:
1709 out = np.empty(
1710 (
1711 xyz.shape[0],
1712 self.distance_indexes.shape[0],
1713 ),
1714 dtype="float32",
1715 order="C",
1716 )
1717 _dist(xyz, np.ascontiguousarray(self.distance_indexes.astype("int32")), out)
1718 return out
1721class AlignFeature(SelectionFeature):
1722 prefix_label: str = "aligned ATOM:"
1724 def __init__(
1725 self,
1726 traj: SingleTraj,
1727 reference: md.Trajectory,
1728 indexes: np.ndarray,
1729 atom_indices: Optional[np.ndarray] = None,
1730 ref_atom_indices: Optional[np.ndarray] = None,
1731 in_place: bool = False,
1732 delayed: bool = False,
1733 ) -> None:
1734 super(AlignFeature, self).__init__(traj=traj, indexes=indexes, delayed=delayed)
1735 self.ref = reference
1736 self.atom_indices = atom_indices
1737 self.ref_atom_indices = ref_atom_indices
1738 self.in_place = in_place
1740 def transform(
1741 self,
1742 xyz: Optional[np.ndarray] = None,
1743 unitcell_vectors: Optional[np.ndarray] = None,
1744 unitcell_info: Optional[np.ndarray] = None,
1745 ) -> np.ndarray:
1746 """Returns the aligned xyz coordinates."""
1747 if not self.in_place:
1748 traj = self.traj.traj.slice(slice(None), copy=True)
1749 else:
1750 traj = self.traj.traj
1751 traj.xyz = xyz
1752 aligned = traj.superpose(
1753 reference=self.ref,
1754 atom_indices=self.atom_indices,
1755 ref_atom_indices=self.ref_atom_indices,
1756 )
1757 # apply selection
1758 return super(AlignFeature, self).transform(
1759 aligned.xyz, unitcell_vectors, unitcell_info
1760 )
1763class InverseDistanceFeature(DistanceFeature):
1764 prefix_label: str = "INVDIST:"
1766 def __init__(
1767 self,
1768 traj: Union[SingleTraj, TrajEnsemble],
1769 distance_indexes: np.ndarray,
1770 periodic: bool = True,
1771 delayed: bool = False,
1772 ) -> None:
1773 DistanceFeature.__init__(
1774 self, traj, distance_indexes, periodic=periodic, delayed=delayed
1775 )
1777 @property
1778 def dask_indices(self) -> str:
1779 """str: The name of the delayed transformation to carry out with this feature."""
1780 return "distance_indexes"
1782 @staticmethod
1783 @dask.delayed
1784 def dask_transform(
1785 indexes: np.ndarray,
1786 periodic: bool,
1787 xyz: Optional[np.ndarray] = None,
1788 unitcell_vectors: Optional[np.ndarray] = None,
1789 unitcell_info: Optional[np.ndarray] = None,
1790 ) -> dask.delayed:
1791 """The same as `transform()` but without the need to pickle `traj`.
1793 When dask delayed concurrencies are distributed, required python objects
1794 are pickled. Thus, every feature needs to have its own pickled traj.
1795 That defeats the purpose of dask distributed. Thus, this method implements
1796 the same calculations as `transform` as a more barebones approach.
1797 It foregoes the checks for periodicity and unit-cell shape and just
1798 takes xyz, unitcell vectors, and unitcell info. Furthermore, it is a
1799 staticmethod, so it doesn't require `self` to function. However, it
1800 needs the indexes in `self.indexes`. That's why the `dask_indices`
1801 property informs the scheduler to also pickle and pass this object to
1802 the workers.
1804 Args:
1805 indexes (np.ndarray): A numpy array with shape (n, ) giving the
1806 0-based index of the atoms which positions should be returned.
1807 periodic (bool): Whether to observe the minimum image convention
1808 and respect proteins breaking over the periodic boundary
1809 condition as a whole (True). In this case, the trajectory container
1810 in `traj` needs to have unitcell information. Defaults to True.
1811 xyz (Optional[np.ndarray]): A numpy array with shape (n_frames, n_atoms, 3)
1812 in nanometer. If None is provided, the coordinates of `self.traj`
1813 will be used. Otherwise, the topology of this set of xyz
1814 coordinates should match the topology of `self.atom`.
1815 Defaults to None.
1816 unitcell_vectors (Optional[np.ndarray]): When periodic is set to
1817 True, the `unitcell_vectors` are needed to calculate the
1818 minimum image convention in a periodic space. This numpy
1819 array should have the shape (n_frames, 3, 3). The rows of this
1820 array correlate to the Bravais vectors a, b, and c.
1821 unitcell_info (Optional[np.ndarray]): Basically identical to
1822 `unitcell_vectors`. A numpy array of shape (n_frames, 6), where
1823 the first three columns are the unitcell_lengths in nanometer.
1824 The other three columns are the unitcell_angles in degrees.
1826 """
1827 if periodic:
1828 assert unitcell_vectors is not None
1829 # check for an orthogonal box
1830 if unitcell_info is None:
1831 # convert to angles
1832 unitcell_angles = []
1833 for fr_unitcell_vectors in unitcell_vectors:
1834 _, _, _, alpha, beta, gamma = box_vectors_to_lengths_and_angles(
1835 fr_unitcell_vectors[0],
1836 fr_unitcell_vectors[1],
1837 fr_unitcell_vectors[2],
1838 )
1839 unitcell_angles.append(np.array([alpha, beta, gamma]))
1840 else:
1841 unitcell_angles = unitcell_info[:, 3:]
1842 # check for an orthogonal box
1843 orthogonal = np.allclose(unitcell_angles, 90)
1845 out = np.empty(
1846 (
1847 xyz.shape[0],
1848 indexes.shape[0],
1849 ),
1850 dtype="float32",
1851 order="C",
1852 )
1853 _dist_mic(
1854 xyz,
1855 indexes.astype("int32"),
1856 unitcell_vectors.transpose(0, 2, 1).copy(),
1857 out,
1858 orthogonal,
1859 )
1860 else:
1861 out = np.empty(
1862 (
1863 xyz.shape[0],
1864 indexes.shape[0],
1865 ),
1866 dtype="float32",
1867 order="C",
1868 )
1869 _dist(xyz, indexes.astype("int32"), out)
1870 return 1 / out
1872 def transform(
1873 self,
1874 xyz: Optional[np.ndarray] = None,
1875 unitcell_vectors: Optional[np.ndarray] = None,
1876 unitcell_info: Optional[np.ndarray] = None,
1877 ) -> np.ndarray:
1878 """Takes xyz and unitcell information to apply the topological calculations on.
1880 When this method is not provided with any input, it will take the
1881 traj_container provided as `traj` in the `__init__()` method and transforms
1882 this trajectory. The argument `xyz` can be the xyz coordinates in nanometer
1883 of a trajectory with identical topology as `self.traj`. If `periodic` was
1884 set to True, `unitcell_vectors` and `unitcell_info` should also be provided.
1886 Args:
1887 xyz (Optional[np.ndarray]): A numpy array with shape (n_frames, n_atoms, 3)
1888 in nanometer. If None is provided, the coordinates of `self.traj`
1889 will be used. Otherwise, the topology of this set of xyz
1890 coordinates should match the topology of `self.atom`.
1891 Defaults to None.
1892 unitcell_vectors (Optional[np.ndarray]): When periodic is set to
1893 True, the `unitcell_vectors` are needed to calculate the
1894 minimum image convention in a periodic space. This numpy
1895 array should have the shape (n_frames, 3, 3). The rows of this
1896 array correlate to the Bravais vectors a, b, and c.
1897 unitcell_info (Optional[np.ndarray]): Basically identical to
1898 `unitcell_vectors`. A numpy array of shape (n_frames, 6), where
1899 the first three columns are the unitcell_lengths in nanometer.
1900 The other three columns are the unitcell_angles in degrees.
1902 Returns:
1903 np.ndarray: The result of the computation with shape (n_frames, n_indexes).
1905 """
1906 return 1.0 / super().transform(xyz, unitcell_vectors, unitcell_info)
1909class ContactFeature(DistanceFeature):
1910 """Defines certain distances as contacts and returns a binary (0, 1) result.
1912 Instead of returning the binary result can also count contacts with the
1913 argument `count_contacts=True` provided at instantiation. In that case,
1914 every frame returns an integer number.
1916 """
1918 prefix_label: str = "CONTACT:"
1919 _nonstandard_transform_args: list[str] = ["threshold", "count_contacts"]
1921 def __init__(
1922 self,
1923 traj: SingleTraj,
1924 distance_indexes: np.ndarray,
1925 threshold: float = 5.0,
1926 periodic: bool = True,
1927 count_contacts: bool = False,
1928 delayed: bool = False,
1929 ) -> None:
1930 """Instantiate the contact feature.
1932 A regular contact feature yields a np.ndarray with zeros and ones.
1933 The zeros are no contact. The ones are contact.
1935 Args:
1936 traj (SingleTraj): An instance of `SingleTraj`.
1937 distance_indexes (np.ndarray): An np.ndarray with shape (n_dists, 2),
1938 where distance_indexes[:, 0] indexes the first atoms of the distance
1939 measurement, and distance_indexes[:, 1] indexes the second atoms of the
1940 distance measurement.
1941 threshold (float): The threshold in nm, under which a distance is
1942 considered to be a contact. Defaults to 5.0 nm.
1943 periodic (bool): Whether to use the minimum image convention when
1944 calculating distances. Defaults to True.
1945 count_contacts (bool): When True, return an integer of the number of
1946 contacts instead of returning the array of regular contacts.
1948 """
1949 super(ContactFeature, self).__init__(
1950 traj, distance_indexes, periodic=periodic, delayed=delayed
1951 )
1952 if count_contacts:
1953 self.prefix_label: str = "counted " + self.prefix_label
1954 self.threshold = threshold
1955 self.count_contacts = count_contacts
1956 if count_contacts:
1957 self.dimension = 1
1958 else:
1959 self.dimension = len(self.distance_indexes)
1961 @property
1962 def dask_indices(self) -> str:
1963 """str: The name of the delayed transformation to carry out with this feature."""
1964 return "distance_indexes"
1966 @staticmethod
1967 @dask.delayed
1968 def dask_transform(
1969 indexes: np.ndarray,
1970 periodic: bool,
1971 threshold: float,
1972 count_contacts: bool,
1973 xyz: Optional[np.ndarray] = None,
1974 unitcell_vectors: Optional[np.ndarray] = None,
1975 unitcell_info: Optional[np.ndarray] = None,
1976 ) -> dask.delayed:
1977 """The same as `transform()` but without the need to pickle `traj`.
1979 When dask delayed concurrencies are distributed, required python objects
1980 are pickled. Thus, every feature needs to have its own pickled traj.
1981 That defeats the purpose of dask distributed. Thus, this method implements
1982 the same calculations as `transform` as a more barebones approach.
1983 It foregoes the checks for periodicity and unit-cell shape and just
1984 takes xyz, unitcell vectors, and unitcell info. Furthermore, it is a
1985 staticmethod, so it doesn't require `self` to function. However, it
1986 needs the indexes in `self.indexes`. That's why the `dask_indices`
1987 property informs the scheduler to also pickle and pass this object to
1988 the workers.
1990 Args:
1991 indexes (np.ndarray): A numpy array with shape (n, ) giving the
1992 0-based index of the atoms which positions should be returned.
1993 periodic (bool): Whether to observe the minimum image convention
1994 and respect proteins breaking over the periodic boundary
1995 condition as a whole (True). In this case, the trajectory container
1996 in `traj` needs to have unitcell information. Defaults to True.
1997 threshold (float): The threshold in nm, under which a distance is
1998 considered to be a contact. Defaults to 5.0 nm.
1999 count_contacts (bool): When True, return an integer of the number of contacts
2000 instead of returning the array of regular contacts.
2001 xyz (Optional[np.ndarray]): A numpy array with shape (n_frames, n_atoms, 3)
2002 in nanometer. If None is provided, the coordinates of `self.traj`
2003 will be used. Otherwise, the topology of this set of xyz
2004 coordinates should match the topology of `self.atom`.
2005 Defaults to None.
2006 unitcell_vectors (Optional[np.ndarray]): When periodic is set to
2007 True, the `unitcell_vectors` are needed to calculate the
2008 minimum image convention in a periodic space. This numpy
2009 array should have the shape (n_frames, 3, 3). The rows of this
2010 array correlate to the Bravais vectors a, b, and c.
2011 unitcell_info (Optional[np.ndarray]): Basically identical to
2012 `unitcell_vectors`. A numpy array of shape (n_frames, 6), where
2013 the first three columns are the unitcell_lengths in nanometer.
2014 The other three columns are the unitcell_angles in degrees.
2016 """
2017 if periodic:
2018 assert unitcell_vectors is not None
2019 # check for an orthogonal box
2020 if unitcell_info is None:
2021 # convert to angles
2022 unitcell_angles = []
2023 for fr_unitcell_vectors in unitcell_vectors:
2024 _, _, _, alpha, beta, gamma = box_vectors_to_lengths_and_angles(
2025 fr_unitcell_vectors[0],
2026 fr_unitcell_vectors[1],
2027 fr_unitcell_vectors[2],
2028 )
2029 unitcell_angles.append(np.array([alpha, beta, gamma]))
2030 else:
2031 unitcell_angles = unitcell_info[:, 3:]
2032 # check for an orthogonal box
2033 orthogonal = np.allclose(unitcell_angles, 90)
2035 out = np.empty(
2036 (
2037 xyz.shape[0],
2038 indexes.shape[0],
2039 ),
2040 dtype="float32",
2041 order="C",
2042 )
2043 _dist_mic(
2044 xyz,
2045 indexes.astype("int32"),
2046 unitcell_vectors.transpose(0, 2, 1).copy(),
2047 out,
2048 orthogonal,
2049 )
2050 else:
2051 out = np.empty(
2052 (
2053 xyz.shape[0],
2054 indexes.shape[0],
2055 ),
2056 dtype="float32",
2057 order="C",
2058 )
2059 _dist(xyz, indexes.astype("int32"), out)
2060 res = np.zeros((len(out), indexes.shape[0]), dtype=np.float32)
2061 I = np.argwhere(out <= threshold) # noqa: E741
2062 res[I[:, 0], I[:, 1]] = 1.0
2063 if count_contacts:
2064 return res.sum(axis=1, keepdims=True)
2065 else:
2066 return res
2068 def transform(
2069 self,
2070 xyz: Optional[np.ndarray] = None,
2071 unitcell_vectors: Optional[np.ndarray] = None,
2072 unitcell_info: Optional[np.ndarray] = None,
2073 ) -> np.ndarray:
2074 """Takes xyz and unitcell information to apply the topological calculations on.
2076 When this method is not provided with any input, it will take the
2077 traj_container provided as `traj` in the `__init__()` method and transforms
2078 this trajectory. The argument `xyz` can be the xyz coordinates in nanometer
2079 of a trajectory with identical topology as `self.traj`. If `periodic` was
2080 set to True, `unitcell_vectors` and `unitcell_info` should also be provided.
2082 Args:
2083 xyz (Optional[np.ndarray]): A numpy array with shape (n_frames, n_atoms, 3)
2084 in nanometer. If None is provided, the coordinates of `self.traj`
2085 will be used. Otherwise, the topology of this set of xyz
2086 coordinates should match the topology of `self.atom`.
2087 Defaults to None.
2088 unitcell_vectors (Optional[np.ndarray]): When periodic is set to
2089 True, the `unitcell_vectors` are needed to calculate the
2090 minimum image convention in a periodic space. This numpy
2091 array should have the shape (n_frames, 3, 3). The rows of this
2092 array correlate to the Bravais vectors a, b, and c.
2093 unitcell_info (Optional[np.ndarray]): Basically identical to
2094 `unitcell_vectors`. A numpy array of shape (n_frames, 6), where
2095 the first three columns are the unitcell_lengths in nanometer.
2096 The other three columns are the unitcell_angles in degrees.
2098 Returns:
2099 np.ndarray: The result of the computation with shape (n_frames, n_indexes).
2101 """
2102 dists = super(ContactFeature, self).transform(
2103 xyz, unitcell_vectors, unitcell_info
2104 )
2105 res = np.zeros(
2106 (len(self.traj), self.distance_indexes.shape[0]), dtype=np.float32
2107 )
2108 I = np.argwhere(dists <= self.threshold) # noqa: E741
2109 res[I[:, 0], I[:, 1]] = 1.0
2110 if self.count_contacts:
2111 return res.sum(axis=1, keepdims=True)
2112 else:
2113 return res
2116class BackboneTorsionFeature(DihedralFeature):
2117 def __init__(
2118 self,
2119 traj: SingleTraj,
2120 selstr: Optional[str] = None,
2121 deg: bool = False,
2122 cossin: bool = False,
2123 periodic: bool = True,
2124 delayed: bool = False,
2125 ) -> None:
2126 self.traj = traj
2127 indices = self.traj.indices_phi
2128 self.selstr = selstr
2130 if not selstr:
2131 self._phi_inds = indices
2132 else:
2133 self._phi_inds = indices[
2134 np.in1d(indices[:, 1], self.traj.top.select(selstr), assume_unique=True)
2135 ]
2137 indices = self.traj.indices_psi
2138 if not selstr:
2139 self._psi_inds = indices
2140 else:
2141 self._psi_inds = indices[
2142 np.in1d(indices[:, 1], self.traj.top.select(selstr), assume_unique=True)
2143 ]
2145 # alternate phi, psi pairs (phi_1, psi_1, ..., phi_n, psi_n)
2146 dih_indexes = np.array(
2147 list(phi_psi for phi_psi in zip(self._phi_inds, self._psi_inds))
2148 ).reshape(-1, 4)
2150 super(BackboneTorsionFeature, self).__init__(
2151 self.traj,
2152 dih_indexes,
2153 deg=deg,
2154 cossin=cossin,
2155 periodic=periodic,
2156 delayed=delayed,
2157 )
2159 def describe(self) -> list[str]:
2160 """Gives a list of strings describing this feature's feature-axis.
2162 A feature computes a collective variable (CV). A CV is aligned with an MD
2163 trajectory on the time/frame-axis. The feature axis is unique for every
2164 feature. A feature describing the backbone torsions (phi, omega, psi) would
2165 have a feature axis with the size 3*n-3, where n is the number of residues.
2166 The end-to-end distance of a linear protein in contrast would just have
2167 a feature axis with length 1. This `describe()` method will label these
2168 values unambiguously. A backbone torsion feature's `describe()` could be
2169 ['phi_1', 'omega_1', 'psi_1', 'phi_2', 'omega_2', ..., 'psi_n-1'].
2170 The end-to-end distance feature could be described by
2171 ['distance_between_MET1_and_LYS80'].
2173 Returns:
2174 list[str]: The labels of this feature. The length
2175 is determined by the `dih_indexes` and the `cossin` argument
2176 in the `__init__()` method. If `cossin` is false, then
2177 `len(describe()) == self.angle_indexes[-1]`, else `len(describe())`
2178 is twice as long.
2180 """
2181 top = self.traj.top
2182 getlbl = lambda at: "%i %s %i" % (
2183 at.residue.chain.index,
2184 at.residue.name,
2185 at.residue.resSeq,
2186 )
2188 if self.cossin:
2189 sin_cos = ("COS(PHI %s)", "SIN(PHI %s)")
2190 labels_phi = [
2191 (
2192 sin_cos[0] % getlbl(top.atom(ires[1])),
2193 sin_cos[1] % getlbl(top.atom(ires[1])),
2194 )
2195 for ires in self._phi_inds
2196 ]
2197 sin_cos = ("COS(PSI %s)", "SIN(PSI %s)")
2198 labels_psi = [
2199 (
2200 sin_cos[0] % getlbl(top.atom(ires[1])),
2201 sin_cos[1] % getlbl(top.atom(ires[1])),
2202 )
2203 for ires in self._psi_inds
2204 ]
2205 # produce the same ordering as the given indices (phi_1, psi_1, ..., phi_n, psi_n)
2206 # or (cos(phi_1), sin(phi_1), cos(psi_1), sin(psi_1), ..., cos(phi_n), sin(phi_n), cos(psi_n), sin(psi_n))
2207 res = list(
2208 itertools.chain.from_iterable(
2209 itertools.chain.from_iterable(zip(labels_phi, labels_psi))
2210 )
2211 )
2212 else:
2213 labels_phi = [
2214 "PHI %s" % getlbl(top.atom(ires[1])) for ires in self._phi_inds
2215 ]
2216 labels_psi = [
2217 "PSI %s" % getlbl(top.atom(ires[1])) for ires in self._psi_inds
2218 ]
2219 res = list(itertools.chain.from_iterable(zip(labels_phi, labels_psi)))
2220 return res
2223class ResidueMinDistanceFeature(DistanceFeature):
2224 _nonstandard_transform_args: list[str] = [
2225 "threshold",
2226 "count_contacts",
2227 "scheme",
2228 "top",
2229 ]
2231 def __init__(
2232 self,
2233 traj: SingleTraj,
2234 contacts: np.ndarray,
2235 scheme: Literal["ca", "closest", "closest-heavy"],
2236 ignore_nonprotein: bool,
2237 threshold: float,
2238 periodic: bool,
2239 count_contacts: bool = False,
2240 delayed: bool = False,
2241 ) -> None:
2242 if count_contacts and threshold is None:
2243 raise ValueError(
2244 "Cannot count contacts when no contact threshold is supplied."
2245 )
2247 self.contacts = contacts
2248 self.scheme = scheme
2249 self.threshold = threshold
2250 self.prefix_label: str = "RES_DIST (%s)" % scheme
2251 self.ignore_nonprotein = ignore_nonprotein
2253 if count_contacts:
2254 self.prefix_label: str = "counted " + self.prefix_label
2255 self.count_contacts = count_contacts
2257 dummy_traj = md.Trajectory(np.zeros((traj.top.n_atoms, 3)), traj.top)
2258 dummy_dist, dummy_pairs = md.compute_contacts(
2259 dummy_traj,
2260 contacts=contacts,
2261 scheme=scheme,
2262 periodic=periodic,
2263 ignore_nonprotein=ignore_nonprotein,
2264 )
2266 dim = 1 if count_contacts else dummy_dist.shape[1]
2267 super(ResidueMinDistanceFeature, self).__init__(
2268 distance_indexes=dummy_pairs,
2269 traj=traj,
2270 periodic=periodic,
2271 dim=dim,
2272 delayed=delayed,
2273 )
2275 def describe(self) -> list[str]:
2276 """Gives a list of strings describing this feature's feature-axis.
2278 A feature computes a collective variable (CV). A CV is aligned with an MD
2279 trajectory on the time/frame-axis. The feature axis is unique for every
2280 feature. A feature describing the backbone torsions (phi, omega, psi) would
2281 have a feature axis with the size 3*n-3, where n is the number of residues.
2282 The end-to-end distance of a linear protein in contrast would just have
2283 a feature axis with length 1. This `describe()` method will label these
2284 values unambiguously. A backbone torsion feature's `describe()` could be
2285 ['phi_1', 'omega_1', 'psi_1', 'phi_2', 'omega_2', ..., 'psi_n-1'].
2286 The end-to-end distance feature could be described by
2287 ['distance_between_MET1_and_LYS80'].
2289 Returns:
2290 list[str]: The labels of this feature.
2292 """
2293 labels = []
2294 for a, b in self.distance_indexes:
2295 labels.append(
2296 f"{self.prefix_label} {self.traj.top.residue(a)} - {self.traj.top.residue(b)}"
2297 )
2298 return labels
2300 @property
2301 def dask_indices(self) -> str:
2302 """str: The name of the delayed transformation to carry out with this feature."""
2303 return "contacts"
2305 @staticmethod
2306 @dask.delayed
2307 def dask_transform(
2308 indexes: np.ndarray,
2309 top: md.Topology,
2310 scheme: Literal["ca", "closest", "closest-heavy"],
2311 periodic: bool,
2312 threshold: float,
2313 count_contacts: bool,
2314 xyz: Optional[np.ndarray] = None,
2315 unitcell_vectors: Optional[np.ndarray] = None,
2316 unitcell_info: Optional[np.ndarray] = None,
2317 ) -> dask.delayed:
2318 """The same as `transform()` but without the need to pickle `traj`.
2320 When dask delayed concurrencies are distributed, required python objects
2321 are pickled. Thus, every feature needs to have its own pickled traj.
2322 That defeats the purpose of dask distributed. Thus, this method implements
2323 the same calculations as `transform` as a more barebones approach.
2324 It foregoes the checks for periodicity and unit-cell shape and just
2325 takes xyz, unitcell vectors, and unitcell info. Furthermore, it is a
2326 staticmethod, so it doesn't require `self` to function. However, it
2327 needs the indexes in `self.indexes`. That's why the `dask_indices`
2328 property informs the scheduler to also pickle and pass this object to
2329 the workers.
2331 Args:
2332 indexes (np.ndarray): For this special feature, the indexes argument
2333 in the @staticmethod dask_transform is `self.contacts`.
2334 periodic (bool): Whether to observe the minimum image convention
2335 and respect proteins breaking over the periodic boundary
2336 condition as a whole (True). In this case, the trajectory container
2337 in `traj` needs to have unitcell information. Defaults to True.
2338 threshold (float): The threshold in nm, under which a distance is
2339 considered to be a contact. Defaults to 5.0 nm.
2340 count_contacts (bool): When True, return an integer of the number of contacts
2341 instead of returning the array of regular contacts.
2342 xyz (Optional[np.ndarray]): A numpy array with shape (n_frames, n_atoms, 3)
2343 in nanometer. If None is provided, the coordinates of `self.traj`
2344 will be used. Otherwise, the topology of this set of xyz
2345 coordinates should match the topology of `self.atom`.
2346 Defaults to None.
2347 unitcell_vectors (Optional[np.ndarray]): When periodic is set to
2348 True, the `unitcell_vectors` are needed to calculate the
2349 minimum image convention in a periodic space. This numpy
2350 array should have the shape (n_frames, 3, 3). The rows of this
2351 array correlate to the Bravais vectors a, b, and c.
2352 unitcell_info (Optional[np.ndarray]): Basically identical to
2353 `unitcell_vectors`. A numpy array of shape (n_frames, 6), where
2354 the first three columns are the unitcell_lengths in nanometer.
2355 The other three columns are the unitcell_angles in degrees.
2357 """
2358 # create a dummy traj, with the appropriate topology
2359 traj = md.Trajectory(
2360 xyz=xyz,
2361 topology=top,
2362 unitcell_lengths=unitcell_info[:, :3],
2363 unitcell_angles=unitcell_info[:, 3:],
2364 )
2366 # We let mdtraj compute the contacts with the input scheme
2367 D = md.compute_contacts(
2368 traj,
2369 contacts=indexes,
2370 scheme=scheme,
2371 periodic=periodic,
2372 )[0]
2374 res = np.zeros_like(D)
2375 # Do we want binary?
2376 if threshold is not None:
2377 I = np.argwhere(D <= threshold)
2378 res[I[:, 0], I[:, 1]] = 1.0
2379 else:
2380 res = D
2382 if count_contacts and threshold is not None:
2383 return res.sum(axis=1, keepdims=True)
2384 else:
2385 return res
2387 def transform(
2388 self,
2389 xyz: Optional[np.ndarray] = None,
2390 unitcell_vectors: Optional[np.ndarray] = None,
2391 unitcell_info: Optional[np.ndarray] = None,
2392 ) -> np.ndarray:
2393 """Takes xyz and unitcell information to apply the topological calculations on.
2395 When this method is not provided with any input, it will take the
2396 traj_container provided as `traj` in the `__init__()` method and transforms
2397 this trajectory. The argument `xyz` can be the xyz coordinates in nanometer
2398 of a trajectory with identical topology as `self.traj`. If `periodic` was
2399 set to True, `unitcell_vectors` and `unitcell_info` should also be provided.
2401 Args:
2402 xyz (Optional[np.ndarray]): A numpy array with shape (n_frames, n_atoms, 3)
2403 in nanometer. If None is provided, the coordinates of `self.traj`
2404 will be used. Otherwise, the topology of this set of xyz
2405 coordinates should match the topology of `self.atom`.
2406 Defaults to None.
2407 unitcell_vectors (Optional[np.ndarray]): When periodic is set to
2408 True, the `unitcell_vectors` are needed to calculate the
2409 minimum image convention in a periodic space. This numpy
2410 array should have the shape (n_frames, 3, 3). The rows of this
2411 array correlate to the Bravais vectors a, b, and c.
2412 unitcell_info (Optional[np.ndarray]): Basically identical to
2413 `unitcell_vectors`. A numpy array of shape (n_frames, 6), where
2414 the first three columns are the unitcell_lengths in nanometer.
2415 The other three columns are the unitcell_angles in degrees.
2417 Returns:
2418 np.ndarray: The result of the computation with shape (n_frames, n_indexes).
2420 """
2421 (
2422 xyz,
2423 unitcell_vectors,
2424 unitcell_info,
2425 ) = Feature.transform(self, xyz, unitcell_vectors, unitcell_info)
2427 # create a dummy traj, with the appropriate topology
2428 traj = md.Trajectory(
2429 xyz=xyz,
2430 topology=self.traj.top,
2431 unitcell_lengths=unitcell_info[:, :3],
2432 unitcell_angles=unitcell_info[:, 3:],
2433 )
2435 # We let mdtraj compute the contacts with the input scheme
2436 D = md.compute_contacts(
2437 traj,
2438 contacts=self.contacts,
2439 scheme=self.scheme,
2440 periodic=self.periodic,
2441 )[0]
2443 res = np.zeros_like(D)
2444 # Do we want binary?
2445 if self.threshold is not None:
2446 I = np.argwhere(D <= self.threshold)
2447 res[I[:, 0], I[:, 1]] = 1.0
2448 else:
2449 res = D
2451 if self.count_contacts and self.threshold is not None:
2452 return res.sum(axis=1, keepdims=True)
2453 else:
2454 return res
2457class GroupCOMFeature(Feature):
2458 """Cartesian coordinates of the center-of-mass (COM) of atom groups.
2460 Groups can be defined as sequences of sequences of int. So a list of list of int
2461 can be used to define groups of various sizes. The resulting array will have
2462 the shape of (n_frames, n_groups ** 2). The xyz coordinates are flattended,
2463 so the array can be rebuilt with `np.dstack()`
2465 Examples:
2466 >>> import encodermap as em
2467 >>> import numpy as np
2468 >>> traj = em.SingleTraj.from_pdb_id("1YUG")
2469 >>> f = em.features.GroupCOMFeature(
2470 ... traj=traj,
2471 ... group_definitions=[
2472 ... [0, 1, 2],
2473 ... [3, 4, 5, 6, 7],
2474 ... [8, 9, 10],
2475 ... ]
2476 ... )
2477 >>> a = f.transform()
2478 >>> a.shape # this array is flattened along the feature axis
2479 (15, 9)
2480 >>> a = np.dstack([
2481 ... a[..., ::3],
2482 ... a[..., 1::3],
2483 ... a[..., 2::3],
2484 ... ])
2485 >>> a.shape # now the z, coordinate of the 2nd center of mass is a[:, 1, -1]
2486 (15, 3, 3)
2488 Note:
2489 Centering (`ref_geom`) and imaging (`image_molecules=True`) can be time-
2490 consuming. Consider doing this to your trajectory files prior to featurization.
2492 """
2494 _nonstandard_transform_args: list[str] = [
2495 "top",
2496 "ref_geom",
2497 "image_molecules",
2498 "masses_in_groups",
2499 ]
2501 def __init__(
2502 self,
2503 traj: SingleTraj,
2504 group_definitions: Sequence[Sequence[int]],
2505 ref_geom: Optional[md.Trajectory] = None,
2506 image_molecules: bool = False,
2507 mass_weighted: bool = True,
2508 delayed: bool = False,
2509 ) -> None:
2510 """Instantiate the GroupCOMFeature.
2512 Args:
2513 traj (SingleTraj): An instance of `SingleTraj`.
2514 group_definitions (Sequence[Sequence[int]]): A sequence of sequences
2515 of int defining the groups of which the COM should be calculated.
2516 See the example for how to use this argument.
2517 ref_geom (Optional[md.Trajectory]): The coordinates can be centered
2518 to a reference geometry before computing the COM. Defaults to None.
2519 image_molecules (bool): The method traj.image_molecules will be
2520 called before computing averages. The method tries to correct
2521 for molecules broken across periodic boundary conditions,
2522 but can be time-consuming. See
2523 http://mdtraj.org/latest/api/generated/mdtraj.Trajectory.html#mdtraj.Trajectory.image_molecules
2524 for more details
2525 mass_weighted (bool): Whether the COM should be calculated mass-weighted.
2527 """
2528 self._raise_on_unitcell = False
2529 if not (ref_geom is None or isinstance(ref_geom, md.Trajectory)):
2530 raise ValueError(
2531 f"argument ref_geom has to be either None or and "
2532 f"mdtraj.Trajectory, got instead {type(ref_geom)}"
2533 )
2535 self.ref_geom = ref_geom
2536 self.traj = traj
2537 self.top = traj.top
2538 self.image_molecules = image_molecules
2539 self.group_definitions = [np.asarray(gf) for gf in group_definitions]
2540 self.atom_masses = np.array([aa.element.mass for aa in self.traj.top.atoms])
2542 if mass_weighted:
2543 self.masses_in_groups = [
2544 self.atom_masses[aa_in_rr] for aa_in_rr in self.group_definitions
2545 ]
2546 else:
2547 self.masses_in_groups = [
2548 np.ones_like(aa_in_rr) for aa_in_rr in self.group_definitions
2549 ]
2551 self.delayed = delayed
2553 # Prepare and store the description
2554 self._describe = []
2555 for group in self.group_definitions:
2556 for coor in "xyz":
2557 self._describe.append(
2558 f"COM-{coor} of atom group [{group[:3]}..{group[-3:]}]"
2559 )
2560 self.dimension = 3 * len(self.group_definitions)
2562 @property
2563 def dask_indices(self) -> str:
2564 """str: The name of the delayed transformation to carry out with this feature."""
2565 return "group_definitions"
2567 def describe(self) -> list[str]:
2568 """Gives a list of strings describing this feature's feature-axis.
2570 A feature computes a collective variable (CV). A CV is aligned with an MD
2571 trajectory on the time/frame-axis. The feature axis is unique for every
2572 feature. A feature describing the backbone torsions (phi, omega, psi) would
2573 have a feature axis with the size 3*n-3, where n is the number of residues.
2574 The end-to-end distance of a linear protein in contrast would just have
2575 a feature axis with length 1. This `describe()` method will label these
2576 values unambiguously. A backbone torsion feature's `describe()` could be
2577 ['phi_1', 'omega_1', 'psi_1', 'phi_2', 'omega_2', ..., 'psi_n-1'].
2578 The end-to-end distance feature could be described by
2579 ['distance_between_MET1_and_LYS80'].
2581 Returns:
2582 list[str]: The labels of this feature.
2584 """
2585 return self._describe
2587 @staticmethod
2588 @dask.delayed
2589 def dask_transform(
2590 indexes: list[list[int]],
2591 top: md.Topology,
2592 ref_geom: Union[md.Trajectory, None],
2593 image_molecules: bool,
2594 masses_in_groups: list[float],
2595 xyz: Optional[np.ndarray] = None,
2596 unitcell_vectors: Optional[np.ndarray] = None,
2597 unitcell_info: Optional[np.ndarray] = None,
2598 ) -> dask.delayed:
2599 """The same as `transform()` but without the need to pickle `traj`.
2601 When dask delayed concurrencies are distributed, required python objects
2602 are pickled. Thus, every feature needs to have its own pickled traj.
2603 That defeats the purpose of dask distributed. Thus, this method implements
2604 the same calculations as `transform` as a more barebones approach.
2605 It foregoes the checks for periodicity and unit-cell shape and just
2606 takes xyz, unitcell vectors, and unitcell info. Furthermore, it is a
2607 staticmethod, so it doesn't require `self` to function. However, it
2608 needs the indexes in `self.indexes`. That's why the `dask_indices`
2609 property informs the scheduler to also pickle and pass this object to
2610 the workers.
2612 Args:
2613 indexes (np.ndarray): For this special feature, the indexes argument
2614 in the @staticmethod dask_transform is `self.group_definitions`.
2615 periodic (bool): Whether to observe the minimum image convention
2616 and respect proteins breaking over the periodic boundary
2617 condition as a whole (True). In this case, the trajectory container
2618 in `traj` needs to have unitcell information. Defaults to True.
2619 threshold (float): The threshold in nm, under which a distance is
2620 considered to be a contact. Defaults to 5.0 nm.
2621 count_contacts (bool): When True, return an integer of the number of contacts
2622 instead of returning the array of regular contacts.
2623 xyz (Optional[np.ndarray]): A numpy array with shape (n_frames, n_atoms, 3)
2624 in nanometer. If None is provided, the coordinates of `self.traj`
2625 will be used. Otherwise, the topology of this set of xyz
2626 coordinates should match the topology of `self.atom`.
2627 Defaults to None.
2628 unitcell_vectors (Optional[np.ndarray]): When periodic is set to
2629 True, the `unitcell_vectors` are needed to calculate the
2630 minimum image convention in a periodic space. This numpy
2631 array should have the shape (n_frames, 3, 3). The rows of this
2632 array correlate to the Bravais vectors a, b, and c.
2633 unitcell_info (Optional[np.ndarray]): Basically identical to
2634 `unitcell_vectors`. A numpy array of shape (n_frames, 6), where
2635 the first three columns are the unitcell_lengths in nanometer.
2636 The other three columns are the unitcell_angles in degrees.
2638 """
2639 # create a dummy traj, with the appropriate topology
2640 traj = md.Trajectory(
2641 xyz=xyz,
2642 topology=top,
2643 unitcell_lengths=unitcell_info[:, :3],
2644 unitcell_angles=unitcell_info[:, 3:],
2645 )
2646 COM_xyz = []
2647 if ref_geom is not None:
2648 traj = traj.superpose(ref_geom)
2649 if image_molecules:
2650 traj = traj.image_molecules()
2651 for aas, mms in zip(indexes, masses_in_groups):
2652 COM_xyz.append(
2653 np.average(
2654 traj.xyz[
2655 :,
2656 aas,
2657 ],
2658 axis=1,
2659 weights=mms,
2660 )
2661 )
2662 return np.hstack(COM_xyz)
2664 def transform(
2665 self,
2666 xyz: Optional[np.ndarray] = None,
2667 unitcell_vectors: Optional[np.ndarray] = None,
2668 unitcell_info: Optional[np.ndarray] = None,
2669 ) -> np.ndarray:
2670 """Takes xyz and unitcell information to apply the topological calculations on.
2672 When this method is not provided with any input, it will take the
2673 traj_container provided as `traj` in the `__init__()` method and transforms
2674 this trajectory. The argument `xyz` can be the xyz coordinates in nanometer
2675 of a trajectory with identical topology as `self.traj`. If `periodic` was
2676 set to True, `unitcell_vectors` and `unitcell_info` should also be provided.
2678 Args:
2679 xyz (Optional[np.ndarray]): A numpy array with shape (n_frames, n_atoms, 3)
2680 in nanometer. If None is provided, the coordinates of `self.traj`
2681 will be used. Otherwise, the topology of this set of xyz
2682 coordinates should match the topology of `self.atom`.
2683 Defaults to None.
2684 unitcell_vectors (Optional[np.ndarray]): When periodic is set to
2685 True, the `unitcell_vectors` are needed to calculate the
2686 minimum image convention in a periodic space. This numpy
2687 array should have the shape (n_frames, 3, 3). The rows of this
2688 array correlate to the Bravais vectors a, b, and c.
2689 unitcell_info (Optional[np.ndarray]): Basically identical to
2690 `unitcell_vectors`. A numpy array of shape (n_frames, 6), where
2691 the first three columns are the unitcell_lengths in nanometer.
2692 The other three columns are the unitcell_angles in degrees.
2694 Returns:
2695 np.ndarray: The result of the computation with shape (n_frames, n_indexes).
2697 """
2698 (
2699 xyz,
2700 unitcell_vectors,
2701 unitcell_info,
2702 ) = Feature.transform(self, xyz, unitcell_vectors, unitcell_info)
2703 # create a dummy traj, with the appropriate topology
2704 traj = md.Trajectory(
2705 xyz=xyz,
2706 topology=self.traj.top,
2707 unitcell_lengths=(
2708 unitcell_info[:, :3] if unitcell_info is not None else None
2709 ),
2710 unitcell_angles=unitcell_info[:, 3:] if unitcell_info is not None else None,
2711 )
2712 COM_xyz = []
2713 if self.ref_geom is not None:
2714 traj = traj.superpose(self.ref_geom)
2715 if self.image_molecules:
2716 traj = traj.image_molecules()
2717 for aas, mms in zip(self.group_definitions, self.masses_in_groups):
2718 COM_xyz.append(
2719 np.average(
2720 traj.xyz[
2721 :,
2722 aas,
2723 ],
2724 axis=1,
2725 weights=mms,
2726 )
2727 )
2728 return np.hstack(COM_xyz)
2731class ResidueCOMFeature(GroupCOMFeature):
2732 def __init__(
2733 self,
2734 traj: SingleTraj,
2735 residue_indices: Sequence[int],
2736 residue_atoms: np.ndarray,
2737 scheme: Literal["all", "backbone", "sidechain"] = "all",
2738 ref_geom: Optional[md.Trajectory] = None,
2739 image_molecules: bool = False,
2740 mass_weighted: bool = True,
2741 delayed: bool = False,
2742 ) -> None:
2743 """Instantiate the ResidueCOMFeature.
2745 Args:
2746 residue_indices (Sequence[int]): The residue indices for which the
2747 COM will be computed. These are always zero-indexed that are not
2748 necessarily the residue sequence record of the topology (resSeq).
2749 resSeq indices start at least at 1 but can depend on the topology.
2750 Furthermore, resSeq numbers can be duplicated across chains; residue
2751 indices are always unique.
2754 """
2755 super(ResidueCOMFeature, self).__init__(
2756 traj,
2757 residue_atoms,
2758 mass_weighted=mass_weighted,
2759 ref_geom=ref_geom,
2760 image_molecules=image_molecules,
2761 delayed=delayed,
2762 )
2764 self.residue_indices = residue_indices
2765 self.scheme = scheme
2767 self._describe = []
2768 for ri in self.residue_indices:
2769 for coor in "xyz":
2770 self._describe.append(
2771 f"{self.traj.top.residue(ri)} COM-{coor} ({self.scheme})"
2772 )
2775class SideChainTorsions(DihedralFeature):
2776 options = ("chi1", "chi2", "chi3", "chi4", "chi5")
2778 def __init__(
2779 self,
2780 traj: Union[SingleTraj, TrajEnsemble],
2781 selstr: Optional[str] = None,
2782 deg: bool = False,
2783 cossin: bool = False,
2784 periodic: bool = True,
2785 which: Union[
2786 Literal["all"], Sequence[Literal["chi1", "chi2", "chi3", "chi4", "chi5"]]
2787 ] = "all",
2788 delayed: bool = False,
2789 ) -> None:
2790 if not isinstance(which, (tuple, list)):
2791 which = [which]
2792 if not set(which).issubset(set(self.options) | {"all"}):
2793 raise ValueError(
2794 'Argument "which" should only contain one of {}, but was {}'.format(
2795 ["all"] + list(self.options), which
2796 )
2797 )
2798 if "all" in which:
2799 which = self.options
2801 # get all dihedral index pairs
2802 indices_dict = {k: getattr(traj, "indices_%s" % k) for k in which}
2803 if selstr:
2804 selection = traj.top.select(selstr)
2805 truncated_indices_dict = {}
2806 for k, inds in indices_dict.items():
2807 mask = np.in1d(inds[:, 1], selection, assume_unique=True)
2808 truncated_indices_dict[k] = inds[mask]
2809 indices_dict = truncated_indices_dict
2811 valid = {k: indices_dict[k] for k in indices_dict if indices_dict[k].size > 0}
2812 if not valid:
2813 raise ValueError(
2814 "Could not determine any side chain dihedrals for your topology!"
2815 )
2816 self._prefix_label_lengths = np.array(
2817 [len(indices_dict[k]) if k in which else 0 for k in self.options]
2818 )
2819 indices = np.vstack(list(valid.values()))
2821 super(SideChainTorsions, self).__init__(
2822 traj=traj,
2823 dih_indexes=indices,
2824 deg=deg,
2825 cossin=cossin,
2826 periodic=periodic,
2827 delayed=delayed,
2828 )
2830 def describe(self) -> list[str]:
2831 """Gives a list of strings describing this feature's feature-axis.
2833 A feature computes a collective variable (CV). A CV is aligned with an MD
2834 trajectory on the time/frame-axis. The feature axis is unique for every
2835 feature. A feature describing the backbone torsions (phi, omega, psi) would
2836 have a feature axis with the size 3*n-3, where n is the number of residues.
2837 The end-to-end distance of a linear protein in contrast would just have
2838 a feature axis with length 1. This `describe()` method will label these
2839 values unambiguously. A backbone torsion feature's `describe()` could be
2840 ['phi_1', 'omega_1', 'psi_1', 'phi_2', 'omega_2', ..., 'psi_n-1'].
2841 The end-to-end distance feature could be described by
2842 ['distance_between_MET1_and_LYS80'].
2844 Returns:
2845 list[str]: The labels of this feature. The length
2846 is determined by the `dih_indexes` and the `cossin` argument
2847 in the `__init__()` method. If `cossin` is false, then
2848 `len(describe()) == self.angle_indexes[-1]`, else `len(describe())`
2849 is twice as long.
2851 """
2852 getlbl = lambda at: "%i %s %i" % (
2853 at.residue.chain.index,
2854 at.residue.name,
2855 at.residue.resSeq,
2856 )
2857 prefixes = []
2858 for lengths, label in zip(self._prefix_label_lengths, self.options):
2859 if self.cossin:
2860 lengths *= 2
2861 prefixes.extend([label.upper()] * lengths)
2863 if self.cossin:
2864 cossin = ("COS({dih} {res})", "SIN({dih} {res})")
2865 labels = [
2866 s.format(
2867 dih=prefixes[j + len(cossin) * i],
2868 res=getlbl(self.top.atom(ires[1])),
2869 )
2870 for i, ires in enumerate(self.angle_indexes)
2871 for j, s in enumerate(cossin)
2872 ]
2873 else:
2874 labels = [
2875 "{dih} {res}".format(
2876 dih=prefixes[i], res=getlbl(self.top.atom(ires[1]))
2877 )
2878 for i, ires in enumerate(self.angle_indexes)
2879 ]
2881 return labels
2884class MinRmsdFeature(Feature):
2885 _nonstandard_transform_args: list[str] = [
2886 "top",
2887 "ref",
2888 ]
2890 def __init__(
2891 self,
2892 traj: SingleTraj,
2893 ref: Union[md.Trajectory, SingleTraj],
2894 ref_frame: int = 0,
2895 atom_indices: Optional[np.ndarray] = None,
2896 precentered: bool = False,
2897 delayed: bool = False,
2898 ) -> None:
2899 # Encodermap imports
2900 from encodermap.trajinfo.info_single import SingleTraj
2902 self._raise_on_unitcell = False
2903 self.traj = traj
2904 self.top = self.traj.top
2905 assert isinstance(
2906 ref_frame, int
2907 ), f"ref_frame has to be of type integer, and not {type(ref_frame)}"
2909 if isinstance(ref, (md.Trajectory, SingleTraj)):
2910 self.name = f"MinRmsdFeature_with_{ref.n_atoms}_atoms_in_reference"
2911 else:
2912 raise TypeError(
2913 f"input reference has to be either `encodermap.SingleTraj` or "
2914 f"a mdtraj.Trajectory object, and not of {ref}"
2915 )
2917 self.ref = ref
2918 self.ref_frame = ref_frame
2919 self.atom_indices = atom_indices
2920 self.precentered = precentered
2921 self.dimension = 1
2922 self.delayed = delayed
2924 @property
2925 def dask_indices(self) -> str:
2926 """str: The name of the delayed transformation to carry out with this feature."""
2927 return "atom_indices"
2929 def describe(self) -> list[str]:
2930 """Gives a list of strings describing this feature's feature-axis.
2932 A feature computes a collective variable (CV). A CV is aligned with an MD
2933 trajectory on the time/frame-axis. The feature axis is unique for every
2934 feature. A feature describing the backbone torsions (phi, omega, psi) would
2935 have a feature axis with the size 3*n-3, where n is the number of residues.
2936 The end-to-end distance of a linear protein in contrast would just have
2937 a feature axis with length 1. This `describe()` method will label these
2938 values unambiguously. A backbone torsion feature's `describe()` could be
2939 ['phi_1', 'omega_1', 'psi_1', 'phi_2', 'omega_2', ..., 'psi_n-1'].
2940 The end-to-end distance feature could be described by
2941 ['distance_between_MET1_and_LYS80'].
2943 Returns:
2944 list[str]: The labels of this feature.
2946 """
2947 label = "minrmsd to frame %u of %s" % (self.ref_frame, self.name)
2948 if self.precentered:
2949 label += ", precentered=True"
2950 if self.atom_indices is not None:
2951 label += ", subset of atoms "
2952 return [label]
2954 @property
2955 def dask_indices(self):
2956 """str: The name of the delayed transformation to carry out with this feature."""
2957 return "atom_indices"
2959 @staticmethod
2960 @dask.delayed
2961 def dask_transform(
2962 indexes: np.ndarray,
2963 top: md.Topology,
2964 ref: md.Trajectory,
2965 xyz: Optional[np.ndarray] = None,
2966 unitcell_vectors: Optional[np.ndarray] = None,
2967 unitcell_info: Optional[np.ndarray] = None,
2968 ) -> np.ndarray:
2969 """Takes xyz and unitcell information to apply the topological calculations on.
2971 Args:
2972 xyz (Optional[np.ndarray]): A numpy array with shape (n_frames, n_atoms, 3)
2973 in nanometer. If None is provided, the coordinates of `self.traj`
2974 will be used. Otherwise, the topology of this set of xyz
2975 coordinates should match the topology of `self.atom`.
2976 Defaults to None.
2977 unitcell_vectors (Optional[np.ndarray]): When periodic is set to
2978 True, the `unitcell_vectors` are needed to calculate the
2979 minimum image convention in a periodic space. This numpy
2980 array should have the shape (n_frames, 3, 3). The rows of this
2981 array correlate to the Bravais vectors a, b, and c.
2982 unitcell_info (Optional[np.ndarray]): Basically identical to
2983 `unitcell_vectors`. A numpy array of shape (n_frames, 6), where
2984 the first three columns are the unitcell_lengths in nanometer.
2985 The other three columns are the unitcell_angles in degrees.
2987 Returns:
2988 np.ndarray: The result of the computation with shape (n_frames, n_indexes).
2990 """
2991 # create a dummy traj, with the appropriate topology
2992 traj = md.Trajectory(
2993 xyz=xyz,
2994 topology=top,
2995 unitcell_lengths=unitcell_info[:, :3],
2996 unitcell_angles=unitcell_info[:, 3:],
2997 )
2999 return np.array(md.rmsd(traj, ref, atom_indices=indexes), ndmin=2).T
3001 def transform(
3002 self,
3003 xyz: Optional[np.ndarray] = None,
3004 unitcell_vectors: Optional[np.ndarray] = None,
3005 unitcell_info: Optional[np.ndarray] = None,
3006 ) -> np.ndarray:
3007 """Takes xyz and unitcell information to apply the topological calculations on.
3009 When this method is not provided with any input, it will take the
3010 traj_container provided as `traj` in the `__init__()` method and transforms
3011 this trajectory. The argument `xyz` can be the xyz coordinates in nanometer
3012 of a trajectory with identical topology as `self.traj`. If `periodic` was
3013 set to True, `unitcell_vectors` and `unitcell_info` should also be provided.
3015 Args:
3016 xyz (Optional[np.ndarray]): A numpy array with shape (n_frames, n_atoms, 3)
3017 in nanometer. If None is provided, the coordinates of `self.traj`
3018 will be used. Otherwise, the topology of this set of xyz
3019 coordinates should match the topology of `self.atom`.
3020 Defaults to None.
3021 unitcell_vectors (Optional[np.ndarray]): When periodic is set to
3022 True, the `unitcell_vectors` are needed to calculate the
3023 minimum image convention in a periodic space. This numpy
3024 array should have the shape (n_frames, 3, 3). The rows of this
3025 array correlate to the Bravais vectors a, b, and c.
3026 unitcell_info (Optional[np.ndarray]): Basically identical to
3027 `unitcell_vectors`. A numpy array of shape (n_frames, 6), where
3028 the first three columns are the unitcell_lengths in nanometer.
3029 The other three columns are the unitcell_angles in degrees.
3031 Returns:
3032 np.ndarray: The result of the computation with shape (n_frames, n_indexes).
3034 """
3035 (
3036 xyz,
3037 unitcell_vectors,
3038 unitcell_info,
3039 ) = Feature.transform(self, xyz, unitcell_vectors, unitcell_info)
3041 # create a dummy traj, with the appropriate topology
3042 traj = md.Trajectory(
3043 xyz=xyz,
3044 topology=self.traj.top,
3045 unitcell_lengths=unitcell_info[:, :3],
3046 unitcell_angles=unitcell_info[:, 3:],
3047 )
3049 return np.array(
3050 md.rmsd(traj, self.ref, atom_indices=self.atom_indices), ndmin=2
3051 ).T
3054################################################################################
3055# EncoderMap features
3056################################################################################
3059class CentralDihedrals(DihedralFeature):
3060 """Feature that collects all dihedrals in the backbone of a topology.
3062 Attributes:
3063 top (mdtraj.Topology): Topology of this feature.
3064 indexes (np.ndarray): The numpy array returned from `top.select('all')`.
3066 """
3068 def __init__(
3069 self,
3070 traj: Union[SingleTraj, TrajEnsemble],
3071 selstr: Optional[str] = None,
3072 deg: bool = False,
3073 cossin: bool = False,
3074 periodic: bool = True,
3075 omega: bool = True,
3076 generic_labels: bool = False,
3077 check_aas: bool = True,
3078 delayed: bool = False,
3079 ):
3080 """Instantiate this feature class.
3082 Args:
3083 traj (em.SingleTraj): A topology to build features from.
3084 selstr (Optional[str]): A string, that limits the selection of dihedral angles.
3085 Only dihedral angles which atoms are represented by the `selstr` argument
3086 are considered. This selection string follows MDTraj's atom selection
3087 language: https://mdtraj.org/1.9.3/atom_selection.html. Can also
3088 be None, in which case all backbone dihedrals (also omega) are
3089 considered. Defaults to None.
3090 deg (bool): Whether to return the result in degree (`deg=True`) or in
3091 radians (`deg=False`). Defaults to False (radians).
3092 cossin (bool): If True, each angle will be returned as a pair of
3093 (sin(x), cos(x)). This is useful, if you calculate the means
3094 (e.g. TICA/PCA, clustering) in that space. Defaults to False.
3095 periodic (bool): Whether to recognize periodic boundary conditions and
3096 work under the minimum image convention. Defaults to True.
3098 """
3099 self.traj = traj
3100 self.selstr = selstr
3101 self.omega = omega
3103 indices = self.traj.indices_psi
3104 if not selstr:
3105 self._psi_inds = indices
3106 else:
3107 self._psi_inds = indices[
3108 np.in1d(indices[:, 1], self.top.select(selstr), assume_unique=True)
3109 ]
3111 self.omega = omega
3112 if self.omega:
3113 indices = self.traj.indices_omega
3114 if not selstr:
3115 self._omega_inds = indices
3116 else:
3117 self._omega_inds = indices[
3118 np.in1d(indices[:, 1], self.top.select(selstr), assume_unique=True)
3119 ]
3121 indices = self.traj.indices_phi
3122 if not selstr:
3123 self._phi_inds = indices
3124 else:
3125 self._phi_inds = indices[
3126 np.in1d(indices[:, 1], self.top.select(selstr), assume_unique=True)
3127 ]
3129 if self.omega:
3130 zipped = list(zip(self._psi_inds, self._omega_inds, self._phi_inds))
3131 else:
3132 zipped = list(zip(self._psi_inds, self._phi_inds))
3134 # alternate phi, psi , omega pairs (phi_1, psi_1, omega_1..., phi_n, psi_n, omega_n)
3135 dih_indexes = np.array(zipped).reshape(-1, 4)
3137 # set generic_labels for xarray
3138 if generic_labels:
3139 self.describe = self.generic_describe
3141 super(CentralDihedrals, self).__init__(
3142 self.traj,
3143 dih_indexes,
3144 deg=deg,
3145 cossin=cossin,
3146 periodic=periodic,
3147 check_aas=check_aas,
3148 delayed=delayed,
3149 )
3151 @property
3152 def name(self) -> str:
3153 """str: The name of the class: "CentralDihedrals"."""
3154 return "CentralDihedrals"
3156 @property
3157 def indexes(self) -> np.ndarray:
3158 """np.ndarray: A (n_angles, 4) shaped numpy array giving the atom indices
3159 of the dihedral angles to be calculated."""
3160 return self.angle_indexes.astype("int32")
3162 def generic_describe(self) -> list[str]:
3163 """Returns a list of generic labels, not containing residue names.
3164 These can be used to stack tops of different topology.
3166 Returns:
3167 list[str]: A list of labels.
3169 """
3170 if hasattr(self.traj, "clustal_w"):
3171 clustal_w = np.array([*self.traj.clustal_w])
3172 count = len(np.where(clustal_w != "-")[0])
3173 assert count == self.traj.n_residues, (
3174 f"Provided clustal W alignment {self.traj.clustal_w} does not "
3175 f"contain as many residues as traj {self.traj.n_residues}. Can not "
3176 f"use this alignment."
3177 )
3178 _psi_inds = (np.arange(len(clustal_w)) + 1)[clustal_w != "-"][
3179 :-1
3180 ] # last residue can't have psi
3181 _phi_inds = (np.arange(len(clustal_w)) + 1)[clustal_w != "-"][
3182 1:
3183 ] # first residue can't have phi
3184 if self.omega:
3185 _omega_inds = (np.arange(len(clustal_w)) + 1)[clustal_w != "-"][
3186 :-1
3187 ] # last residue can't have omega
3188 assert len(_psi_inds) == len(self._psi_inds)
3189 assert len(_phi_inds) == len(self._phi_inds)
3190 if self.omega:
3191 assert len(_omega_inds) == len(self._omega_inds)
3192 else:
3193 _psi_inds = np.arange(len(self._psi_inds)) + 1
3194 _phi_inds = np.arange(len(self._phi_inds)) + 1
3195 if self.omega:
3196 _omega_inds = np.arange(len(self._omega_inds)) + 1
3198 if self.cossin:
3199 sin_cos = ("COS(PSI %s)", "SIN(PSI %s)")
3200 labels_psi = [
3201 (
3202 sin_cos[0] % i,
3203 sin_cos[1] % i,
3204 )
3205 for i in _psi_inds
3206 ]
3207 if self.omega:
3208 sin_cos = ("COS(OMEGA %s)", "SIN(OMEGA %s)")
3209 labels_omega = [
3210 (
3211 sin_cos[0] % i,
3212 sin_cos[1] % i,
3213 )
3214 for i in _omega_inds
3215 ]
3216 sin_cos = ("COS(PHI %s)", "SIN(PHI %s)")
3217 labels_phi = [
3218 (
3219 sin_cos[0] % i,
3220 sin_cos[1] % i,
3221 )
3222 for i in _phi_inds
3223 ]
3224 # produce the same ordering as the given indices (phi_1, psi_1, ..., phi_n, psi_n)
3225 # or (cos(phi_1), sin(phi_1), cos(psi_1), sin(psi_1), ..., cos(phi_n), sin(phi_n), cos(psi_n), sin(psi_n))
3226 if self.omega:
3227 zipped = zip(labels_psi, labels_omega, labels_phi)
3228 else:
3229 zipped = zip(labels_psi, labels_phi)
3231 res = list(
3232 itertools.chain.from_iterable(itertools.chain.from_iterable(zipped))
3233 )
3234 else:
3235 labels_psi = [f"CENTERDIH PSI {i}" for i in _psi_inds]
3236 if self.omega:
3237 labels_omega = [f"CENTERDIH OMEGA {i}" for i in _omega_inds]
3238 labels_phi = [f"CENTERDIH PHI {i}" for i in _phi_inds]
3239 if self.omega:
3240 zipped = zip(labels_psi, labels_omega, labels_phi)
3241 else:
3242 zipped = zip(labels_psi, labels_phi)
3243 res = list(itertools.chain.from_iterable(zipped))
3245 return res
3247 def describe(self) -> list[str]:
3248 """Gives a list of strings describing this feature's feature-axis.
3250 A feature computes a collective variable (CV). A CV is aligned with an MD
3251 trajectory on the time/frame-axis. The feature axis is unique for every
3252 feature. A feature describing the backbone torsions (phi, omega, psi) would
3253 have a feature axis with the size 3*n-3, where n is the number of residues.
3254 The end-to-end distance of a linear protein in contrast would just have
3255 a feature axis with length 1. This `describe()` method will label these
3256 values unambiguously. A backbone torsion feature's `describe()` could be
3257 ['phi_1', 'omega_1', 'psi_1', 'phi_2', 'omega_2', ..., 'psi_n-1'].
3258 The end-to-end distance feature could be described by
3259 ['distance_between_MET1_and_LYS80'].
3261 Returns:
3262 list[str]: The labels of this feature. The length
3263 is determined by the `dih_indexes` and the `cossin` argument
3264 in the `__init__()` method. If `cossin` is false, then
3265 `len(describe()) == self.angle_indexes[-1]`, else `len(describe())`
3266 is twice as long.
3268 """
3269 top = self.top
3270 getlbl = (
3271 lambda at: f"RESID {at.residue.name}:{at.residue.resSeq:>4} CHAIN {at.residue.chain.index}"
3272 )
3274 if self.cossin:
3275 sin_cos = ("COS(PSI %s)", "SIN(PSI %s)")
3276 labels_psi = [
3277 (
3278 sin_cos[0] % getlbl(top.atom(ires[1])),
3279 sin_cos[1] % getlbl(top.atom(ires[1])),
3280 )
3281 for ires in self._psi_inds
3282 ]
3283 if self.omega:
3284 sin_cos = ("COS(OMEGA %s)", "SIN(OMEGA %s)")
3285 labels_omega = [
3286 (
3287 sin_cos[0] % getlbl(top.atom(ires[1])),
3288 sin_cos[1] % getlbl(top.atom(ires[1])),
3289 )
3290 for ires in self._omega_inds
3291 ]
3292 sin_cos = ("COS(PHI %s)", "SIN(PHI %s)")
3293 labels_phi = [
3294 (
3295 sin_cos[0] % getlbl(top.atom(ires[1])),
3296 sin_cos[1] % getlbl(top.atom(ires[1])),
3297 )
3298 for ires in self._phi_inds
3299 ]
3300 # produce the same ordering as the given indices (phi_1, psi_1, ..., phi_n, psi_n)
3301 # or (cos(phi_1), sin(phi_1), cos(psi_1), sin(psi_1), ..., cos(phi_n), sin(phi_n), cos(psi_n), sin(psi_n))
3302 if self.omega:
3303 zipped = zip(labels_psi, labels_omega, labels_phi)
3304 else:
3305 zip(labels_psi, labels_phi)
3307 res = list(
3308 itertools.chain.from_iterable(itertools.chain.from_iterable(zipped))
3309 )
3310 else:
3311 labels_psi = [
3312 f"CENTERDIH PSI " + getlbl(top.atom(ires[1]))
3313 for ires in self._psi_inds
3314 ]
3315 if self.omega:
3316 labels_omega = [
3317 "CENTERDIH OMEGA " + getlbl(top.atom(ires[1]))
3318 for ires in self._omega_inds
3319 ]
3320 labels_phi = [
3321 "CENTERDIH PHI " + getlbl(top.atom(ires[1]))
3322 for ires in self._phi_inds
3323 ]
3324 if self.omega:
3325 zipped = zip(labels_psi, labels_omega, labels_phi)
3326 else:
3327 zipped = zip(labels_psi, labels_phi)
3328 res = list(itertools.chain.from_iterable(zipped))
3329 return res
3332class SideChainDihedrals(DihedralFeature):
3333 """Feature that collects all dihedrals in the backbone of a topology.
3335 Attributes:
3336 top (mdtraj.Topology): Topology of this feature.
3337 indexes (np.ndarray): The numpy array returned from `top.select('all')`.
3338 options (list[str]): A list of possible sidechain angles ['chi1' to 'chi5'].
3340 """
3342 options: list[str] = ["chi1", "chi2", "chi3", "chi4", "chi5"]
3344 def __init__(
3345 self,
3346 traj: Union[SingleTraj, TrajEnsemble],
3347 selstr: Optional[str] = None,
3348 deg: bool = False,
3349 cossin: bool = False,
3350 periodic: bool = True,
3351 generic_labels: bool = False,
3352 check_aas: bool = True,
3353 delayed: bool = False,
3354 ) -> None:
3355 which = self.options
3356 # get all dihedral index pairs
3357 indices_dict = {k: getattr(traj, f"indices_{k}") for k in which}
3358 if selstr:
3359 selection = traj.top.select(selstr)
3360 truncated_indices_dict = {}
3361 for k, inds in indices_dict.items():
3362 mask = np.in1d(inds[:, 1], selection, assume_unique=True)
3363 truncated_indices_dict[k] = inds[mask]
3364 indices_dict = truncated_indices_dict
3366 valid = {k: indices_dict[k] for k in indices_dict if indices_dict[k].size > 0}
3367 if not valid:
3368 # Third Party Imports
3369 import mdtraj
3371 try:
3372 mdtraj.compute_chi1(traj)
3373 except Exception as e:
3374 raise ValueError(
3375 "Could not determine any side chain dihedrals for your topology! "
3376 "This is an error inside MDTraj. It errors with this message: "
3377 f"{e}. You can try to provide a custom_topology "
3378 f"for this protein to supersede MDTraj's sidechain recognition "
3379 f"algorithm."
3380 ) from e
3381 else:
3382 raise ValueError(f"No sidechain dihedrals for the trajectory {traj=}.")
3384 # for proteins that don't have some chi angles we filter which
3385 which = list(
3386 filter(
3387 lambda x: True if len(indices_dict[x]) > 0 else False,
3388 indices_dict.keys(),
3389 )
3390 )
3392 # change the sorting to be per-residue and not all chi1 and then all chi2 angles
3393 self.per_res_dict = {}
3394 for r in traj.top.residues:
3395 arrs = []
3396 bools = []
3397 for k in which:
3398 if np.any(np.in1d(valid[k], np.array([a.index for a in r.atoms]))):
3399 where = np.where(
3400 np.in1d(
3401 valid[k].flatten(), np.array([a.index for a in r.atoms])
3402 )
3403 )[0]
3404 arr = valid[k].flatten()[where]
3405 bools.append(True)
3406 arrs.append(arr)
3407 else:
3408 bools.append(False)
3409 if any(bools):
3410 self.per_res_dict[str(r)] = np.vstack(arrs)
3412 self._prefix_label_lengths = np.array(
3413 [len(indices_dict[k]) if k in which else 0 for k in self.options]
3414 )
3415 indices = np.vstack([v for v in self.per_res_dict.values()])
3417 super(SideChainDihedrals, self).__init__(
3418 traj=traj,
3419 dih_indexes=indices,
3420 deg=deg,
3421 cossin=cossin,
3422 periodic=periodic,
3423 check_aas=check_aas,
3424 delayed=delayed,
3425 )
3427 if generic_labels:
3428 self.describe = self.generic_describe
3430 @property
3431 def name(self) -> str:
3432 """str: The name of the class: "SideChainDihedrals"."""
3433 return "SideChainDihedrals"
3435 @property
3436 def indexes(self) -> np.ndarray:
3437 """np.ndarray: A (n_angles, 4) shaped numpy array giving the atom indices
3438 of the dihedral angles to be calculated."""
3439 return self.angle_indexes
3441 def generic_describe(self) -> list[str]:
3442 """Returns a list of generic labels, not containing residue names.
3443 These can be used to stack tops of different topology.
3445 Returns:
3446 list[str]: A list of labels.
3448 """
3449 if hasattr(self.traj, "clustal_w"):
3450 residue_mapping = {}
3451 i = 1
3452 j = 1
3453 for res in [*self.traj.clustal_w]:
3454 if res == "-":
3455 j += 1
3456 continue
3457 residue_mapping[i] = j
3458 i += 1
3459 j += 1
3461 def getlbl(at: md.topology.Atom):
3462 resSeq = at.residue.resSeq
3463 resSeq = residue_mapping[resSeq]
3464 r = f"RESID {at.residue.name}:{resSeq:>4} CHAIN {at.residue.chain.index}"
3465 return r
3467 else:
3468 getlbl = (
3469 lambda at: f"RESID {at.residue.name}:{at.residue.resSeq:>4} CHAIN {at.residue.chain.index}"
3470 )
3471 prefixes = []
3472 for lengths, label in zip(self._prefix_label_lengths, self.options):
3473 if self.cossin:
3474 lengths *= 2
3475 prefixes.extend([label.upper()] * lengths)
3476 prefixes = []
3477 for key, value in self.per_res_dict.items():
3478 if self.cossin:
3479 prefixes.extend(
3480 [opt.upper() for opt in self.options[: value.shape[0]]] * 2
3481 )
3482 else:
3483 prefixes.extend([opt.upper() for opt in self.options[: value.shape[0]]])
3485 if self.cossin:
3486 cossin = ("COS({dih} {res})", "SIN({dih} {res})")
3487 labels = [
3488 s.format(
3489 dih=prefixes[j + len(cossin) * i],
3490 res=getlbl(self.top.atom(ires[1])),
3491 )
3492 for i, ires in enumerate(self.angle_indexes)
3493 for j, s in enumerate(cossin)
3494 ]
3495 else:
3496 labels = [
3497 "SIDECHDIH {dih} {res}".format(
3498 dih=prefixes[i], res=getlbl(self.top.atom(ires[1]))
3499 )
3500 for i, ires in enumerate(self.angle_indexes)
3501 ]
3502 labels = list(map(lambda x: x[:14] + x[27:31], labels))
3503 return labels
3505 def describe(self) -> list[str]:
3506 """Gives a list of strings describing this feature's feature-axis.
3508 A feature computes a collective variable (CV). A CV is aligned with an MD
3509 trajectory on the time/frame-axis. The feature axis is unique for every
3510 feature. A feature describing the backbone torsions (phi, omega, psi) would
3511 have a feature axis with the size 3*n-3, where n is the number of residues.
3512 The end-to-end distance of a linear protein in contrast would just have
3513 a feature axis with length 1. This `describe()` method will label these
3514 values unambiguously. A backbone torsion feature's `describe()` could be
3515 ['phi_1', 'omega_1', 'psi_1', 'phi_2', 'omega_2', ..., 'psi_n-1'].
3516 The end-to-end distance feature could be described by
3517 ['distance_between_MET1_and_LYS80'].
3519 Returns:
3520 list[str]: The labels of this feature. The length
3521 is determined by the `dih_indexes` and the `cossin` argument
3522 in the `__init__()` method. If `cossin` is false, then
3523 `len(describe()) == self.angle_indexes[-1]`, else `len(describe())`
3524 is twice as long.
3526 """
3527 top = self.top
3528 getlbl = (
3529 lambda at: f"RESID {at.residue.name}:{at.residue.resSeq:>4} CHAIN {at.residue.chain.index}"
3530 )
3531 prefixes = []
3532 for lengths, label in zip(self._prefix_label_lengths, self.options):
3533 if self.cossin:
3534 lengths *= 2
3535 prefixes.extend([label.upper()] * lengths)
3536 prefixes = []
3537 for key, value in self.per_res_dict.items():
3538 if self.cossin:
3539 prefixes.extend(
3540 [opt.upper() for opt in self.options[: value.shape[0]]] * 2
3541 )
3542 else:
3543 prefixes.extend([opt.upper() for opt in self.options[: value.shape[0]]])
3545 if self.cossin:
3546 cossin = ("COS({dih} {res})", "SIN({dih} {res})")
3547 labels = [
3548 s.format(
3549 dih=prefixes[j + len(cossin) * i],
3550 res=getlbl(self.top.atom(ires[1])),
3551 )
3552 for i, ires in enumerate(self.angle_indexes)
3553 for j, s in enumerate(cossin)
3554 ]
3555 else:
3556 labels = [
3557 "SIDECHDIH {dih} {res}".format(
3558 dih=prefixes[i], res=getlbl(self.top.atom(ires[1]))
3559 )
3560 for i, ires in enumerate(self.angle_indexes)
3561 ]
3563 return labels
3566class AllCartesians(SelectionFeature):
3567 """Feature that collects all cartesian positions of all atoms in the trajectory.
3569 Note:
3570 The order of the cartesians is not as in standard MD coordinates.
3571 Rather than giving the positions of all atoms of the first residue, and
3572 then all positions of the second, and so on, this feature gives all
3573 central (backbone) cartesians first, followed by the cartesians of the
3574 sidechains. This allows better and faster backmapping. See
3575 `encodermap.misc.backmapping._full_backmapping_np` for mor info,
3576 why this is easier.
3578 Attributes:
3579 top (mdtraj.Topology): Topology of this feature.
3580 indexes (np.ndarray): The numpy array returned from `top.select('all')`.
3581 prefix_label (str): A prefix for the labels. In this case, it is 'POSITION'.
3583 """
3585 prefix_label: str = "POSITION "
3587 def __init__(
3588 self,
3589 traj: SingleTraj,
3590 check_aas: bool = True,
3591 generic_labels: bool = False,
3592 delayed: bool = False,
3593 ) -> None:
3594 """Instantiate the AllCartesians class.
3596 Args:
3597 traj (em.SingleTraj): A mdtraj topology.
3599 """
3600 self.central_indices = CentralCartesians(traj).indexes
3601 try:
3602 indexes = np.concatenate(
3603 [self.central_indices, SideChainCartesians(traj).indexes]
3604 )
3605 except ValueError as e:
3606 if "Could not determine" in str(e):
3607 warnings.warn(
3608 f"The topology of {traj} does not contain any sidechains. The "
3609 f"`AllCartesians` feature will just contain backbone coordinates."
3610 )
3611 indexes = CentralCartesians(traj).indexes
3612 else:
3613 raise e
3614 super().__init__(traj, indexes=indexes, check_aas=check_aas, delayed=delayed)
3615 if generic_labels:
3616 self.describe = self.generic_describe
3618 @property
3619 def name(self) -> str:
3620 """str: The name of this class: 'AllCartesians'"""
3621 return "AllCartesians"
3623 def generic_describe(self) -> list[str]:
3624 """Returns a list of generic labels, not containing residue names.
3625 These can be used to stack tops of different topology.
3627 Returns:
3628 list[str]: A list of labels.
3630 """
3631 labels = []
3632 if hasattr(self.traj, "clustal_w"):
3633 raise NotImplementedError(
3634 f"SideChainCartesians can't currently handle alignments. The "
3635 f"implementation below won't probably work."
3636 )
3637 # clustal_w_ = [*self.traj.clustal_w]
3638 # clustal_w = [None] * (len(clustal_w_) * 3)
3639 # clustal_w[::3] = clustal_w_
3640 # clustal_w[1::3] = clustal_w_
3641 # clustal_w[2::3] = clustal_w_
3642 # clustal_w = np.array(clustal_w)
3643 # indices = (np.arange(len(clustal_w)) + 1)[clustal_w != "-"]
3644 # assert len(indices) == len(
3645 # self.central_indexes
3646 # ), f"{indices.shape=} {self.indexes.shape=} {clustal_w.shape=} {clustal_w[:20]}"
3647 else:
3648 indices = self.indexes
3649 visited_residues = set()
3650 for i in indices:
3651 if i in self.central_indices:
3652 position = "c"
3653 else:
3654 position = "s"
3655 residx = self.traj.top.atom(i).residue.index + 1
3656 rescode = str(residx) + position
3657 if rescode not in visited_residues:
3658 visited_residues.add(rescode)
3659 atom_index = 1
3660 for pos in ["X", "Y", "Z"]:
3661 labels.append(
3662 f"{self.prefix_label} {pos} {atom_index} {residx} {position}"
3663 )
3664 atom_index += 1
3665 return labels
3667 def describe(self) -> list[str]:
3668 """Gives a list of strings describing this feature's feature-axis.
3670 A feature computes a collective variable (CV). A CV is aligned with an MD
3671 trajectory on the time/frame-axis. The feature axis is unique for every
3672 feature. A feature describing the backbone torsions (phi, omega, psi) would
3673 have a feature axis with the size 3*n-3, where n is the number of residues.
3674 The end-to-end distance of a linear protein in contrast would just have
3675 a feature axis with length 1. This `describe()` method will label these
3676 values unambiguously. A backbone torsion feature's `describe()` could be
3677 ['phi_1', 'omega_1', 'psi_1', 'phi_2', 'omega_2', ..., 'psi_n-1'].
3678 The end-to-end distance feature could be described by
3679 ['distance_between_MET1_and_LYS80'].
3681 Returns:
3682 list[str]: The labels of this feature. This list has as many entries as atoms in `self.top`.
3684 """
3685 getlbl = (
3686 lambda at: f"ATOM {at.name:>4}:{at.index:5} {at.residue.name}:{at.residue.resSeq:>4} CHAIN {at.residue.chain.index}"
3687 )
3688 labels = []
3689 for i in self.indexes:
3690 for pos in ["X", "Y", "Z"]:
3691 labels.append(
3692 f"{self.prefix_label} {pos} {getlbl(self.top.atom(i))}"
3693 )
3694 return labels
3697class CentralCartesians(SelectionFeature):
3698 """Feature that collects all cartesian positions of the backbone atoms.
3700 Examples:
3701 >>> import encodermap as em
3702 >>> from pprint import pprint
3703 >>> traj = em.load_project("pASP_pGLU", 0)[0]
3704 >>> traj # doctest: +ELLIPSIS
3705 <encodermap.SingleTraj object...>
3706 >>> feature = em.features.CentralCartesians(traj, generic_labels=False)
3707 >>> pprint(feature.describe()) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
3708 ['CENTERPOS X ATOM N: 0 GLU: 1 CHAIN 0',
3709 'CENTERPOS Y ATOM N: 0 GLU: 1 CHAIN 0',
3710 'CENTERPOS Z ATOM N: 0 GLU: 1 CHAIN 0',
3711 'CENTERPOS X ATOM CA: 3 GLU: 1 CHAIN 0',
3712 'CENTERPOS Y ATOM CA: 3 GLU: 1 CHAIN 0',
3713 'CENTERPOS Z ATOM CA: 3 GLU: 1 CHAIN 0',
3714 '...
3715 'CENTERPOS Z ATOM C: 65 GLU: 6 CHAIN 0']
3716 >>> feature = em.features.CentralCartesians(traj, generic_labels=True)
3717 >>> pprint(feature.describe()) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
3718 ['CENTERPOS X 1',
3719 'CENTERPOS Y 1',
3720 'CENTERPOS Z 1',
3721 'CENTERPOS X 2',
3722 'CENTERPOS Y 2',
3723 'CENTERPOS Z 2',
3724 '...
3725 'CENTERPOS Z 18']
3727 """
3729 prefix_label: str = "CENTERPOS"
3731 def __init__(
3732 self,
3733 traj: SingleTraj,
3734 generic_labels: bool = False,
3735 check_aas: bool = True,
3736 delayed: bool = False,
3737 ) -> None:
3738 """Instantiate the CentralCartesians class.
3740 In contrary to PyEMMA (which has now been archived), this feature returns
3741 a high-dimensional array along the feature axis. In PyEMMA's and now in
3742 EncoderMap's `SelectionFeature`, the cartesian coordinates of the atoms are
3743 returned as a list of [x1, y1, z1, x2, y2, z2, x3, ..., zn]. This feature
3744 yields a (n_atoms, 3) array with an extra dimension (carteisan coordinate)::
3746 [
3747 [x1, y1, z1],
3748 [x2, y2, z2],
3749 ...,
3750 [xn, yn, zn],
3751 ]
3753 Args:
3754 traj (SingleTraj): An instance of `encodermap.SingleTraj`. Using
3755 `SingleTrajs` instead of `md.Topology` (as it was in PyEMMA),
3756 offers access to EncoderMap's `CustomTopology`, which can be
3757 used to adapt the featurization and NN training for a wide
3758 range of protein and non-protein MD trajectories.
3759 generic_labels (bool): Whether to use generic labels to describe the
3760 feature. Generic labels can be used to align different topologies.
3761 If False, the labels returned by this feature's `describe()` method
3762 are topology-specific ("CENTERPOS X ATOM N: 0 ASP: 1 CHAIN 0").
3763 If True, the labels are generic ("CENTERPOS X 0") and can be
3764 aligned with other Features, that contain topologies, of which ASP
3765 might not be the first amino acid.
3766 check_aas (bool): Whether to check if all residues in `traj` are
3767 known, prior to computing.
3769 """
3770 self.traj = traj
3771 self.indexes = self.traj.top.select("name CA or name C or name N")
3772 # filter out unwanted indexes
3773 unwanted_resnames = [
3774 k for k, v in self.traj._custom_top.amino_acid_codes.items() if v is None
3775 ]
3776 self.indexes = np.array(
3777 list(
3778 filter(
3779 lambda x: self.traj.top.atom(x).residue.name
3780 not in unwanted_resnames,
3781 self.indexes,
3782 )
3783 )
3784 )
3785 super().__init__(
3786 self.traj, indexes=self.indexes, check_aas=check_aas, delayed=delayed
3787 )
3788 self.dimension = 3 * len(self.indexes)
3790 if generic_labels:
3791 self.describe = self.generic_describe
3793 def generic_describe(self) -> list[str]:
3794 """Returns a list of generic labels, not containing residue names.
3795 These can be used to stack tops of different topology.
3797 Returns:
3798 list[str]: A list of labels.
3800 """
3801 labels = []
3802 if hasattr(self.traj, "clustal_w"):
3803 clustal_w_ = [*self.traj.clustal_w]
3804 clustal_w = [None] * (len(clustal_w_) * 3)
3805 clustal_w[::3] = clustal_w_
3806 clustal_w[1::3] = clustal_w_
3807 clustal_w[2::3] = clustal_w_
3808 clustal_w = np.array(clustal_w)
3809 indices = (np.arange(len(clustal_w)) + 1)[clustal_w != "-"]
3810 assert len(indices) == len(
3811 self.indexes
3812 ), f"{indices.shape=} {self.indexes.shape=} {clustal_w.shape=} {clustal_w[:20]}"
3813 else:
3814 indices = np.arange(len(self.indexes)) + 1
3815 for i in indices:
3816 for pos in ["X", "Y", "Z"]:
3817 labels.append(f"{self.prefix_label} {pos} {i}")
3818 return labels
3820 def describe(self) -> list[str]:
3821 """Gives a list of strings describing this feature's feature-axis.
3823 A feature computes a collective variable (CV). A CV is aligned with an MD
3824 trajectory on the time/frame-axis. The feature axis is unique for every
3825 feature. A feature describing the backbone torsions (phi, omega, psi) would
3826 have a feature axis with the size 3*n-3, where n is the number of residues.
3827 The end-to-end distance of a linear protein in contrast would just have
3828 a feature axis with length 1. This `describe()` method will label these
3829 values unambiguously. A backbone torsion feature's `describe()` could be
3830 ['phi_1', 'omega_1', 'psi_1', 'phi_2', 'omega_2', ..., 'psi_n-1'].
3831 The end-to-end distance feature could be described by
3832 ['distance_between_MET1_and_LYS80'].
3834 Returns:
3835 list[str]: The labels of this feature.
3837 """
3838 getlbl = (
3839 lambda at: f"ATOM {at.name:>4}:{at.index:5} {at.residue.name}:{at.residue.resSeq:>4} CHAIN {at.residue.chain.index}"
3840 )
3841 labels = []
3842 for i in self.indexes:
3843 for pos in ["X", "Y", "Z"]:
3844 labels.append(
3845 f"{self.prefix_label} {pos} {getlbl(self.top.atom(i))}"
3846 )
3847 return labels
3849 @property
3850 def name(self) -> str:
3851 """str: The name of the class: "CentralCartesians"."""
3852 return "CentralCartesians"
3855class SideChainCartesians(SelectionFeature):
3856 """Feature that collects all cartesian positions of all non-backbone atoms.
3858 Attributes:
3859 top (mdtraj.Topology): Topology of this feature.
3860 indexes (np.ndarray): The numpy array returned from `top.select('all')`.
3861 prefix_label (str): A prefix for the labels. In this case it is 'SIDECHPOS'.
3863 """
3865 prefix_label: str = "SIDECHPOS"
3867 def __init__(
3868 self,
3869 traj: SingleTraj,
3870 check_aas: bool = True,
3871 generic_labels: bool = False,
3872 delayed: bool = False,
3873 ) -> None:
3874 """Instantiate the `SideChainCartesians feature.
3876 Uses MDTraj's 'not backbone' topology selector. Is not guaranteed to
3877 work with the better tested `SideChainDihedrals`.
3879 """
3880 self.traj = traj
3881 dihe_indices = np.unique(SideChainDihedrals(traj=traj).angle_indexes.flatten())
3882 backbone_indices = CentralCartesians(traj=traj).indexes
3883 indexes = np.setdiff1d(dihe_indices, backbone_indices)
3884 assert indexes[0] in dihe_indices and indexes[0] not in backbone_indices
3885 super().__init__(
3886 self.traj, indexes=indexes, check_aas=check_aas, delayed=delayed
3887 )
3888 self.dimension = 3 * len(self.indexes)
3889 if generic_labels:
3890 self.describe = self.generic_describe
3892 def generic_describe(self) -> list[str]:
3893 """Returns a list of generic labels, not containing residue names.
3894 These can be used to stack tops of different topology.
3896 Returns:
3897 list[str]: A list of labels.
3899 """
3900 labels = []
3901 if hasattr(self.traj, "clustal_w"):
3902 raise NotImplementedError(
3903 f"SideChainCartesians can't currently handle alignments. The "
3904 f"implementation below won't probably work."
3905 )
3906 # clustal_w_ = [*self.traj.clustal_w]
3907 # clustal_w = [None] * (len(clustal_w_) * 3)
3908 # clustal_w[::3] = clustal_w_
3909 # clustal_w[1::3] = clustal_w_
3910 # clustal_w[2::3] = clustal_w_
3911 # clustal_w = np.array(clustal_w)
3912 # indices = (np.arange(len(clustal_w)) + 1)[clustal_w != "-"]
3913 # assert len(indices) == len(
3914 # self.central_indexes
3915 # ), f"{indices.shape=} {self.indexes.shape=} {clustal_w.shape=} {clustal_w[:20]}"
3916 else:
3917 indices = self.indexes
3918 visited_residues = set()
3919 for i in indices:
3920 residx = self.traj.top.atom(i).residue.index + 1
3921 if residx not in visited_residues:
3922 visited_residues.add(residx)
3923 atom_index = 1
3924 for pos in ["X", "Y", "Z"]:
3925 labels.append(f"{self.prefix_label} {pos} {atom_index} {residx}")
3926 atom_index += 1
3927 return labels
3929 @property
3930 def name(self):
3931 """str: The name of the class: "SideChainCartesians"."""
3932 return "SideChainCartesians"
3934 def describe(self) -> list[str]:
3935 """Gives a list of strings describing this feature's feature-axis.
3937 A feature computes a collective variable (CV). A CV is aligned with an MD
3938 trajectory on the time/frame-axis. The feature axis is unique for every
3939 feature. A feature describing the backbone torsions (phi, omega, psi) would
3940 have a feature axis with the size 3*n-3, where n is the number of residues.
3941 The end-to-end distance of a linear protein in contrast would just have
3942 a feature axis with length 1. This `describe()` method will label these
3943 values unambiguously. A backbone torsion feature's `describe()` could be
3944 ['phi_1', 'omega_1', 'psi_1', 'phi_2', 'omega_2', ..., 'psi_n-1'].
3945 The end-to-end distance feature could be described by
3946 ['distance_between_MET1_and_LYS80'].
3948 Returns:
3949 list[str]: The labels of this feature.
3951 """
3952 getlbl = (
3953 lambda at: f"ATOM {at.name:>4}:{at.index:5} {at.residue.name}:{at.residue.resSeq:>4} CHAIN {at.residue.chain.index}"
3954 )
3955 labels = []
3956 for i in self.indexes:
3957 for pos in ["X", "Y", "Z"]:
3958 labels.append(
3959 f"{self.prefix_label} {pos} {getlbl(self.top.atom(i))}"
3960 )
3961 return labels
3964class AllBondDistances(DistanceFeature):
3965 """Feature that collects all bonds in a topology.
3967 Attributes:
3968 top (mdtraj.Topology): Topology of this feature.
3969 indexes (np.ndarray): The numpy array returned from `top.select('all')`.
3970 prefix_label (str): A prefix for the labels. In this case it is 'DISTANCE'.
3972 """
3974 prefix_label: str = "DISTANCE "
3976 def __init__(
3977 self,
3978 traj: SingleTraj,
3979 distance_indexes: Optional[np.ndarray] = None,
3980 periodic: bool = True,
3981 check_aas: bool = True,
3982 delayed: bool = False,
3983 ) -> None:
3984 self.distance_indexes = distance_indexes
3985 if self.distance_indexes is None:
3986 self.traj = traj
3987 self.distance_indexes = np.vstack(
3988 [[b[0].index, b[1].index] for b in self.traj.top.bonds]
3989 )
3990 # print(self.distance_indexes, len(self.distance_indexes))
3991 super().__init__(
3992 self.traj,
3993 self.distance_indexes,
3994 periodic,
3995 check_aas=check_aas,
3996 delayed=delayed,
3997 )
3998 else:
3999 super().__init__(
4000 self.traj,
4001 self.distance_indexes,
4002 periodic,
4003 check_aas=check_aas,
4004 delayed=delayed,
4005 )
4006 # print(self.distance_indexes, len(self.distance_indexes))
4008 def generic_describe(self) -> list[str]:
4009 """Returns a list of generic labels, not containing residue names.
4010 These can be used to stack tops of different topology.
4012 Returns:
4013 list[str]: A list of labels.
4015 """
4016 if hasattr(self.traj, "clustal_w"):
4017 raise NotImplementedError(
4018 f"AllBondDistances can currently not align disjoint sequences."
4019 )
4020 else:
4021 indices = np.arange(len(self.distance_indexes)) + 1
4022 labels = []
4023 for i in indices:
4024 labels.append(f"{self.prefix_label}{i}")
4025 return labels
4027 def describe(self) -> list[str]:
4028 """Gives a list of strings describing this feature's feature-axis.
4030 A feature computes a collective variable (CV). A CV is aligned with an MD
4031 trajectory on the time/frame-axis. The feature axis is unique for every
4032 feature. A feature describing the backbone torsions (phi, omega, psi) would
4033 have a feature axis with the size 3*n-3, where n is the number of residues.
4034 The end-to-end distance of a linear protein in contrast would just have
4035 a feature axis with length 1. This `describe()` method will label these
4036 values unambiguously. A backbone torsion feature's `describe()` could be
4037 ['phi_1', 'omega_1', 'psi_1', 'phi_2', 'omega_2', ..., 'psi_n-1'].
4038 The end-to-end distance feature could be described by
4039 ['distance_between_MET1_and_LYS80'].
4041 Returns:
4042 list[str]: The labels of this feature.
4044 """
4045 getlbl = (
4046 lambda at: f"ATOM {at.name:>4}:{at.index:5} {at.residue.name}:{at.residue.resSeq:>4}"
4047 )
4048 labels = []
4049 for i, j in self.distance_indexes:
4050 i, j = self.top.atom(i), self.top.atom(j)
4051 labels.append(
4052 f"{self.prefix_label}{getlbl(i)} DIST {getlbl(j)} CHAIN {int(np.unique([a.residue.chain.index for a in [i, j]])[0])}"
4053 )
4054 return labels
4056 @property
4057 def name(self) -> str:
4058 """str: The name of the class: "AllBondDistances"."""
4059 return "AllBondDistances"
4061 @property
4062 def indexes(self) -> np.ndarray:
4063 """np.ndarray: A (n_angles, 2) shaped numpy array giving the atom indices
4064 of the distances to be calculated."""
4065 return self.distance_indexes
4068class CentralBondDistances(AllBondDistances):
4069 """Feature that collects all bonds in the backbone of a topology.
4071 Attributes:
4072 top (mdtraj.Topology): Topology of this feature.
4073 indexes (np.ndarray): The numpy array returned from `top.select('all')`.
4074 prefix_label (str): A prefix for the labels. In this case, it is 'CENTERDISTANCE'.
4076 """
4078 prefix_label: str = "CENTERDISTANCE "
4080 def __init__(
4081 self,
4082 traj: SingleTraj,
4083 distance_indexes: Optional[np.ndarray] = None,
4084 periodic: bool = True,
4085 generic_labels: bool = False,
4086 check_aas: bool = True,
4087 delayed: bool = False,
4088 ) -> None:
4089 self.traj = traj
4090 select = traj.top.select("name CA or name C or name N")
4092 if distance_indexes is None:
4093 distance_indexes = []
4095 for b in traj.top.bonds:
4096 if np.all([np.isin(x.index, select) for x in b]):
4097 distance_indexes.append([x.index for x in b])
4098 distance_indexes = np.sort(distance_indexes, axis=0)
4100 if generic_labels:
4101 self.describe = self.generic_describe
4103 super().__init__(
4104 self.traj, distance_indexes, periodic, check_aas=check_aas, delayed=delayed
4105 )
4107 def generic_describe(self) -> list[str]:
4108 """Returns a list of generic labels, not containing residue names.
4109 These can be used to stack tops of different topology.
4111 Returns:
4112 list[str]: A list of labels.
4114 """
4115 if hasattr(self.traj, "clustal_w"):
4116 indices = []
4117 clustal_w_ = [*self.traj.clustal_w]
4118 clustal_w = [None] * (len(clustal_w_) * 3)
4119 clustal_w[::3] = clustal_w_
4120 clustal_w[1::3] = clustal_w_
4121 clustal_w[2::3] = clustal_w_
4122 i = 0
4123 for a, b in zip(clustal_w[:-1], clustal_w[1:]):
4124 i += 1
4125 if a == "-":
4126 continue
4127 indices.append(i)
4128 indices = np.array(indices)
4129 else:
4130 indices = np.arange(len(self.distance_indexes)) + 1
4131 labels = []
4132 for i in indices:
4133 labels.append(f"{self.prefix_label}{i}")
4134 return labels
4136 @property
4137 def name(self) -> str:
4138 """str: The name of the class: "CentralBondDistances"."""
4139 return "CentralBondDistances"
4141 @property
4142 def indexes(self) -> np.ndarray:
4143 """np.ndarray: A (n_angles, 2) shaped numpy array giving the atom indices
4144 of the distances to be calculated."""
4145 return self.distance_indexes
4148class SideChainBondDistances(AllBondDistances):
4149 """Feature that collects all bonds not in the backbone of a topology.
4151 Attributes:
4152 top (mdtraj.Topology): Topology of this feature.
4153 indexes (np.ndarray): The numpy array returned from `top.select('all')`.
4154 prefix_label (str): A prefix for the labels. In this case it is 'SIDECHDISTANCE'.
4156 """
4158 prefix_label: str = "SIDECHDISTANCE "
4160 def __init__(
4161 self,
4162 traj: SingleTraj,
4163 periodic: bool = True,
4164 check_aas: bool = True,
4165 generic_labels: bool = False,
4166 delayed: bool = False,
4167 ) -> None:
4168 self.traj = traj
4170 which = ["chi1", "chi2", "chi3", "chi4", "chi5"]
4171 indices_dict = {k: getattr(self.traj, f"indices_{k}") for k in which}
4172 # flat_list = [
4173 # item
4174 # for sublist in indices_dict.values()
4175 # for item in sublist.flatten().tolist()
4176 # ]
4177 # atoms_in_sidechain_dihedrals = set(flat_list)
4179 distance_indexes = []
4180 for angle, indices in indices_dict.items():
4181 for index in indices:
4182 if angle == "chi1":
4183 distance_indexes.append([index[1], index[2]])
4184 distance_indexes.append([index[2], index[3]])
4185 else:
4186 distance_indexes.append([index[2], index[3]])
4187 distance_indexes = np.sort(distance_indexes, axis=0)
4188 super().__init__(
4189 self.traj,
4190 distance_indexes=distance_indexes,
4191 periodic=periodic,
4192 check_aas=check_aas,
4193 delayed=delayed,
4194 )
4195 if generic_labels:
4196 self.describe = self.generic_describe
4198 def generic_describe(self) -> list[str]:
4199 """Returns a list of generic labels, not containing residue names.
4200 These can be used to stack tops of different topology.
4202 Returns:
4203 list[str]: A list of labels.
4205 """
4206 labels = []
4207 if hasattr(self.traj, "clustal_w"):
4208 raise NotImplementedError
4209 # indices = []
4210 # clustal_w_ = [*self.traj.clustal_w]
4211 # clustal_w = [None] * (len(clustal_w_) * 3)
4212 # clustal_w[::3] = clustal_w_
4213 # clustal_w[1::3] = clustal_w_
4214 # clustal_w[2::3] = clustal_w_
4215 # i = 0
4216 # for a, b in zip(clustal_w[:-1], clustal_w[1:]):
4217 # i += 1
4218 # if a == "-":
4219 # continue
4220 # indices.append(i)
4221 # indices = np.array(indices)
4222 else:
4223 indices = self.distance_indexes
4224 visited_residues = set()
4225 for a, b in indices:
4226 residx_a = self.traj.top.atom(a).residue.index + 1
4227 residx_b = self.traj.top.atom(b).residue.index + 1
4228 assert residx_a == residx_b, (
4229 f"The sidechain distance between atom {self.traj.top.atom(a)} and "
4230 f"{self.traj.top.atom(b)} describes a distance between two residues "
4231 f"({residx_a} and {residx_b})."
4232 f"As sidechains belong always to a single residue, something is off. "
4233 )
4234 if residx_a not in visited_residues:
4235 visited_residues.add(residx_a)
4236 distance_index = 1
4237 labels.append(f"{self.prefix_label} {distance_index} {residx_a}")
4238 distance_index += 1
4239 return labels
4241 @property
4242 def name(self):
4243 """str: The name of the class: "SideChainBondDistances"."""
4244 return "SideChainBondDistances"
4246 @property
4247 def indexes(self):
4248 """np.ndarray: A (n_angles, 2) shaped numpy array giving the atom indices
4249 of the distances to be calculated."""
4250 return self.distance_indexes
4253class CentralAngles(AngleFeature):
4254 """Feature that collects all angles in the backbone of a topology.
4256 Attributes:
4257 top (mdtraj.Topology): Topology of this feature.
4258 indexes (np.ndarray): The numpy array returned from `top.select('all')`.
4259 prefix_label (str): A prefix for the labels. In this case it is 'CENTERANGLE'.
4261 """
4263 prefix_label: str = "CENTERANGLE "
4265 def __init__(
4266 self,
4267 traj: Union[SingleTraj, TrajEnsemble],
4268 deg: bool = False,
4269 cossin: bool = False,
4270 periodic: bool = True,
4271 generic_labels: bool = False,
4272 check_aas: bool = True,
4273 delayed: bool = False,
4274 ) -> None:
4275 self.traj = traj
4276 select = traj.top.select("name CA or name C or name N")
4277 # add 4 bonds in KAC
4278 # if any([r.name == "KAC" for r in top.residues]):
4279 # self.top = add_KAC_backbone_bonds(self.top)
4280 bonds = np.vstack([[x.index for x in b] for b in traj.top.bonds])
4281 bond_names = np.vstack([[x for x in b] for b in traj.top.bonds])
4282 angle_indexes = []
4283 for a in select:
4284 where = np.where(bonds == a)
4285 possible_bonds = bonds[where[0], :]
4286 possible_bond_names = bond_names[where[0], :]
4287 where = np.isin(possible_bonds, select)
4288 hits = np.count_nonzero(np.all(where, axis=1))
4289 if hits <= 1:
4290 # atom is not part of any angles
4291 continue
4292 elif hits == 2:
4293 where = np.all(where, axis=1)
4294 these = np.unique(
4295 [traj.top.atom(i).index for i in possible_bonds[where, :].flatten()]
4296 )
4297 angle_indexes.append(these)
4298 elif hits == 3:
4299 a = traj.top.atom(a)
4300 bonds = "\n".join(
4301 [
4302 f"BOND {str(i):<10}-{str(j):>10}"
4303 for i, j in traj.top.bonds
4304 if i == a or j == a
4305 ]
4306 )
4307 raise Exception(
4308 f"The atom {a} takes part in three possible angles defined "
4309 f"by the C, CA, and N atoms:\n{bonds}."
4310 )
4311 elif hits == 4:
4312 raise Exception(
4313 "Can't deal with these angles. One atom is part of four possible angles"
4314 )
4315 else:
4316 raise Exception(
4317 "Can't deal with these angles. One atom is part of more than three angles"
4318 )
4320 angle_indexes = np.vstack(angle_indexes)
4321 angle_indexes = np.unique(angle_indexes, axis=0)
4322 if generic_labels:
4323 self.describe = self.generic_describe
4324 super().__init__(
4325 traj, angle_indexes, deg, cossin, periodic, check_aas, delayed=delayed
4326 )
4328 def generic_describe(self) -> list[str]:
4329 """Returns a list of generic labels, not containing residue names.
4330 These can be used to stack tops of different topology.
4332 Returns:
4333 list[str]: A list of labels.
4335 """
4336 if hasattr(self.traj, "clustal_w"):
4337 indices = []
4338 clustal_w_ = [*self.traj.clustal_w]
4339 clustal_w = [None] * (len(clustal_w_) * 3)
4340 clustal_w[::3] = clustal_w_
4341 clustal_w[1::3] = clustal_w_
4342 clustal_w[2::3] = clustal_w_
4343 i = 0
4344 for a, b, c in zip(clustal_w[:-2], clustal_w[1:-1], clustal_w[2:]):
4345 i += 1
4346 if a == "-":
4347 continue
4348 indices.append(i)
4349 indices = np.array(indices)
4350 else:
4351 indices = np.arange(len(self.angle_indexes)) + 1
4352 labels = []
4353 for i in indices:
4354 labels.append(f"{self.prefix_label}{i}")
4355 return labels
4357 def describe(self) -> list[str]:
4358 """Gives a list of strings describing this feature's feature-axis.
4360 A feature computes a collective variable (CV). A CV is aligned with an MD
4361 trajectory on the time/frame-axis. The feature axis is unique for every
4362 feature. A feature describing the backbone torsions (phi, omega, psi) would
4363 have a feature axis with the size 3*n-3, where n is the number of residues.
4364 The end-to-end distance of a linear protein in contrast would just have
4365 a feature axis with length 1. This `describe()` method will label these
4366 values unambiguously. A backbone torsion feature's `describe()` could be
4367 ['phi_1', 'omega_1', 'psi_1', 'phi_2', 'omega_2', ..., 'psi_n-1'].
4368 The end-to-end distance feature could be described by
4369 ['distance_between_MET1_and_LYS80'].
4371 Returns:
4372 list[str]: The labels of this feature.
4374 """
4375 getlbl = (
4376 lambda at: f"ATOM {at.name:>4}:{at.index:>5} {at.residue.name}:{at.residue.resSeq:>4}"
4377 )
4378 labels = []
4379 for i, j, k in self.angle_indexes:
4380 i, j, k = self.top.atom(i), self.top.atom(j), self.top.atom(k)
4381 labels.append(
4382 f"{self.prefix_label}{getlbl(i)} ANGLE {getlbl(j)} ANGLE "
4383 f"{getlbl(k)} CHAIN "
4384 f"{int(np.unique([a.residue.chain.index for a in [i, j, k]])[0])}"
4385 )
4386 return labels
4388 @property
4389 def name(self) -> str:
4390 """str: The name of the class: "CentralAngles"."""
4391 return "CentralAngles"
4393 @property
4394 def indexes(self) -> np.ndarray:
4395 """np.ndarray: A (n_angles, 3) shaped numpy array giving the atom indices
4396 of the angles to be calculated."""
4397 return self.angle_indexes
4400class SideChainAngles(AngleFeature):
4401 """Feature that collects all angles not in the backbone of a topology.
4403 Attributes:
4404 top (mdtraj.Topology): Topology of this feature.
4405 indexes (np.ndarray): The numpy array returned from `top.select('all')`.
4406 prefix_label (str): A prefix for the labels. In this case it is 'SIDECHANGLE'.
4408 """
4410 prefix_label: str = "SIDECHANGLE "
4412 def __init__(
4413 self,
4414 traj: SingleTraj,
4415 deg: bool = False,
4416 cossin: bool = False,
4417 periodic: bool = True,
4418 check_aas: bool = True,
4419 generic_labels: bool = False,
4420 delayed: bool = False,
4421 ) -> None:
4422 self.traj = traj
4423 angle_indexes = []
4424 for residue, ind in traj._custom_top.sidechain_indices_by_residue():
4425 ind = np.vstack(
4426 [
4427 ind[:-2],
4428 ind[1:-1],
4429 ind[2:],
4430 ]
4431 ).T
4432 angle_indexes.append(ind)
4433 angle_indexes = np.vstack(angle_indexes)
4434 super().__init__(
4435 self.traj, angle_indexes, deg, cossin, periodic, check_aas, delayed=delayed
4436 )
4437 if generic_labels:
4438 self.describe = self.generic_describe
4440 def generic_describe(self) -> list[str]:
4441 """Returns a list of generic labels, not containing residue names.
4442 These can be used to stack tops of different topology.
4444 Returns:
4445 list[str]: A list of labels.
4447 """
4448 labels = []
4449 if hasattr(self.traj, "clustal_w"):
4450 raise NotImplementedError
4451 # indices = []
4452 # clustal_w_ = [*self.traj.clustal_w]
4453 # clustal_w = [None] * (len(clustal_w_) * 3)
4454 # clustal_w[::3] = clustal_w_
4455 # clustal_w[1::3] = clustal_w_
4456 # clustal_w[2::3] = clustal_w_
4457 # i = 0
4458 # for a, b, c in zip(clustal_w[:-2], clustal_w[1:-1], clustal_w[2:]):
4459 # i += 1
4460 # if a == "-":
4461 # continue
4462 # indices.append(i)
4463 # indices = np.array(indices)
4464 else:
4465 indices = self.angle_indexes
4466 visited_residues = set()
4467 for a, b, c in indices:
4468 residx_a = self.traj.top.atom(a).residue.index + 1
4469 residx_b = self.traj.top.atom(b).residue.index + 1
4470 residx_c = self.traj.top.atom(c).residue.index + 1
4471 assert residx_a == residx_b == residx_c, (
4472 f"The sidechain distance between atom {self.traj.top.atom(a)}, "
4473 f"{self.traj.top.atom(b)}, and {self.traj.top.atom(c)} describes "
4474 f"an angle between two or more residues ({residx_a}, {residx_b}, and {residx_c})."
4475 f"As sidechains belong always to a single residue, something is off. "
4476 )
4477 if residx_a not in visited_residues:
4478 visited_residues.add(residx_a)
4479 angle_index = 1
4480 labels.append(f"{self.prefix_label} {angle_index} {residx_a}")
4481 angle_index += 1
4482 return labels
4484 def describe(self):
4485 """Gives a list of strings describing this feature's feature-axis.
4487 A feature computes a collective variable (CV). A CV is aligned with an MD
4488 trajectory on the time/frame-axis. The feature axis is unique for every
4489 feature. A feature describing the backbone torsions (phi, omega, psi) would
4490 have a feature axis with the size 3*n-3, where n is the number of residues.
4491 The end-to-end distance of a linear protein in contrast would just have
4492 a feature axis with length 1. This `describe()` method will label these
4493 values unambiguously. A backbone torsion feature's `describe()` could be
4494 ['phi_1', 'omega_1', 'psi_1', 'phi_2', 'omega_2', ..., 'psi_n-1'].
4495 The end-to-end distance feature could be described by
4496 ['distance_between_MET1_and_LYS80'].
4498 Returns:
4499 list[str]: The labels of this feature.
4501 """
4502 getlbl = (
4503 lambda at: f"ATOM {at.name:>4}:{at.index:5} {at.residue.name}:{at.residue.resSeq:>4}"
4504 )
4505 labels = []
4506 for i, j, k in self.angle_indexes:
4507 i, j, k = self.top.atom(i), self.top.atom(j), self.top.atom(k)
4508 labels.append(
4509 f"{self.prefix_label}{getlbl(i)} ANGLE {getlbl(j)} ANGLE {getlbl(k)} CHAIN {int(np.unique([a.residue.chain.index for a in [i, j, k]])[0])}"
4510 )
4511 return labels
4513 @property
4514 def name(self):
4515 """str: The name of the class: "SideChainAngles"."""
4516 return "SideChainAngles"
4518 @property
4519 def indexes(self):
4520 """np.ndarray: A (n_angles, 3) shaped numpy array giving the atom indices
4521 of the angles to be calculated."""
4522 return self.angle_indexes