Coverage for encodermap/loading/features.py: 15%

1293 statements  

« prev     ^ index     » next       coverage.py v7.4.1, created at 2024-12-31 16:54 +0100

1# -*- coding: utf-8 -*- 

2# encodermap/loading/features.py 

3################################################################################ 

4# EncoderMap: A python library for dimensionality reduction. 

5# 

6# Copyright 2019-2024 University of Konstanz and the Authors 

7# 

8# Authors: 

9# Kevin Sawade, Patricia Schwarz 

10# 

11# Encodermap is free software: you can redistribute it and/or modify 

12# it under the terms of the GNU Lesser General Public License as 

13# published by the Free Software Foundation, either version 2.1 

14# of the License, or (at your option) any later version. 

15# This package is distributed in the hope that it will be useful to other 

16# researches. IT DOES NOT COME WITH ANY WARRANTY WHATSOEVER; without even the 

17# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 

18# See the GNU Lesser General Public License for more details. 

19# 

20# See <http://www.gnu.org/licenses/>. 

21################################################################################ 

22"""Features contain topological information of proteins and other biomolecules. 

23 

24These topological information can be calculated once and then provided with 

25input coordinates to calculate frame-wise collective variables of MD simulations. 

26 

27The features in this module used to inherit from PyEMMA's features 

28(https://github.com/markovmodel/PyEMMA), but PyEMMA has since been archived. 

29 

30If using EncoderMap's featurization make sure to also cite PyEMMA, from which 

31a lot of this code was adopted:: 

32 

33 @article{scherer_pyemma_2015, 

34 author = {Scherer, Martin K. and Trendelkamp-Schroer, Benjamin 

35 and Paul, Fabian and Pérez-Hernández, Guillermo and Hoffmann, Moritz and 

36 Plattner, Nuria and Wehmeyer, Christoph and Prinz, Jan-Hendrik and Noé, Frank}, 

37 title = {{PyEMMA} 2: {A} {Software} {Package} for {Estimation}, 

38 {Validation}, and {Analysis} of {Markov} {Models}}, 

39 journal = {Journal of Chemical Theory and Computation}, 

40 volume = {11}, 

41 pages = {5525-5542}, 

42 year = {2015}, 

43 issn = {1549-9618}, 

44 shorttitle = {{PyEMMA} 2}, 

45 url = {http://dx.doi.org/10.1021/acs.jctc.5b00743}, 

46 doi = {10.1021/acs.jctc.5b00743}, 

47 urldate = {2015-10-19}, 

48 month = oct, 

49 } 

50 

51""" 

52 

53################################################################################ 

54# Imports 

55################################################################################ 

56 

57 

58# Future Imports at the top 

59from __future__ import annotations 

60 

61# Standard Library Imports 

62import inspect 

63import itertools 

64import warnings 

65from collections import deque 

66from collections.abc import Callable, Sequence 

67from copy import deepcopy 

68from typing import TYPE_CHECKING, Any, Final, Literal, Optional, TypeVar, Union 

69 

70# Third Party Imports 

71import numpy as np 

72from optional_imports import _optional_import 

73 

74 

75################################################################################ 

76# Typing 

77################################################################################ 

78 

79 

80if TYPE_CHECKING: 

81 # Third Party Imports 

82 import dask 

83 import mdtraj as md 

84 

85 # Encodermap imports 

86 from encodermap.trajinfo.info_all import TrajEnsemble 

87 from encodermap.trajinfo.info_single import SingleTraj 

88 from encodermap.trajinfo.trajinfo_utils import _AMINO_ACID_CODES 

89 

90 

91AllCartesiansType = TypeVar("AllCartesians", bound="Parent") 

92AllBondDistancesType = TypeVar("AllBondDistances", bound="Parent") 

93CentralCartesiansType = TypeVar("CentralCartesians", bound="Parent") 

94CentralBondDistancesType = TypeVar("CentralBondDistances", bound="Parent") 

95CentralAnglesType = TypeVar("CentralAngles", bound="Parent") 

96CentralDihedralsType = TypeVar("CentralDihedrals", bound="Parent") 

97SideChainCartesiansType = TypeVar("SideChainCartesians", bound="Parent") 

98SideChainBondDistancesType = TypeVar("SideChainBondDistances", bound="Parent") 

99SideChainAnglesType = TypeVar("SideChainAngles", bound="Parent") 

100SideChainDihedralsType = TypeVar("SideChainDihedrals", bound="Parent") 

101CustomFeatureType = TypeVar("CustomFeature", bound="Parent") 

102SelectionFeatureType = TypeVar("SelectionFeature", bound="Parent") 

103AngleFeatureType = TypeVar("AngleFeature", bound="Parent") 

104DihedralFeatureType = TypeVar("DihedralFeature", bound="Parent") 

105DistanceFeatureType = TypeVar("DistanceFeature", bound="Parent") 

106AlignFeatureType = TypeVar("AlignFeature", bound="Parent") 

107InverseDistanceFeatureType = TypeVar("InverseDistanceFeature", bound="Parent") 

108ContactFeatureType = TypeVar("ContactFeature", bound="Parent") 

109BackboneTorsionFeatureType = TypeVar("BackboneTorsionFeature", bound="Parent") 

110ResidueMinDistanceFeatureType = TypeVar("ResidueMinDistanceFeature", bound="Parent") 

111GroupCOMFeatureType = TypeVar("GroupCOMFeature", bound="Parent") 

112ResidueCOMFeatureType = TypeVar("ResidueCOMFeature", bound="Parent") 

113SideChainTorsionsType = TypeVar("SideChainTorsions", bound="Parent") 

114MinRmsdFeatureType = TypeVar("MinRmsdFeature", bound="Parent") 

115 

116 

117AnyFeature = Union[ 

118 AllCartesiansType, 

119 AllBondDistancesType, 

120 CentralCartesiansType, 

121 CentralBondDistancesType, 

122 CentralAnglesType, 

123 CentralDihedralsType, 

124 SideChainCartesiansType, 

125 SideChainBondDistancesType, 

126 SideChainAnglesType, 

127 SideChainDihedralsType, 

128 CustomFeatureType, 

129 SelectionFeatureType, 

130 AngleFeatureType, 

131 DihedralFeatureType, 

132 DistanceFeatureType, 

133 AlignFeatureType, 

134 InverseDistanceFeatureType, 

135 ContactFeatureType, 

136 BackboneTorsionFeatureType, 

137 ResidueMinDistanceFeatureType, 

138 GroupCOMFeatureType, 

139 ResidueCOMFeatureType, 

140 SideChainTorsionsType, 

141 MinRmsdFeatureType, 

142] 

143 

144 

145################################################################################ 

146# Optional Imports 

147################################################################################ 

148 

149 

150md = _optional_import("mdtraj") 

151_dist_mic = _optional_import("mdtraj", "geometry._geometry._dist_mic") 

152_dist = _optional_import("mdtraj", "geometry._geometry._dist") 

153_dihedral_mic = _optional_import("mdtraj", "geometry._geometry._dihedral_mic") 

154_dihedral = _optional_import("mdtraj", "geometry._geometry._dihedral") 

155_angle_mic = _optional_import("mdtraj", "geometry._geometry._angle_mic") 

156_angle = _optional_import("mdtraj", "geometry._geometry._angle") 

157box_vectors_to_lengths_and_angles = _optional_import( 

158 "mdtraj", "utils.unitcell.box_vectors_to_lengths_and_angles" 

159) 

160dask = _optional_import("dask") 

161 

162 

163################################################################################ 

164# Globals 

165################################################################################ 

166 

167__all__: list[str] = [ 

168 "AllCartesians", 

169 "AllBondDistances", 

170 "CentralCartesians", 

171 "CentralBondDistances", 

172 "CentralAngles", 

173 "CentralDihedrals", 

174 "SideChainCartesians", 

175 "SideChainBondDistances", 

176 "SideChainAngles", 

177 "SideChainDihedrals", 

178 "CustomFeature", 

179 "SelectionFeature", 

180 "AngleFeature", 

181 "DihedralFeature", 

182 "DistanceFeature", 

183 "AlignFeature", 

184 "InverseDistanceFeature", 

185 "ContactFeature", 

186 "BackboneTorsionFeature", 

187 "ResidueMinDistanceFeature", 

188 "GroupCOMFeature", 

189 "ResidueCOMFeature", 

190 "SideChainTorsions", 

191 "MinRmsdFeature", 

192] 

193 

194 

195PERIODIC_WARNING: bool = False 

196PYEMMA_CITATION_WARNING: bool = False 

197PYEMMA_FEATURES: list[str] = [ 

198 "SelectionFeature", 

199 "AngleFeature", 

200 "DihedralFeature", 

201 "DistanceFeature", 

202 "AlignFeature", 

203 "InverseDistanceFeature", 

204 "ContactFeature", 

205 "BackboneTorsionFeature", 

206 "ResidueMinDistanceFeature", 

207 "GroupCOMFeature", 

208 "ResidueCOMFeature", 

209 "SideChainTorsions", 

210 "MinRmsdFeature", 

211] 

212 

213 

214################################################################################ 

215# Functions 

216################################################################################ 

217 

218 

219def pair(*numbers: int) -> int: 

220 """ConvertGroup's (https://convertgroup.com/) implementation of 

221 Matthew Szudzik's pairing function (http://szudzik.com/ElegantPairing.pdf) 

222 

223 Maps a pair of non-negative integers to a uniquely associated single non-negative integer. 

224 Pairing also generalizes for `n` non-negative integers, by recursively mapping the first pair. 

225 For example, to map the following tuple: 

226 

227 Args: 

228 *numbers (int): Variable length integers. 

229 

230 Returns: 

231 int: The paired integer. 

232 

233 """ 

234 if len(numbers) < 2: 

235 raise ValueError("Szudzik pairing function needs at least 2 numbers as input") 

236 elif any((n < 0) or (not isinstance(n, int)) for n in numbers): 

237 raise ValueError( 

238 f"Szudzik pairing function maps only non-negative integers. In your " 

239 f"input, there seems to be negative or non-integer values: {numbers=}" 

240 ) 

241 

242 numbers = deque(numbers) 

243 

244 # fetch the first two numbers 

245 n1 = numbers.popleft() 

246 n2 = numbers.popleft() 

247 

248 if n1 != max(n1, n2): 

249 mapping = pow(n2, 2) + n1 

250 else: 

251 mapping = pow(n1, 2) + n1 + n2 

252 

253 mapping = int(mapping) 

254 

255 if not numbers: 

256 # recursion concludes 

257 return mapping 

258 else: 

259 numbers.appendleft(mapping) 

260 return pair(*numbers) 

261 

262 

263def unpair(number: int, n: int = 2) -> list[int]: 

264 """ConvertGroup's (https://convertgroup.com/) implementation of 

265 Matthew Szudzik's pairing function (http://szudzik.com/ElegantPairing.pdf) 

266 

267 The inverse function outputs the pair associated with a non-negative integer. 

268 Unpairing also generalizes by recursively unpairing a non-negative integer to 

269 `n` non-negative integers. 

270 

271 For example, to associate a `number` with three non-negative 

272 integers n_1, n_2, n_3, such that: 

273 

274 pairing(n_1, n_2, n_3) = `number` 

275 

276 the `number` will first be unpaired to n_p, n_3, then the n_p will be unpaired to n_1, n_2, 

277 producing the desired n_1, n_2 and n_3. 

278 

279 Args: 

280 number(int): The paired integer. 

281 n (int): How many integers are paired in `number`? 

282 

283 Returns: 

284 list[int]: A list of length `n` with the constituting ints. 

285 

286 """ 

287 if (number < 0) or (not isinstance(number, int)): 

288 raise ValueError("Szudzik unpairing function requires a non-negative integer") 

289 

290 if number - pow(np.floor(np.floor(number)), 2) < np.floor(np.floor(number)): 

291 

292 n1 = number - pow(np.floor(np.floor(number)), 2) 

293 n2 = np.floor(np.floor(number)) 

294 

295 else: 

296 n1 = np.floor(np.floor(number)) 

297 n2 = number - pow(np.floor(np.floor(number)), 2) - np.floor(np.floor(number)) 

298 

299 n1, n2 = int(n1), int(n2) 

300 

301 if n > 2: 

302 return [unpair(n1, n - 1) + (n2,)] 

303 else: 

304 # recursion concludes 

305 return [n1, n2] 

306 

307 

308def _check_aas(traj: SingleTraj) -> None: 

309 r = set([r.name for r in traj.top.residues]) 

310 diff = r - set(traj._custom_top.amino_acid_codes.keys()) 

311 if diff: 

312 raise Exception( 

313 f"I don't recognize these residues: {diff}. " 

314 f"Either add them to the `SingleTraj` or `TrajEnsemble` via " 

315 f"`traj.load_custom_topology(custom_aas)` or " 

316 f"`trajs.load_custom_topology(custom_aas)` " 

317 f"Or remove them from your trajectory. See the documentation of the " 

318 f"`em.CustomTopology` class. Here are the recognized residues:\n\n" 

319 f"{traj._custom_top.amino_acid_codes.keys()}" 

320 ) 

321 

322 

323def describe_last_feats(feat: AnyFeature, n: int = 5) -> None: 

324 """Prints the description of the last `n` features. 

325 

326 Args: 

327 feat (encodermap.Featurizer): An instance of a featurizer. 

328 n (int): The number of last features to describe. Default is 5. 

329 

330 """ 

331 for i, lbl in enumerate(feat.describe()[-n:]): 

332 print(lbl) 

333 

334 

335def _describe_atom( 

336 topology: md.Topology, 

337 index: int, 

338) -> str: 

339 """ 

340 Returns a string describing the given atom. 

341 

342 Args: 

343 topology (md.Topology): An MDTraj Topology. 

344 index (str): The index of the atom. 

345 

346 Return: 

347 str: A description of the atom. 

348 

349 """ 

350 atom = topology.atom(index) 

351 if topology.n_chains > 1: 

352 return f"{atom.residue.name} {atom.residue.resSeq} {atom.name} {atom.index} {atom.residue.chain.index}" 

353 else: 

354 return f"{atom.residue.name} {atom.residue.resSeq} {atom.name} {atom.index}" 

355 

356 

357################################################################################ 

358# Parent Classes 

359################################################################################ 

360 

361 

362class CitePYEMMAWarning(UserWarning): 

363 pass 

364 

365 

366class FeatureMeta(type): 

367 """Inspects the __init__ of classes and adds attributes to them based on 

368 their call signature. 

369 

370 If a feature uses the arguments `deg` or `omega` in 

371 its call signature, the instance will have the CLASS attributes `_use_angle` and 

372 `_use_omega` set to True. Otherwise, the instance will have them set as False. 

373 

374 This allows other functions that use these features to easily discern whether 

375 they need these arguments before instantiating the classes. 

376 

377 Example: 

378 >>> from encodermap.loading import features 

379 >>> f_class = getattr(features, "SideChainDihedrals") 

380 >>> f_class._use_angle 

381 True 

382 >>> f_class._use_omega 

383 False 

384 

385 """ 

386 

387 def __new__(cls, name, bases, dct): 

388 x = super().__new__(cls, name, bases, dct) 

389 args = inspect.getfullargspec(x.__init__) 

390 if args.varargs is not None: 390 ↛ 391line 390 didn't jump to line 391, because the condition on line 390 was never true

391 raise Exception(f"{x.__init__=} {x=} {args=} {args=}") 

392 args = args.args 

393 if "deg" in args: 

394 x._use_angle = True 

395 else: 

396 x._use_angle = False 

397 if "omega" in args: 

398 x._use_omega = True 

399 else: 

400 x._use_omega = False 

401 if "periodic" in args: 

402 x._use_periodic = True 

403 else: 

404 x._use_periodic = False 

405 x.atom_feature = False 

406 x._raise_on_unitcell = False 

407 return x 

408 

409 

410class Feature(metaclass=FeatureMeta): 

411 """Parent class to all feature classes. Implements the FeatureMeta, 

412 the transform method, and checks for unknown amino acids.. 

413 

414 This class implements functionality, that holds true for all features. 

415 The `transform()` method can be used by subclasses in two ways: 

416 * Provide all args with None. In this case, the traj in `self.traj` 

417 will be used to calculate the transformation. 

418 * Provide custom `xyz`, `unitcell_vectors`, and `unitcell_info`. In this 

419 case, 

420 

421 """ 

422 

423 def __init__( 

424 self, 

425 traj: Union[SingleTraj, TrajEnsemble], 

426 check_aas: bool = True, 

427 periodic: Optional[bool] = None, 

428 delayed: bool = False, 

429 ) -> None: 

430 self.traj = traj 

431 self._raise_on_unitcell = False 

432 if isinstance(self.traj.top, list): 

433 assert len(self.traj.top) == 1, ( 

434 f"The trajs in the features seem to have multiple toplogies: " 

435 f"{self.traj.top_files}. Features can only work with single " 

436 f"topologies." 

437 ) 

438 self.top = self.traj.top[0] 

439 else: 

440 self.top = self.traj.top 

441 if check_aas: 

442 _check_aas(traj) 

443 

444 self.delayed = delayed 

445 

446 if periodic is not None: 

447 if periodic and self.traj._have_unitcell: 

448 self.periodic = True 

449 elif periodic and not self.traj._have_unitcell: 

450 self.periodic = False 

451 self._raise_on_unitcell 

452 global PERIODIC_WARNING 

453 if not PERIODIC_WARNING: 

454 warnings.warn( 

455 f"You requested a `em.loading.features.Feature` to calculate " 

456 f"features in a periodic box, using the minimum image convention, " 

457 f"but the trajectory you provided does not have " 

458 f"unitcell information. If this feature will later be supplied " 

459 f"with trajectories with unitcell information, an Exception " 

460 f"will be raised, to make sure distances/angles are calculated " 

461 f"correctly.", 

462 stacklevel=2, 

463 ) 

464 PERIODIC_WARNING = True 

465 else: 

466 self.periodic = False 

467 

468 global PYEMMA_CITATION_WARNING 

469 if not PYEMMA_CITATION_WARNING and self.__class__.__name__ in PYEMMA_FEATURES: 

470 warnings.warn( 

471 message=( 

472 "EncoderMap's featurization uses code from the now deprecated " 

473 "python package PyEMMA (https://github.com/markovmodel/PyEMMA). " 

474 "Please make sure to also cite them, when using EncoderMap." 

475 ), 

476 category=CitePYEMMAWarning, 

477 ) 

478 PYEMMA_CITATION_WARNING = True 

479 

480 @property 

481 def dimension(self) -> int: 

482 """int: The dimension of the feature.""" 

483 return self._dim 

484 

485 @dimension.setter 

486 def dimension(self, val: Union[float, int]) -> None: 

487 self._dim = int(val) 

488 

489 def __eq__(self, other: AnyFeature) -> bool: 

490 if not issubclass(other.__class__, Feature): 

491 return False 

492 if not isinstance(other, self.__class__): 

493 return False 

494 if self.dimension != other.dimension: 

495 return False 

496 if self.traj is not None: 

497 if self.traj.top != other.traj.top: 

498 return False 

499 if hasattr(self, "ref"): 

500 if not np.allclose(self.ref.xyz, other.ref.xyz, rtol=1e-4): 

501 return False 

502 if hasattr(self, "scheme"): 

503 if self.scheme != other.scheme: 

504 return False 

505 if hasattr(self, "ignore_nonprotein"): 

506 if self.ignore_nonprotein != other.ignore_nonprotein: 

507 return False 

508 if hasattr(self, "periodic"): 

509 if self.periodic != other.periodic: 

510 return False 

511 if hasattr(self, "threshold"): 

512 if self.threshold != other.threshold: 

513 return False 

514 if hasattr(self, "group_definitions"): 

515 for self_group_def, other_group_def in zip( 

516 self.group_definitions, other.group_definitions 

517 ): 

518 if not np.array_equal(self_group_def, other_group_def): 

519 return False 

520 if hasattr(self, "group_pairs"): 

521 if not np.array_equal(self.group_pairs, other.group_pairs): 

522 return False 

523 if hasattr(self, "count_contacts"): 

524 if self.count_contacts != other.count_contacts: 

525 return False 

526 # Encodermap imports 

527 from encodermap.misc.xarray import _get_indexes_from_feat 

528 

529 try: 

530 self_index = _get_indexes_from_feat(self, self.traj) 

531 other_index = _get_indexes_from_feat(other, other.traj) 

532 if not np.array_equal(self_index, other_index): 

533 return False 

534 except AttributeError: 

535 pass 

536 return True 

537 

538 def transform( 

539 self, 

540 xyz: Optional[np.ndarray] = None, 

541 unitcell_vectors: Optional[np.ndarray] = None, 

542 unitcell_info: Optional[np.ndarray] = None, 

543 ) -> tuple[np.ndarray, np.ndarray, np.ndarray]: 

544 """Carries out the computation of the CVs. 

545 

546 For featurization of single trajs, all arguments can be left None, 

547 and the values of the `traj` at class instantiation will be 

548 returned by this method. For ensembles with a single topology, but 

549 multiple trajectories, the xyz, unitcell_vectors, and unitcell_info 

550 should be provided accordingly. This parent class' `transform` then 

551 carries out checks (do all arguments provide the same number of frames, 

552 does the xyz array have the same number of atoms as the `traj` at 

553 instantiation, do the unitcell_angles coincide with the one of the 

554 parent traj, ...). Thus, it is generally advised to call this method 

555 with super() to run these checks. 

556 

557 Args: 

558 xyz (Optional[np.ndarray]): If None, the coordinates of the 

559 trajectory in provided as `traj`, when the feature was instantiated 

560 will be used. 

561 unitcell_vectors (Optional[np.ndarray]): If None, the unitcell vectors 

562 of the trajectory in provided as `traj`, when the feature was instantiated 

563 will be used. Unitcell_vectors are arrays with shape (n_frames, 3, 3), 

564 where the rows are the bravais vectors a, b, c. 

565 unitcell_info (Optional[np.ndarray]): If None, the unitcell info of 

566 the trajectory in provided as `traj`, when the feature was 

567 instantiated will be used. The unitcell_info is an array with 

568 shape (n_frames, 6), where the first three columns are the unitcell 

569 lengths in nm, the remaining columns are the unitcell angles in deg. 

570 

571 Returns: 

572 tuple: A tuple containing three np.ndarrays: 

573 - The xyz coordinates. 

574 - The unitcell_vectors 

575 - The unitcell_info 

576 

577 """ 

578 if self._raise_on_unitcell and ( 

579 unitcell_info is not None or unitcell_vectors is not None 

580 ): 

581 raise Exception( 

582 f"This feature was instantiated with the keyword argument `periodic=True`, " 

583 f"but the `SingleTraj` used for instantiation did not contain any unitcell " 

584 f"information. Now, unitcell_infos are fed into the `transform` " 

585 f"method of this feature. This behavior is not allowed. Make sure to " 

586 f"either specifically set `periodic=False` or fix the unitcells in " 

587 f"your trajectory files." 

588 ) 

589 if xyz is not None: 

590 input_atoms = xyz.shape[1] 

591 try: 

592 self_atoms = self.traj.xyz.shape[1] 

593 except AttributeError as e: 

594 raise Exception(f"{self=}") from e 

595 if hasattr(self, "periodic"): 

596 if self.periodic: 

597 assert unitcell_vectors is not None and unitcell_info is not None, ( 

598 f"When providing a `feature.transform` function with xyz " 

599 f"data, and setting {self.periodic=} to True, please " 

600 f"also provide `unitcell_vectors` and `unitcell_info` " 

601 f"to calculate distances/angles/dihedrals in periodic space." 

602 ) 

603 assert input_atoms == self_atoms, ( 

604 f"The shape of the input xyz coordinates is off from the expected " 

605 f"shape. The topology {self.top} defines {self_atoms} atoms. The " 

606 f"provided array has {xyz.shaope[1]=} atoms." 

607 ) 

608 else: 

609 xyz = self.traj.xyz.copy() 

610 if unitcell_vectors is not None: 

611 assert len(unitcell_vectors) == len(xyz), ( 

612 f"The shape of the provided `unitcell_vectors` is off from the " 

613 f"expected shape. The xyz data contains {len(xyz)=} frames, while " 

614 f"the `unitcell_vectors` contains {len(unitcell_vectors)=} frames." 

615 ) 

616 else: 

617 if self.traj._have_unitcell: 

618 unitcell_vectors = self.traj.unitcell_vectors.copy() 

619 else: 

620 unitcell_vectors = None 

621 if unitcell_info is not None: 

622 assert len(unitcell_info) == len(xyz), ( 

623 f"The shape of the provided `unitcell_info` is off from the " 

624 f"expected shape. The xyz data contains {len(xyz)=} frames, while " 

625 f"the `unitcell_info` contains {len(unitcell_info)=} frames." 

626 ) 

627 provided_orthogonal = np.allclose(unitcell_info[:, 3:], 90) 

628 self_orthogonal = np.allclose(self.traj.unitcell_angles, 90) 

629 assert provided_orthogonal == self_orthogonal, ( 

630 f"The trajectory you provided to `transform` and the one " 

631 f"this feature was instantiated with have different crystal " 

632 f"systems in their unitcells: {provided_orthogonal=} {self_orthogonal=}" 

633 ) 

634 else: 

635 if self.traj._have_unitcell: 

636 unitcell_info = np.hstack( 

637 [ 

638 self.traj.unitcell_lengths.copy(), 

639 self.traj.unitcell_angles.copy(), 

640 ] 

641 ) 

642 else: 

643 unitcell_info = None 

644 return xyz, unitcell_vectors, unitcell_info 

645 

646 

647class CustomFeature(Feature): 

648 delayed: bool = False 

649 _nonstandard_transform_args: list[str] = [ 

650 "top", 

651 "indexes", 

652 "delayed_call", 

653 "_fun", 

654 "_args", 

655 "_kwargs", 

656 ] 

657 _is_custom: Final[True] = True 

658 traj: Optional[SingleTraj] = None 

659 top: Optional[md.Topology] = None 

660 indexes: Optional[np.ndarray] = None 

661 _fun: Optional[Callable] = None 

662 _args: Optional[tuple[Any, ...]] = None 

663 _kwargs: Optional[dict[str, Any]] = None 

664 

665 def __init__( 

666 self, 

667 fun: Callable, 

668 dim: int, 

669 traj: Optional[SingleTraj] = None, 

670 description: Optional[str] = None, 

671 fun_args: tuple[Any, ...] = tuple(), 

672 fun_kwargs: dict[str, Any] = None, 

673 delayed: bool = False, 

674 ) -> None: 

675 self.id = None 

676 self.traj = traj 

677 self.indexes = None 

678 if fun_kwargs is None: 

679 fun_kwargs = {} 

680 self._fun = fun 

681 self._args = fun_args 

682 self._kwargs = fun_kwargs 

683 self._dim = dim 

684 self.desc = description 

685 self.delayed = delayed 

686 assert self._dim > 0, f"Feature dimensions need to be greater than 0." 

687 

688 def describe(self) -> list[str]: 

689 """Gives a list of strings describing this feature's feature-axis. 

690 

691 A feature computes a collective variable (CV). A CV is aligned with an MD 

692 trajectory on the time/frame-axis. The feature axis is unique for every 

693 feature. A feature describing the backbone torsions (phi, omega, psi) would 

694 have a feature axis with the size 3*n-3, where n is the number of residues. 

695 The end-to-end distance of a linear protein in contrast would just have 

696 a feature axis with length 1. This `describe()` method will label these 

697 values unambiguously. A backbone torsion feature's `describe()` could be 

698 ['phi_1', 'omega_1', 'psi_1', 'phi_2', 'omega_2', ..., 'psi_n-1']. 

699 The end-to-end distance feature could be described by 

700 ['distance_between_MET1_and_LYS80']. 

701 

702 Returns: 

703 list[str]: The labels of this feature. 

704 

705 """ 

706 if isinstance(self.desc, str): 

707 desc = [self.desc] 

708 elif self.desc is None: 

709 arg_str = ( 

710 f"{self._args}, {self._kwargs}" if self._kwargs else f"{self._args}" 

711 ) 

712 desc = [f"CustomFeature_{self.id} calling {self._fun} with args {arg_str}"] 

713 elif self.desc and not (len(self.desc) == self._dim or len(self.desc) == 1): 

714 raise ValueError( 

715 f"to avoid confusion, ensure the lengths of 'description' " 

716 f"list matches dimension - or give a single element which will be repeated." 

717 f"Input was {self.desc}" 

718 ) 

719 

720 if len(desc) == 1 and self.dimension > 0: 

721 desc *= self.dimension 

722 

723 return desc 

724 

725 @property 

726 def dask_indices(self): 

727 """str: The name of the delayed transformation to carry out with this feature.""" 

728 return "indexes" 

729 

730 @staticmethod 

731 @dask.delayed 

732 def dask_transform( 

733 top: md.Topology, 

734 indexes: np.ndarray, 

735 delayed_call: Optional[Callable] = None, 

736 _fun: Optional[Callable] = None, 

737 _args: Optional[Sequence[Any]] = None, 

738 _kwargs: Optional[dict[str, Any]] = None, 

739 xyz: Optional[np.ndarray] = None, 

740 unitcell_vectors: Optional[np.ndarray] = None, 

741 unitcell_info: Optional[np.ndarray] = None, 

742 ) -> np.ndarray: 

743 """The CustomFeature dask transfrom is still under development.""" 

744 if unitcell_info is not None: 

745 traj = md.Trajectory( 

746 xyz=xyz, 

747 topology=top, 

748 unitcell_lengths=unitcell_info[:, :3], 

749 unitcell_angles=unitcell_info[:, 3:], 

750 ) 

751 else: 

752 traj = md.Trajectory( 

753 xyz=xyz, 

754 topology=top, 

755 ) 

756 

757 if _kwargs is None: 

758 _kwargs = {} 

759 

760 if delayed_call is not None: 

761 return delayed_call(traj, indexes, **_kwargs) 

762 else: 

763 if _args is None: 

764 _args = tuple() 

765 return _fun(traj, *_args, **_kwargs) 

766 

767 def transform( 

768 self, 

769 traj: Optional[md.Trajectory] = None, 

770 xyz: Optional[np.ndarray] = None, 

771 unitcell_vectors: Optional[np.ndarray] = None, 

772 unitcell_info: Optional[np.ndarray] = None, 

773 ) -> np.ndarray: 

774 """Takes xyz and unitcell information to apply the topological calculations on. 

775 

776 When this method is not provided with any input, it will take the 

777 traj_container provided as `traj` in the `__init__()` method and transforms 

778 this trajectory. The argument `xyz` can be the xyz coordinates in nanometer 

779 of a trajectory with identical topology as `self.traj`. If `periodic` was 

780 set to True, `unitcell_vectors` and `unitcell_info` should also be provided. 

781 

782 Args: 

783 xyz (Optional[np.ndarray]): A numpy array with shape (n_frames, n_atoms, 3) 

784 in nanometer. If None is provided, the coordinates of `self.traj` 

785 will be used. Otherwise, the topology of this set of xyz 

786 coordinates should match the topology of `self.atom`. 

787 Defaults to None. 

788 unitcell_vectors (Optional[np.ndarray]): When periodic is set to 

789 True, the `unitcell_vectors` are needed to calculate the 

790 minimum image convention in a periodic space. This numpy 

791 array should have the shape (n_frames, 3, 3). The rows of this 

792 array correlate to the Bravais vectors a, b, and c. 

793 unitcell_info (Optional[np.ndarray]): Basically identical to 

794 `unitcell_vectors`. A numpy array of shape (n_frames, 6), where 

795 the first three columns are the unitcell_lengths in nanometer. 

796 The other three columns are the unitcell_angles in degrees. 

797 

798 Returns: 

799 np.ndarray: The result of the computation with shape (n_frames, n_indexes). 

800 

801 """ 

802 if xyz is not None: 

803 self.traj = traj 

804 xyz, unitcell_vectors, unitcell_info = super().transform( 

805 xyz, unitcell_vectors, unitcell_info 

806 ) 

807 if unitcell_info is not None: 

808 traj = md.Trajectory( 

809 xyz=xyz, 

810 topology=self.traj.top, 

811 unitcell_lengths=unitcell_info[:, :3], 

812 unitcell_angles=unitcell_info[:, 3:], 

813 ) 

814 else: 

815 traj = md.Trajectory( 

816 xyz=xyz, 

817 topology=self.traj.top, 

818 ) 

819 if hasattr(self, "call"): 

820 if xyz is None: 

821 traj = md.Trajectory( 

822 xyz=self.traj.xyz.copy(), 

823 topology=self.traj.top, 

824 unitcell_lengths=deepcopy(traj.traj.unitcell_lengths), 

825 unitcell_angles=deepcopy(traj.traj.unitcell_angles), 

826 ) 

827 return self.call(traj) 

828 feature = self._fun(traj, *self._args, **self._kwargs) 

829 if not isinstance(feature, np.ndarray): 

830 raise ValueError("Your function should return a NumPy array!") 

831 return feature 

832 

833 

834class SelectionFeature(Feature): 

835 prefix_label: str = "ATOM:" 

836 

837 def __init__( 

838 self, 

839 traj: Union[SingleTraj, TrajEnsemble], 

840 indexes: Sequence[int], 

841 check_aas: bool = True, 

842 delayed: bool = False, 

843 ) -> None: 

844 super().__init__(traj, check_aas, delayed=delayed) 

845 self.indexes = np.asarray(indexes).astype("int32") 

846 if len(self.indexes) == 0: 

847 raise ValueError(f"Empty indices in {self.__class__.__name__}.") 

848 self.dimension = 3 * len(self.indexes) 

849 

850 def describe(self) -> list[str]: 

851 """Gives a list of strings describing this feature's feature-axis. 

852 

853 A feature computes a collective variable (CV). A CV is aligned with an MD 

854 trajectory on the time/frame-axis. The feature axis is unique for every 

855 feature. A feature describing the backbone torsions (phi, omega, psi) would 

856 have a feature axis with the size 3*n-3, where n is the number of residues. 

857 The end-to-end distance of a linear protein in contrast would just have 

858 a feature axis with length 1. This `describe()` method will label these 

859 values unambiguously. A backbone torsion feature's `describe()` could be 

860 ['phi_1', 'omega_1', 'psi_1', 'phi_2', 'omega_2', ..., 'psi_n-1']. 

861 The end-to-end distance feature could be described by 

862 ['distance_between_MET1_and_LYS80']. 

863 

864 Returns: 

865 list[str]: The labels of this feature. 

866 

867 """ 

868 labels = [] 

869 for i in self.indexes: 

870 labels.append(f"{self.prefix_label}{_describe_atom(self.top, i)} x") 

871 labels.append(f"{self.prefix_label}{_describe_atom(self.top, i)} y") 

872 labels.append(f"{self.prefix_label}{_describe_atom(self.top, i)} z") 

873 return labels 

874 

875 @property 

876 def dask_indices(self) -> str: 

877 """str: The name of the delayed transformation to carry out with this feature.""" 

878 return "indexes" 

879 

880 @staticmethod 

881 @dask.delayed 

882 def dask_transform( 

883 indexes: np.ndarray, 

884 xyz: Optional[np.ndarray] = None, 

885 unitcell_vectors: Optional[np.ndarray] = None, 

886 unitcell_info: Optional[np.ndarray] = None, 

887 ) -> dask.delayed: 

888 """The same as `transform()` but without the need to pickle `traj`. 

889 

890 When dask delayed concurrencies are distributed, required python objects 

891 are pickled. Thus, every feature needs to have its own pickled traj. 

892 That defeats the purpose of dask distributed. Thus, this method implements 

893 the same calculations as `transform` as a more barebones approach. 

894 It foregoes the checks for periodicity and unit-cell shape and just 

895 takes xyz, unitcell vectors, and unitcell info. Furthermore, it is a 

896 staticmethod, so it doesn't require `self` to function. However, it 

897 needs the indexes in `self.indexes`. That's why the `dask_indices` 

898 property informs the scheduler to also pickle and pass this object to 

899 the workers. 

900 

901 Args: 

902 indexes (np.ndarray): A numpy array with shape (n, ) giving the 

903 0-based index of the atoms which positions should be returned. 

904 xyz (Optional[np.ndarray]): A numpy array with shape (n_frames, n_atoms, 3) 

905 in nanometer. If None is provided, the coordinates of `self.traj` 

906 will be used. Otherwise, the topology of this set of xyz 

907 coordinates should match the topology of `self.atom`. 

908 Defaults to None. 

909 unitcell_vectors (Optional[np.ndarray]): When periodic is set to 

910 True, the `unitcell_vectors` are needed to calculate the 

911 minimum image convention in a periodic space. This numpy 

912 array should have the shape (n_frames, 3, 3). The rows of this 

913 array correlate to the Bravais vectors a, b, and c. 

914 unitcell_info (Optional[np.ndarray]): Basically identical to 

915 `unitcell_vectors`. A numpy array of shape (n_frames, 6), where 

916 the first three columns are the unitcell_lengths in nanometer. 

917 The other three columns are the unitcell_angles in degrees. 

918 

919 """ 

920 newshape = (xyz.shape[0], 3 * indexes.shape[0]) 

921 result = np.reshape(xyz[:, indexes, :], newshape) 

922 return result 

923 

924 def transform( 

925 self, 

926 xyz: Optional[np.ndarray] = None, 

927 unitcell_vectors: Optional[np.ndarray] = None, 

928 unitcell_info: Optional[np.ndarray] = None, 

929 ) -> np.ndarray: 

930 """Takes xyz and unitcell information to apply the topological calculations on. 

931 

932 When this method is not provided with any input, it will take the 

933 traj_container provided as `traj` in the `__init__()` method and transforms 

934 this trajectory. The argument `xyz` can be the xyz coordinates in nanometer 

935 of a trajectory with identical topology as `self.traj`. If `periodic` was 

936 set to True, `unitcell_vectors` and `unitcell_info` should also be provided. 

937 

938 Args: 

939 xyz (Optional[np.ndarray]): A numpy array with shape (n_frames, n_atoms, 3) 

940 in nanometer. If None is provided, the coordinates of `self.traj` 

941 will be used. Otherwise, the topology of this set of xyz 

942 coordinates should match the topology of `self.atom`. 

943 Defaults to None. 

944 unitcell_vectors (Optional[np.ndarray]): When periodic is set to 

945 True, the `unitcell_vectors` are needed to calculate the 

946 minimum image convention in a periodic space. This numpy 

947 array should have the shape (n_frames, 3, 3). The rows of this 

948 array correlate to the Bravais vectors a, b, and c. 

949 unitcell_info (Optional[np.ndarray]): Basically identical to 

950 `unitcell_vectors`. A numpy array of shape (n_frames, 6), where 

951 the first three columns are the unitcell_lengths in nanometer. 

952 The other three columns are the unitcell_angles in degrees. 

953 

954 Returns: 

955 np.ndarray: The result of the computation with shape (n_frames, n_indexes). 

956 

957 """ 

958 xyz, unitcell_vectors, unitcell_info = super().transform( 

959 xyz, unitcell_vectors, unitcell_info 

960 ) 

961 newshape = (xyz.shape[0], 3 * self.indexes.shape[0]) 

962 result = np.reshape(xyz[:, self.indexes, :], newshape) 

963 return result 

964 

965 

966class AngleFeature(Feature): 

967 def __init__( 

968 self, 

969 traj: Union[SingleTraj, TrajEnsemble], 

970 angle_indexes: np.ndarray, 

971 deg: bool = False, 

972 cossin: bool = False, 

973 periodic: bool = True, 

974 check_aas: bool = True, 

975 delayed: bool = False, 

976 ) -> None: 

977 """Instantiate the `AngleFeature` class. 

978 

979 Args: 

980 traj (Union[SingleTraj, TrajEnsemble]): The trajectory container 

981 which topological information will be used to build the angles. 

982 angle_indexes (np.ndarray): A numpy array with shape (n_dihedrals, 4), 

983 that indexes the 3-tuples of atoms that will be used for 

984 the angle calculation. 

985 deg (bool): Whether to return the dihedrals in degree (True) or 

986 in radian (False). Defaults to False. 

987 cossin (bool): Whether to return the angles (False) or tuples of their 

988 cos and sin values (True). Defaults to False. 

989 periodic (bool): Whether to observe the minimum image convention 

990 and respect proteins breaking over the periodic boundary 

991 condition as a whole (True). In this case, the trajectory container 

992 in `traj` needs to have unitcell information. Defaults to True. 

993 check_aas (bool): Whether to check if all aas in `traj.top` are 

994 recognized. Defaults to True. 

995 

996 """ 

997 self.angle_indexes = np.array(angle_indexes).astype("int32") 

998 if len(self.angle_indexes) == 0: 

999 raise ValueError("empty indices") 

1000 self.deg = deg 

1001 self.cossin = cossin 

1002 self.dimension = len(self.angle_indexes) 

1003 if cossin: 

1004 self.dimension *= 2 

1005 super().__init__(traj, check_aas, periodic=periodic, delayed=delayed) 

1006 

1007 def describe(self) -> list[str]: 

1008 """Gives a list of strings describing this feature's feature-axis. 

1009 

1010 A feature computes a collective variable (CV). A CV is aligned with an MD 

1011 trajectory on the time/frame-axis. The feature axis is unique for every 

1012 feature. A feature describing the backbone torsions (phi, omega, psi) would 

1013 have a feature axis with the size 3*n-3, where n is the number of residues. 

1014 The end-to-end distance of a linear protein in contrast would just have 

1015 a feature axis with length 1. This `describe()` method will label these 

1016 values unambiguously. A backbone torsion feature's `describe()` could be 

1017 ['phi_1', 'omega_1', 'psi_1', 'phi_2', 'omega_2', ..., 'psi_n-1']. 

1018 The end-to-end distance feature could be described by 

1019 ['distance_between_MET1_and_LYS80']. 

1020 

1021 Returns: 

1022 list[str]: The labels of this feature. 

1023 

1024 """ 

1025 if self.cossin: 

1026 sin_cos = ("ANGLE: COS(%s - %s - %s)", "ANGLE: SIN(%s - %s - %s)") 

1027 labels = [ 

1028 s 

1029 % ( 

1030 _describe_atom(self.top, triple[0]), 

1031 _describe_atom(self.top, triple[1]), 

1032 _describe_atom(self.top, triple[2]), 

1033 ) 

1034 for triple in self.angle_indexes 

1035 for s in sin_cos 

1036 ] 

1037 else: 

1038 labels = [ 

1039 "ANGLE: %s - %s - %s " 

1040 % ( 

1041 _describe_atom(self.top, triple[0]), 

1042 _describe_atom(self.top, triple[1]), 

1043 _describe_atom(self.top, triple[2]), 

1044 ) 

1045 for triple in self.angle_indexes 

1046 ] 

1047 return labels 

1048 

1049 @property 

1050 def dask_indices(self) -> str: 

1051 """str: The name of the delayed transformation to carry out with this feature.""" 

1052 return "angle_indexes" 

1053 

1054 @staticmethod 

1055 @dask.delayed 

1056 def dask_transform( 

1057 indexes: np.ndarray, 

1058 periodic: bool, 

1059 deg: bool, 

1060 cossin: bool, 

1061 xyz: Optional[np.ndarray] = None, 

1062 unitcell_vectors: Optional[np.ndarray] = None, 

1063 unitcell_info: Optional[np.ndarray] = None, 

1064 ) -> dask.delayed: 

1065 """The same as `transform()` but without the need to pickle `traj`. 

1066 

1067 When dask delayed concurrencies are distributed, required python objects 

1068 are pickled. Thus, every feature needs to have its own pickled traj. 

1069 That defeats the purpose of dask distributed. Thus, this method implements 

1070 the same calculations as `transform` as a more barebones approach. 

1071 It foregoes the checks for periodicity and unit-cell shape and just 

1072 takes xyz, unitcell vectors, and unitcell info. Furthermore, it is a 

1073 staticmethod, so it doesn't require `self` to function. However, it 

1074 needs the indexes in `self.indexes`. That's why the `dask_indices` 

1075 property informs the scheduler to also pickle and pass this object to 

1076 the workers. 

1077 

1078 Args: 

1079 indexes (np.ndarray): A numpy array with shape (n, ) giving the 

1080 0-based index of the atoms which positions should be returned. 

1081 periodic (bool): Whether to observe the minimum image convention 

1082 and respect proteins breaking over the periodic boundary 

1083 condition as a whole (True). In this case, the trajectory container 

1084 in `traj` needs to have unitcell information. Defaults to True. 

1085 deg (bool): Whether to return the result in degree (`deg=True`) or in 

1086 radians (`deg=False`). Defaults to False (radians). 

1087 cossin (bool): If True, each angle will be returned as a pair of 

1088 (sin(x), cos(x)). This is useful, if you calculate the means 

1089 (e.g. TICA/PCA, clustering) in that space. Defaults to False. 

1090 xyz (Optional[np.ndarray]): A numpy array with shape (n_frames, n_atoms, 3) 

1091 in nanometer. If None is provided, the coordinates of `self.traj` 

1092 will be used. Otherwise, the topology of this set of xyz 

1093 coordinates should match the topology of `self.atom`. 

1094 Defaults to None. 

1095 unitcell_vectors (Optional[np.ndarray]): When periodic is set to 

1096 True, the `unitcell_vectors` are needed to calculate the 

1097 minimum image convention in a periodic space. This numpy 

1098 array should have the shape (n_frames, 3, 3). The rows of this 

1099 array correlate to the Bravais vectors a, b, and c. 

1100 unitcell_info (Optional[np.ndarray]): Basically identical to 

1101 `unitcell_vectors`. A numpy array of shape (n_frames, 6), where 

1102 the first three columns are the unitcell_lengths in nanometer. 

1103 The other three columns are the unitcell_angles in degrees. 

1104 

1105 """ 

1106 if periodic: 

1107 assert unitcell_vectors is not None 

1108 if unitcell_info is None: 

1109 # convert to angles 

1110 unitcell_angles = [] 

1111 for fr_unitcell_vectors in unitcell_vectors: 

1112 _, _, _, alpha, beta, gamma = box_vectors_to_lengths_and_angles( 

1113 fr_unitcell_vectors[0], 

1114 fr_unitcell_vectors[1], 

1115 fr_unitcell_vectors[2], 

1116 ) 

1117 unitcell_angles.append(np.array([alpha, beta, gamma])) 

1118 else: 

1119 unitcell_angles = unitcell_info[:, 3:] 

1120 # check for an orthogonal box 

1121 orthogonal = np.allclose(unitcell_angles, 90) 

1122 

1123 out = np.empty((xyz.shape[0], indexes.shape[0]), dtype="float32", order="C") 

1124 _angle_mic( 

1125 xyz, 

1126 indexes, 

1127 unitcell_vectors.transpose(0, 2, 1).copy(), 

1128 out, 

1129 orthogonal, 

1130 ) 

1131 else: 

1132 out = np.empty((xyz.shape[0], indexes.shape[0]), dtype="float32", order="C") 

1133 _angle(xyz, indexes, out) 

1134 if cossin: 

1135 out = np.dstack((np.cos(out), np.sin(out))) 

1136 out = out.reshape(out.shape[0], out.shape[1] * out.shape[2]) 

1137 if deg and not cossin: 

1138 out = np.rad2deg(out) 

1139 return out 

1140 

1141 def transform( 

1142 self, 

1143 xyz: Optional[np.ndarray] = None, 

1144 unitcell_vectors: Optional[np.ndarray] = None, 

1145 unitcell_info: Optional[np.ndarray] = None, 

1146 ) -> np.ndarray: 

1147 """Takes xyz and unitcell information to apply the topological calculations on. 

1148 

1149 When this method is not provided with any input, it will take the 

1150 traj_container provided as `traj` in the `__init__()` method and transforms 

1151 this trajectory. The argument `xyz` can be the xyz coordinates in nanometer 

1152 of a trajectory with identical topology as `self.traj`. If `periodic` was 

1153 set to True, `unitcell_vectors` and `unitcell_info` should also be provided. 

1154 

1155 Args: 

1156 xyz (Optional[np.ndarray]): A numpy array with shape (n_frames, n_atoms, 3) 

1157 in nanometer. If None is provided, the coordinates of `self.traj` 

1158 will be used. Otherwise, the topology of this set of xyz 

1159 coordinates should match the topology of `self.atom`. 

1160 Defaults to None. 

1161 unitcell_vectors (Optional[np.ndarray]): When periodic is set to 

1162 True, the `unitcell_vectors` are needed to calculate the 

1163 minimum image convention in a periodic space. This numpy 

1164 array should have the shape (n_frames, 3, 3). The rows of this 

1165 array correlate to the Bravais vectors a, b, and c. 

1166 unitcell_info (Optional[np.ndarray]): Basically identical to 

1167 `unitcell_vectors`. A numpy array of shape (n_frames, 6), where 

1168 the first three columns are the unitcell_lengths in nanometer. 

1169 The other three columns are the unitcell_angles in degrees. 

1170 

1171 Returns: 

1172 np.ndarray: The result of the computation with shape (n_frames, n_indexes). 

1173 

1174 """ 

1175 if xyz is not None: 

1176 periodic = self.periodic 

1177 else: 

1178 periodic = self.periodic and self.traj._have_unitcell 

1179 xyz, unitcell_vectors, unitcell_info = super().transform( 

1180 xyz, unitcell_vectors, unitcell_info 

1181 ) 

1182 if periodic: 

1183 assert unitcell_vectors is not None 

1184 if unitcell_info is None: 

1185 # convert to angles 

1186 unitcell_angles = [] 

1187 for fr_unitcell_vectors in unitcell_vectors: 

1188 _, _, _, alpha, beta, gamma = box_vectors_to_lengths_and_angles( 

1189 fr_unitcell_vectors[0], 

1190 fr_unitcell_vectors[1], 

1191 fr_unitcell_vectors[2], 

1192 ) 

1193 unitcell_angles.append(np.array([alpha, beta, gamma])) 

1194 else: 

1195 unitcell_angles = unitcell_info[:, 3:] 

1196 # check for an orthogonal box 

1197 orthogonal = np.allclose(unitcell_angles, 90) 

1198 

1199 out = np.empty( 

1200 (xyz.shape[0], self.angle_indexes.shape[0]), dtype="float32", order="C" 

1201 ) 

1202 _angle_mic( 

1203 xyz, 

1204 self.angle_indexes, 

1205 unitcell_vectors.transpose(0, 2, 1).copy(), 

1206 out, 

1207 orthogonal, 

1208 ) 

1209 else: 

1210 out = np.empty( 

1211 (xyz.shape[0], self.angle_indexes.shape[0]), dtype="float32", order="C" 

1212 ) 

1213 _angle(xyz, self.angle_indexes, out) 

1214 if self.cossin: 

1215 out = np.dstack((np.cos(out), np.sin(out))) 

1216 out = out.reshape(out.shape[0], out.shape[1] * out.shape[2]) 

1217 if self.deg and not self.cossin: 

1218 out = np.rad2deg(out) 

1219 return out 

1220 

1221 

1222class DihedralFeature(AngleFeature): 

1223 """Dihedrals are torsion angles defined by four atoms.""" 

1224 

1225 def __init__( 

1226 self, 

1227 traj: Union[SingleTraj, TrajEnsemble], 

1228 dih_indexes: np.ndarray, 

1229 deg: bool = False, 

1230 cossin: bool = False, 

1231 periodic: bool = True, 

1232 check_aas: bool = True, 

1233 delayed: bool = False, 

1234 ) -> None: 

1235 """Instantiate the `DihedralFeature` class. 

1236 

1237 Args: 

1238 traj (Union[SingleTraj, TrajEnsemble]): The trajectory container 

1239 which topological information will be used to build the dihedrals. 

1240 dih_indexes (np.ndarray): A numpy array with shape (n_dihedrals, 4), 

1241 that indexes the 4-tuples of atoms that will be used for 

1242 the dihedral calculation. 

1243 deg (bool): Whether to return the dihedrals in degree (True) or 

1244 in radian (False). Defaults to False. 

1245 cossin (bool): Whether to return the angles (False) or tuples of their 

1246 cos and sin values (True). Defaults to False. 

1247 periodic (bool): Whether to observe the minimum image convention 

1248 and respect proteins breaking over the periodic boundary 

1249 condition as a whole (True). In this case, the trajectory container 

1250 in `traj` needs to have unitcell information. Defaults to True. 

1251 check_aas (bool): Whether to check if all aas in `traj.top` are 

1252 recognized. Defaults to True. 

1253 

1254 """ 

1255 super().__init__( 

1256 traj=traj, 

1257 angle_indexes=dih_indexes, 

1258 deg=deg, 

1259 cossin=cossin, 

1260 periodic=periodic, 

1261 check_aas=check_aas, 

1262 delayed=delayed, 

1263 ) 

1264 

1265 def describe(self) -> list[str]: 

1266 """Gives a list of strings describing this feature's feature-axis. 

1267 

1268 A feature computes a collective variable (CV). A CV is aligned with an MD 

1269 trajectory on the time/frame-axis. The feature axis is unique for every 

1270 feature. A feature describing the backbone torsions (phi, omega, psi) would 

1271 have a feature axis with the size 3*n-3, where n is the number of residues. 

1272 The end-to-end distance of a linear protein in contrast would just have 

1273 a feature axis with length 1. This `describe()` method will label these 

1274 values unambiguously. A backbone torsion feature's `describe()` could be 

1275 ['phi_1', 'omega_1', 'psi_1', 'phi_2', 'omega_2', ..., 'psi_n-1']. 

1276 The end-to-end distance feature could be described by 

1277 ['distance_between_MET1_and_LYS80']. 

1278 

1279 Returns: 

1280 list[str]: The labels of this feature. The length 

1281 is determined by the `dih_indexes` and the `cossin` argument 

1282 in the `__init__()` method. If `cossin` is false, then 

1283 `len(describe()) == self.angle_indexes[-1]`, else `len(describe())` 

1284 is twice as long. 

1285 

1286 """ 

1287 if self.cossin: 

1288 sin_cos = ("DIH: COS(%s - %s - %s - %s)", "DIH: SIN(%s - %s - %s - %s)") 

1289 labels = [ 

1290 s 

1291 % ( 

1292 _describe_atom(self.top, quad[0]), 

1293 _describe_atom(self.top, quad[1]), 

1294 _describe_atom(self.top, quad[2]), 

1295 _describe_atom(self.top, quad[3]), 

1296 ) 

1297 for quad in self.angle_indexes 

1298 for s in sin_cos 

1299 ] 

1300 else: 

1301 labels = [ 

1302 "DIH: %s - %s - %s - %s " 

1303 % ( 

1304 _describe_atom(self.top, quad[0]), 

1305 _describe_atom(self.top, quad[1]), 

1306 _describe_atom(self.top, quad[2]), 

1307 _describe_atom(self.top, quad[3]), 

1308 ) 

1309 for quad in self.angle_indexes 

1310 ] 

1311 return labels 

1312 

1313 @staticmethod 

1314 @dask.delayed 

1315 def dask_transform( 

1316 indexes: np.ndarray, 

1317 periodic: bool, 

1318 deg: bool, 

1319 cossin: bool, 

1320 xyz: Optional[np.ndarray] = None, 

1321 unitcell_vectors: Optional[np.ndarray] = None, 

1322 unitcell_info: Optional[np.ndarray] = None, 

1323 ) -> dask.delayed: 

1324 """The same as `transform()` but without the need to pickle `traj`. 

1325 

1326 When dask delayed concurrencies are distributed, required python objects 

1327 are pickled. Thus, every feature needs to have its own pickled traj. 

1328 That defeats the purpose of dask distributed. Thus, this method implements 

1329 the same calculations as `transform` as a more barebones approach. 

1330 It foregoes the checks for periodicity and unit-cell shape and just 

1331 takes xyz, unitcell vectors, and unitcell info. Furthermore, it is a 

1332 staticmethod, so it doesn't require `self` to function. However, it 

1333 needs the indexes in `self.indexes`. That's why the `dask_indices` 

1334 property informs the scheduler to also pickle and pass this object to 

1335 the workers. 

1336 

1337 Args: 

1338 indexes (np.ndarray): A numpy array with shape (n, ) giving the 

1339 0-based index of the atoms which positions should be returned. 

1340 periodic (bool): Whether to observe the minimum image convention 

1341 and respect proteins breaking over the periodic boundary 

1342 condition as a whole (True). In this case, the trajectory container 

1343 in `traj` needs to have unitcell information. Defaults to True. 

1344 deg (bool): Whether to return the result in degree (`deg=True`) or in 

1345 radians (`deg=False`). Defaults to False (radians). 

1346 cossin (bool): If True, each angle will be returned as a pair of 

1347 (sin(x), cos(x)). This is useful, if you calculate the means 

1348 (e.g. TICA/PCA, clustering) in that space. Defaults to False. 

1349 xyz (Optional[np.ndarray]): A numpy array with shape (n_frames, n_atoms, 3) 

1350 in nanometer. If None is provided, the coordinates of `self.traj` 

1351 will be used. Otherwise, the topology of this set of xyz 

1352 coordinates should match the topology of `self.atom`. 

1353 Defaults to None. 

1354 unitcell_vectors (Optional[np.ndarray]): When periodic is set to 

1355 True, the `unitcell_vectors` are needed to calculate the 

1356 minimum image convention in a periodic space. This numpy 

1357 array should have the shape (n_frames, 3, 3). The rows of this 

1358 array correlate to the Bravais vectors a, b, and c. 

1359 unitcell_info (Optional[np.ndarray]): Basically identical to 

1360 `unitcell_vectors`. A numpy array of shape (n_frames, 6), where 

1361 the first three columns are the unitcell_lengths in nanometer. 

1362 The other three columns are the unitcell_angles in degrees. 

1363 

1364 """ 

1365 if periodic: 

1366 assert unitcell_vectors is not None 

1367 # convert to angles 

1368 if unitcell_info is None: 

1369 unitcell_angles = [] 

1370 for fr_unitcell_vectors in unitcell_vectors: 

1371 _, _, _, alpha, beta, gamma = box_vectors_to_lengths_and_angles( 

1372 fr_unitcell_vectors[0], 

1373 fr_unitcell_vectors[1], 

1374 fr_unitcell_vectors[2], 

1375 ) 

1376 unitcell_angles.append(np.array([alpha, beta, gamma])) 

1377 else: 

1378 unitcell_angles = unitcell_info[:, 3:] 

1379 

1380 # check for an orthogonal box 

1381 orthogonal = np.allclose(unitcell_angles, 90) 

1382 

1383 out = np.empty((xyz.shape[0], indexes.shape[0]), dtype="float32", order="C") 

1384 _dihedral_mic( 

1385 xyz, 

1386 indexes, 

1387 unitcell_vectors.transpose(0, 2, 1).copy(), 

1388 out, 

1389 orthogonal, 

1390 ) 

1391 

1392 else: 

1393 out = np.empty((xyz.shape[0], indexes.shape[0]), dtype="float32", order="C") 

1394 _dihedral(xyz, indexes, out) 

1395 

1396 if cossin: 

1397 out = np.dstack((np.cos(out), np.sin(out))) 

1398 out = out.reshape(out.shape[0], out.shape[1] * out.shape[2]) 

1399 

1400 if deg: 

1401 out = np.rad2deg(out) 

1402 return out 

1403 

1404 def transform( 

1405 self, 

1406 xyz: Optional[np.ndarray] = None, 

1407 unitcell_vectors: Optional[np.ndarray] = None, 

1408 unitcell_info: Optional[np.ndarray] = None, 

1409 ) -> np.ndarray: 

1410 """Takes xyz and unitcell information to apply the topological calculations on. 

1411 

1412 When this method is not provided with any input, it will take the 

1413 traj_container provided as `traj` in the `__init__()` method and transforms 

1414 this trajectory. The argument `xyz` can be the xyz coordinates in nanometer 

1415 of a trajectory with identical topology as `self.traj`. If `periodic` was 

1416 set to True, `unitcell_vectors` and `unitcell_info` should also be provided. 

1417 

1418 Args: 

1419 xyz (Optional[np.ndarray]): A numpy array with shape (n_frames, n_atoms, 3) 

1420 in nanometer. If None is provided, the coordinates of `self.traj` 

1421 will be used. Otherwise, the topology of this set of xyz 

1422 coordinates should match the topology of `self.atom`. 

1423 Defaults to None. 

1424 unitcell_vectors (Optional[np.ndarray]): When periodic is set to 

1425 True, the `unitcell_vectors` are needed to calculate the 

1426 minimum image convention in a periodic space. This numpy 

1427 array should have the shape (n_frames, 3, 3). The rows of this 

1428 array correlate to the Bravais vectors a, b, and c. 

1429 unitcell_info (Optional[np.ndarray]): Basically identical to 

1430 `unitcell_vectors`. A numpy array of shape (n_frames, 6), where 

1431 the first three columns are the unitcell_lengths in nanometer. 

1432 The other three columns are the unitcell_angles in degrees. 

1433 

1434 Returns: 

1435 np.ndarray: The result of the computation with shape (n_frames, n_indexes). 

1436 

1437 """ 

1438 if xyz is not None: 

1439 periodic = self.periodic 

1440 else: 

1441 periodic = self.periodic and self.traj._have_unitcell 

1442 xyz, unitcell_vectors, unitcell_info = Feature.transform( 

1443 self, xyz, unitcell_vectors, unitcell_info 

1444 ) 

1445 if periodic: 

1446 assert unitcell_vectors is not None 

1447 

1448 # convert to angles 

1449 if unitcell_info is None: 

1450 unitcell_angles = [] 

1451 for fr_unitcell_vectors in unitcell_vectors: 

1452 _, _, _, alpha, beta, gamma = box_vectors_to_lengths_and_angles( 

1453 fr_unitcell_vectors[0], 

1454 fr_unitcell_vectors[1], 

1455 fr_unitcell_vectors[2], 

1456 ) 

1457 unitcell_angles.append(np.array([alpha, beta, gamma])) 

1458 else: 

1459 unitcell_angles = unitcell_info[:, 3:] 

1460 

1461 # check for an orthogonal box 

1462 orthogonal = np.allclose(unitcell_angles, 90) 

1463 

1464 out = np.empty( 

1465 (xyz.shape[0], self.angle_indexes.shape[0]), dtype="float32", order="C" 

1466 ) 

1467 _dihedral_mic( 

1468 xyz, 

1469 self.angle_indexes, 

1470 unitcell_vectors.transpose(0, 2, 1).copy(), 

1471 out, 

1472 orthogonal, 

1473 ) 

1474 

1475 else: 

1476 out = np.empty( 

1477 (xyz.shape[0], self.angle_indexes.shape[0]), dtype="float32", order="C" 

1478 ) 

1479 _dihedral(xyz, self.angle_indexes, out) 

1480 

1481 if self.cossin: 

1482 out = np.dstack((np.cos(out), np.sin(out))) 

1483 out = out.reshape(out.shape[0], out.shape[1] * out.shape[2]) 

1484 

1485 if self.deg: 

1486 out = np.rad2deg(out) 

1487 return out 

1488 

1489 

1490class DistanceFeature(Feature): 

1491 prefix_label: str = "DIST:" 

1492 

1493 def __init__( 

1494 self, 

1495 traj: Union[SingleTraj, TrajEnsemble], 

1496 distance_indexes: np.ndarray, 

1497 periodic: bool = True, 

1498 dim: Optional[int] = None, 

1499 check_aas: bool = True, 

1500 delayed: bool = False, 

1501 ) -> None: 

1502 super().__init__(traj, check_aas, periodic=periodic, delayed=delayed) 

1503 self.distance_indexes = np.array(distance_indexes) 

1504 if len(self.distance_indexes) == 0: 

1505 raise ValueError("empty indices") 

1506 if dim is None: 

1507 self._dim = len(distance_indexes) 

1508 else: 

1509 self._dim = dim 

1510 

1511 def describe(self) -> list[str]: 

1512 """Gives a list of strings describing this feature's feature-axis. 

1513 

1514 A feature computes a collective variable (CV). A CV is aligned with an MD 

1515 trajectory on the time/frame-axis. The feature axis is unique for every 

1516 feature. A feature describing the backbone torsions (phi, omega, psi) would 

1517 have a feature axis with the size 3*n-3, where n is the number of residues. 

1518 The end-to-end distance of a linear protein in contrast would just have 

1519 a feature axis with length 1. This `describe()` method will label these 

1520 values unambiguously. A backbone torsion feature's `describe()` could be 

1521 ['phi_1', 'omega_1', 'psi_1', 'phi_2', 'omega_2', ..., 'psi_n-1']. 

1522 The end-to-end distance feature could be described by 

1523 ['distance_between_MET1_and_LYS80']. 

1524 

1525 Returns: 

1526 list[str]: The labels of this feature. 

1527 

1528 """ 

1529 labels = [ 

1530 ( 

1531 f"{self.prefix_label} {_describe_atom(self.top, pair[0])} " 

1532 f"{_describe_atom(self.top, pair[1])}" 

1533 ) 

1534 for pair in self.distance_indexes 

1535 ] 

1536 return labels 

1537 

1538 @property 

1539 def dask_indices(self) -> str: 

1540 """str: The name of the delayed transformation to carry out with this feature.""" 

1541 return "distance_indexes" 

1542 

1543 @staticmethod 

1544 @dask.delayed 

1545 def dask_transform( 

1546 indexes: np.ndarray, 

1547 periodic: bool, 

1548 xyz: Optional[np.ndarray] = None, 

1549 unitcell_vectors: Optional[np.ndarray] = None, 

1550 unitcell_info: Optional[np.ndarray] = None, 

1551 ) -> dask.delayed: 

1552 """The same as `transform()` but without the need to pickle `traj`. 

1553 

1554 When dask delayed concurrencies are distributed, required python objects 

1555 are pickled. Thus, every feature needs to have its own pickled traj. 

1556 That defeats the purpose of dask distributed. Thus, this method implements 

1557 the same calculations as `transform` as a more barebones approach. 

1558 It foregoes the checks for periodicity and unit-cell shape and just 

1559 takes xyz, unitcell vectors, and unitcell info. Furthermore, it is a 

1560 staticmethod, so it doesn't require `self` to function. However, it 

1561 needs the indexes in `self.indexes`. That's why the `dask_indices` 

1562 property informs the scheduler to also pickle and pass this object to 

1563 the workers. 

1564 

1565 Args: 

1566 indexes (np.ndarray): A numpy array with shape (n, ) giving the 

1567 0-based index of the atoms which positions should be returned. 

1568 periodic (bool): Whether to observe the minimum image convention 

1569 and respect proteins breaking over the periodic boundary 

1570 condition as a whole (True). In this case, the trajectory container 

1571 in `traj` needs to have unitcell information. Defaults to True. 

1572 xyz (Optional[np.ndarray]): A numpy array with shape (n_frames, n_atoms, 3) 

1573 in nanometer. If None is provided, the coordinates of `self.traj` 

1574 will be used. Otherwise, the topology of this set of xyz 

1575 coordinates should match the topology of `self.atom`. 

1576 Defaults to None. 

1577 unitcell_vectors (Optional[np.ndarray]): When periodic is set to 

1578 True, the `unitcell_vectors` are needed to calculate the 

1579 minimum image convention in a periodic space. This numpy 

1580 array should have the shape (n_frames, 3, 3). The rows of this 

1581 array correlate to the Bravais vectors a, b, and c. 

1582 unitcell_info (Optional[np.ndarray]): Basically identical to 

1583 `unitcell_vectors`. A numpy array of shape (n_frames, 6), where 

1584 the first three columns are the unitcell_lengths in nanometer. 

1585 The other three columns are the unitcell_angles in degrees. 

1586 

1587 """ 

1588 if periodic: 

1589 assert unitcell_vectors is not None 

1590 # check for an orthogonal box 

1591 if unitcell_info is None: 

1592 # convert to angles 

1593 unitcell_angles = [] 

1594 for fr_unitcell_vectors in unitcell_vectors: 

1595 _, _, _, alpha, beta, gamma = box_vectors_to_lengths_and_angles( 

1596 fr_unitcell_vectors[0], 

1597 fr_unitcell_vectors[1], 

1598 fr_unitcell_vectors[2], 

1599 ) 

1600 unitcell_angles.append(np.array([alpha, beta, gamma])) 

1601 else: 

1602 unitcell_angles = unitcell_info[:, 3:] 

1603 

1604 # check for an orthogonal box 

1605 orthogonal = np.allclose(unitcell_angles, 90) 

1606 

1607 out = np.empty( 

1608 ( 

1609 xyz.shape[0], 

1610 indexes.shape[0], 

1611 ), 

1612 dtype="float32", 

1613 order="C", 

1614 ) 

1615 _dist_mic( 

1616 xyz, 

1617 indexes.astype("int32"), 

1618 unitcell_vectors.transpose(0, 2, 1).copy(), 

1619 out, 

1620 orthogonal, 

1621 ) 

1622 else: 

1623 out = np.empty( 

1624 ( 

1625 xyz.shape[0], 

1626 indexes.shape[0], 

1627 ), 

1628 dtype="float32", 

1629 order="C", 

1630 ) 

1631 _dist(xyz, indexes.astype("int32"), out) 

1632 return out 

1633 

1634 def transform( 

1635 self, 

1636 xyz: Optional[np.ndarray] = None, 

1637 unitcell_vectors: Optional[np.ndarray] = None, 

1638 unitcell_info: Optional[np.ndarray] = None, 

1639 ) -> np.ndarray: 

1640 """Takes xyz and unitcell information to apply the topological calculations on. 

1641 

1642 When this method is not provided with any input, it will take the 

1643 traj_container provided as `traj` in the `__init__()` method and transforms 

1644 this trajectory. The argument `xyz` can be the xyz coordinates in nanometer 

1645 of a trajectory with identical topology as `self.traj`. If `periodic` was 

1646 set to True, `unitcell_vectors` and `unitcell_info` should also be provided. 

1647 

1648 Args: 

1649 xyz (Optional[np.ndarray]): A numpy array with shape (n_frames, n_atoms, 3) 

1650 in nanometer. If None is provided, the coordinates of `self.traj` 

1651 will be used. Otherwise, the topology of this set of xyz 

1652 coordinates should match the topology of `self.atom`. 

1653 Defaults to None. 

1654 unitcell_vectors (Optional[np.ndarray]): When periodic is set to 

1655 True, the `unitcell_vectors` are needed to calculate the 

1656 minimum image convention in a periodic space. This numpy 

1657 array should have the shape (n_frames, 3, 3). The rows of this 

1658 array correlate to the Bravais vectors a, b, and c. 

1659 unitcell_info (Optional[np.ndarray]): Basically identical to 

1660 `unitcell_vectors`. A numpy array of shape (n_frames, 6), where 

1661 the first three columns are the unitcell_lengths in nanometer. 

1662 The other three columns are the unitcell_angles in degrees. 

1663 

1664 Returns: 

1665 np.ndarray: The result of the computation with shape (n_frames, n_indexes). 

1666 

1667 """ 

1668 if xyz is not None: 

1669 periodic = self.periodic 

1670 else: 

1671 periodic = self.periodic and self.traj._have_unitcell 

1672 xyz, unitcell_vectors, unitcell_info = super().transform( 

1673 xyz, unitcell_vectors, unitcell_info 

1674 ) 

1675 if periodic: 

1676 assert unitcell_info is not None 

1677 

1678 # check for an orthogonal box 

1679 if unitcell_info is None: 

1680 # convert to angles 

1681 unitcell_angles = [] 

1682 for fr_unitcell_vectors in unitcell_vectors: 

1683 _, _, _, alpha, beta, gamma = box_vectors_to_lengths_and_angles( 

1684 fr_unitcell_vectors[0], 

1685 fr_unitcell_vectors[1], 

1686 fr_unitcell_vectors[2], 

1687 ) 

1688 unitcell_angles.append(np.array([alpha, beta, gamma])) 

1689 else: 

1690 unitcell_angles = unitcell_info[:, 3:] 

1691 # check for an orthogonal box 

1692 orthogonal = np.allclose(unitcell_angles, 90) 

1693 out = np.empty( 

1694 ( 

1695 xyz.shape[0], 

1696 self.distance_indexes.shape[0], 

1697 ), 

1698 dtype="float32", 

1699 order="C", 

1700 ) 

1701 _dist_mic( 

1702 xyz, 

1703 np.ascontiguousarray(self.distance_indexes.astype("int32")), 

1704 unitcell_vectors.transpose(0, 2, 1).copy(), 

1705 out, 

1706 orthogonal, 

1707 ) 

1708 else: 

1709 out = np.empty( 

1710 ( 

1711 xyz.shape[0], 

1712 self.distance_indexes.shape[0], 

1713 ), 

1714 dtype="float32", 

1715 order="C", 

1716 ) 

1717 _dist(xyz, np.ascontiguousarray(self.distance_indexes.astype("int32")), out) 

1718 return out 

1719 

1720 

1721class AlignFeature(SelectionFeature): 

1722 prefix_label: str = "aligned ATOM:" 

1723 

1724 def __init__( 

1725 self, 

1726 traj: SingleTraj, 

1727 reference: md.Trajectory, 

1728 indexes: np.ndarray, 

1729 atom_indices: Optional[np.ndarray] = None, 

1730 ref_atom_indices: Optional[np.ndarray] = None, 

1731 in_place: bool = False, 

1732 delayed: bool = False, 

1733 ) -> None: 

1734 super(AlignFeature, self).__init__(traj=traj, indexes=indexes, delayed=delayed) 

1735 self.ref = reference 

1736 self.atom_indices = atom_indices 

1737 self.ref_atom_indices = ref_atom_indices 

1738 self.in_place = in_place 

1739 

1740 def transform( 

1741 self, 

1742 xyz: Optional[np.ndarray] = None, 

1743 unitcell_vectors: Optional[np.ndarray] = None, 

1744 unitcell_info: Optional[np.ndarray] = None, 

1745 ) -> np.ndarray: 

1746 """Returns the aligned xyz coordinates.""" 

1747 if not self.in_place: 

1748 traj = self.traj.traj.slice(slice(None), copy=True) 

1749 else: 

1750 traj = self.traj.traj 

1751 traj.xyz = xyz 

1752 aligned = traj.superpose( 

1753 reference=self.ref, 

1754 atom_indices=self.atom_indices, 

1755 ref_atom_indices=self.ref_atom_indices, 

1756 ) 

1757 # apply selection 

1758 return super(AlignFeature, self).transform( 

1759 aligned.xyz, unitcell_vectors, unitcell_info 

1760 ) 

1761 

1762 

1763class InverseDistanceFeature(DistanceFeature): 

1764 prefix_label: str = "INVDIST:" 

1765 

1766 def __init__( 

1767 self, 

1768 traj: Union[SingleTraj, TrajEnsemble], 

1769 distance_indexes: np.ndarray, 

1770 periodic: bool = True, 

1771 delayed: bool = False, 

1772 ) -> None: 

1773 DistanceFeature.__init__( 

1774 self, traj, distance_indexes, periodic=periodic, delayed=delayed 

1775 ) 

1776 

1777 @property 

1778 def dask_indices(self) -> str: 

1779 """str: The name of the delayed transformation to carry out with this feature.""" 

1780 return "distance_indexes" 

1781 

1782 @staticmethod 

1783 @dask.delayed 

1784 def dask_transform( 

1785 indexes: np.ndarray, 

1786 periodic: bool, 

1787 xyz: Optional[np.ndarray] = None, 

1788 unitcell_vectors: Optional[np.ndarray] = None, 

1789 unitcell_info: Optional[np.ndarray] = None, 

1790 ) -> dask.delayed: 

1791 """The same as `transform()` but without the need to pickle `traj`. 

1792 

1793 When dask delayed concurrencies are distributed, required python objects 

1794 are pickled. Thus, every feature needs to have its own pickled traj. 

1795 That defeats the purpose of dask distributed. Thus, this method implements 

1796 the same calculations as `transform` as a more barebones approach. 

1797 It foregoes the checks for periodicity and unit-cell shape and just 

1798 takes xyz, unitcell vectors, and unitcell info. Furthermore, it is a 

1799 staticmethod, so it doesn't require `self` to function. However, it 

1800 needs the indexes in `self.indexes`. That's why the `dask_indices` 

1801 property informs the scheduler to also pickle and pass this object to 

1802 the workers. 

1803 

1804 Args: 

1805 indexes (np.ndarray): A numpy array with shape (n, ) giving the 

1806 0-based index of the atoms which positions should be returned. 

1807 periodic (bool): Whether to observe the minimum image convention 

1808 and respect proteins breaking over the periodic boundary 

1809 condition as a whole (True). In this case, the trajectory container 

1810 in `traj` needs to have unitcell information. Defaults to True. 

1811 xyz (Optional[np.ndarray]): A numpy array with shape (n_frames, n_atoms, 3) 

1812 in nanometer. If None is provided, the coordinates of `self.traj` 

1813 will be used. Otherwise, the topology of this set of xyz 

1814 coordinates should match the topology of `self.atom`. 

1815 Defaults to None. 

1816 unitcell_vectors (Optional[np.ndarray]): When periodic is set to 

1817 True, the `unitcell_vectors` are needed to calculate the 

1818 minimum image convention in a periodic space. This numpy 

1819 array should have the shape (n_frames, 3, 3). The rows of this 

1820 array correlate to the Bravais vectors a, b, and c. 

1821 unitcell_info (Optional[np.ndarray]): Basically identical to 

1822 `unitcell_vectors`. A numpy array of shape (n_frames, 6), where 

1823 the first three columns are the unitcell_lengths in nanometer. 

1824 The other three columns are the unitcell_angles in degrees. 

1825 

1826 """ 

1827 if periodic: 

1828 assert unitcell_vectors is not None 

1829 # check for an orthogonal box 

1830 if unitcell_info is None: 

1831 # convert to angles 

1832 unitcell_angles = [] 

1833 for fr_unitcell_vectors in unitcell_vectors: 

1834 _, _, _, alpha, beta, gamma = box_vectors_to_lengths_and_angles( 

1835 fr_unitcell_vectors[0], 

1836 fr_unitcell_vectors[1], 

1837 fr_unitcell_vectors[2], 

1838 ) 

1839 unitcell_angles.append(np.array([alpha, beta, gamma])) 

1840 else: 

1841 unitcell_angles = unitcell_info[:, 3:] 

1842 # check for an orthogonal box 

1843 orthogonal = np.allclose(unitcell_angles, 90) 

1844 

1845 out = np.empty( 

1846 ( 

1847 xyz.shape[0], 

1848 indexes.shape[0], 

1849 ), 

1850 dtype="float32", 

1851 order="C", 

1852 ) 

1853 _dist_mic( 

1854 xyz, 

1855 indexes.astype("int32"), 

1856 unitcell_vectors.transpose(0, 2, 1).copy(), 

1857 out, 

1858 orthogonal, 

1859 ) 

1860 else: 

1861 out = np.empty( 

1862 ( 

1863 xyz.shape[0], 

1864 indexes.shape[0], 

1865 ), 

1866 dtype="float32", 

1867 order="C", 

1868 ) 

1869 _dist(xyz, indexes.astype("int32"), out) 

1870 return 1 / out 

1871 

1872 def transform( 

1873 self, 

1874 xyz: Optional[np.ndarray] = None, 

1875 unitcell_vectors: Optional[np.ndarray] = None, 

1876 unitcell_info: Optional[np.ndarray] = None, 

1877 ) -> np.ndarray: 

1878 """Takes xyz and unitcell information to apply the topological calculations on. 

1879 

1880 When this method is not provided with any input, it will take the 

1881 traj_container provided as `traj` in the `__init__()` method and transforms 

1882 this trajectory. The argument `xyz` can be the xyz coordinates in nanometer 

1883 of a trajectory with identical topology as `self.traj`. If `periodic` was 

1884 set to True, `unitcell_vectors` and `unitcell_info` should also be provided. 

1885 

1886 Args: 

1887 xyz (Optional[np.ndarray]): A numpy array with shape (n_frames, n_atoms, 3) 

1888 in nanometer. If None is provided, the coordinates of `self.traj` 

1889 will be used. Otherwise, the topology of this set of xyz 

1890 coordinates should match the topology of `self.atom`. 

1891 Defaults to None. 

1892 unitcell_vectors (Optional[np.ndarray]): When periodic is set to 

1893 True, the `unitcell_vectors` are needed to calculate the 

1894 minimum image convention in a periodic space. This numpy 

1895 array should have the shape (n_frames, 3, 3). The rows of this 

1896 array correlate to the Bravais vectors a, b, and c. 

1897 unitcell_info (Optional[np.ndarray]): Basically identical to 

1898 `unitcell_vectors`. A numpy array of shape (n_frames, 6), where 

1899 the first three columns are the unitcell_lengths in nanometer. 

1900 The other three columns are the unitcell_angles in degrees. 

1901 

1902 Returns: 

1903 np.ndarray: The result of the computation with shape (n_frames, n_indexes). 

1904 

1905 """ 

1906 return 1.0 / super().transform(xyz, unitcell_vectors, unitcell_info) 

1907 

1908 

1909class ContactFeature(DistanceFeature): 

1910 """Defines certain distances as contacts and returns a binary (0, 1) result. 

1911 

1912 Instead of returning the binary result can also count contacts with the 

1913 argument `count_contacts=True` provided at instantiation. In that case, 

1914 every frame returns an integer number. 

1915 

1916 """ 

1917 

1918 prefix_label: str = "CONTACT:" 

1919 _nonstandard_transform_args: list[str] = ["threshold", "count_contacts"] 

1920 

1921 def __init__( 

1922 self, 

1923 traj: SingleTraj, 

1924 distance_indexes: np.ndarray, 

1925 threshold: float = 5.0, 

1926 periodic: bool = True, 

1927 count_contacts: bool = False, 

1928 delayed: bool = False, 

1929 ) -> None: 

1930 """Instantiate the contact feature. 

1931 

1932 A regular contact feature yields a np.ndarray with zeros and ones. 

1933 The zeros are no contact. The ones are contact. 

1934 

1935 Args: 

1936 traj (SingleTraj): An instance of `SingleTraj`. 

1937 distance_indexes (np.ndarray): An np.ndarray with shape (n_dists, 2), 

1938 where distance_indexes[:, 0] indexes the first atoms of the distance 

1939 measurement, and distance_indexes[:, 1] indexes the second atoms of the 

1940 distance measurement. 

1941 threshold (float): The threshold in nm, under which a distance is 

1942 considered to be a contact. Defaults to 5.0 nm. 

1943 periodic (bool): Whether to use the minimum image convention when 

1944 calculating distances. Defaults to True. 

1945 count_contacts (bool): When True, return an integer of the number of 

1946 contacts instead of returning the array of regular contacts. 

1947 

1948 """ 

1949 super(ContactFeature, self).__init__( 

1950 traj, distance_indexes, periodic=periodic, delayed=delayed 

1951 ) 

1952 if count_contacts: 

1953 self.prefix_label: str = "counted " + self.prefix_label 

1954 self.threshold = threshold 

1955 self.count_contacts = count_contacts 

1956 if count_contacts: 

1957 self.dimension = 1 

1958 else: 

1959 self.dimension = len(self.distance_indexes) 

1960 

1961 @property 

1962 def dask_indices(self) -> str: 

1963 """str: The name of the delayed transformation to carry out with this feature.""" 

1964 return "distance_indexes" 

1965 

1966 @staticmethod 

1967 @dask.delayed 

1968 def dask_transform( 

1969 indexes: np.ndarray, 

1970 periodic: bool, 

1971 threshold: float, 

1972 count_contacts: bool, 

1973 xyz: Optional[np.ndarray] = None, 

1974 unitcell_vectors: Optional[np.ndarray] = None, 

1975 unitcell_info: Optional[np.ndarray] = None, 

1976 ) -> dask.delayed: 

1977 """The same as `transform()` but without the need to pickle `traj`. 

1978 

1979 When dask delayed concurrencies are distributed, required python objects 

1980 are pickled. Thus, every feature needs to have its own pickled traj. 

1981 That defeats the purpose of dask distributed. Thus, this method implements 

1982 the same calculations as `transform` as a more barebones approach. 

1983 It foregoes the checks for periodicity and unit-cell shape and just 

1984 takes xyz, unitcell vectors, and unitcell info. Furthermore, it is a 

1985 staticmethod, so it doesn't require `self` to function. However, it 

1986 needs the indexes in `self.indexes`. That's why the `dask_indices` 

1987 property informs the scheduler to also pickle and pass this object to 

1988 the workers. 

1989 

1990 Args: 

1991 indexes (np.ndarray): A numpy array with shape (n, ) giving the 

1992 0-based index of the atoms which positions should be returned. 

1993 periodic (bool): Whether to observe the minimum image convention 

1994 and respect proteins breaking over the periodic boundary 

1995 condition as a whole (True). In this case, the trajectory container 

1996 in `traj` needs to have unitcell information. Defaults to True. 

1997 threshold (float): The threshold in nm, under which a distance is 

1998 considered to be a contact. Defaults to 5.0 nm. 

1999 count_contacts (bool): When True, return an integer of the number of contacts 

2000 instead of returning the array of regular contacts. 

2001 xyz (Optional[np.ndarray]): A numpy array with shape (n_frames, n_atoms, 3) 

2002 in nanometer. If None is provided, the coordinates of `self.traj` 

2003 will be used. Otherwise, the topology of this set of xyz 

2004 coordinates should match the topology of `self.atom`. 

2005 Defaults to None. 

2006 unitcell_vectors (Optional[np.ndarray]): When periodic is set to 

2007 True, the `unitcell_vectors` are needed to calculate the 

2008 minimum image convention in a periodic space. This numpy 

2009 array should have the shape (n_frames, 3, 3). The rows of this 

2010 array correlate to the Bravais vectors a, b, and c. 

2011 unitcell_info (Optional[np.ndarray]): Basically identical to 

2012 `unitcell_vectors`. A numpy array of shape (n_frames, 6), where 

2013 the first three columns are the unitcell_lengths in nanometer. 

2014 The other three columns are the unitcell_angles in degrees. 

2015 

2016 """ 

2017 if periodic: 

2018 assert unitcell_vectors is not None 

2019 # check for an orthogonal box 

2020 if unitcell_info is None: 

2021 # convert to angles 

2022 unitcell_angles = [] 

2023 for fr_unitcell_vectors in unitcell_vectors: 

2024 _, _, _, alpha, beta, gamma = box_vectors_to_lengths_and_angles( 

2025 fr_unitcell_vectors[0], 

2026 fr_unitcell_vectors[1], 

2027 fr_unitcell_vectors[2], 

2028 ) 

2029 unitcell_angles.append(np.array([alpha, beta, gamma])) 

2030 else: 

2031 unitcell_angles = unitcell_info[:, 3:] 

2032 # check for an orthogonal box 

2033 orthogonal = np.allclose(unitcell_angles, 90) 

2034 

2035 out = np.empty( 

2036 ( 

2037 xyz.shape[0], 

2038 indexes.shape[0], 

2039 ), 

2040 dtype="float32", 

2041 order="C", 

2042 ) 

2043 _dist_mic( 

2044 xyz, 

2045 indexes.astype("int32"), 

2046 unitcell_vectors.transpose(0, 2, 1).copy(), 

2047 out, 

2048 orthogonal, 

2049 ) 

2050 else: 

2051 out = np.empty( 

2052 ( 

2053 xyz.shape[0], 

2054 indexes.shape[0], 

2055 ), 

2056 dtype="float32", 

2057 order="C", 

2058 ) 

2059 _dist(xyz, indexes.astype("int32"), out) 

2060 res = np.zeros((len(out), indexes.shape[0]), dtype=np.float32) 

2061 I = np.argwhere(out <= threshold) # noqa: E741 

2062 res[I[:, 0], I[:, 1]] = 1.0 

2063 if count_contacts: 

2064 return res.sum(axis=1, keepdims=True) 

2065 else: 

2066 return res 

2067 

2068 def transform( 

2069 self, 

2070 xyz: Optional[np.ndarray] = None, 

2071 unitcell_vectors: Optional[np.ndarray] = None, 

2072 unitcell_info: Optional[np.ndarray] = None, 

2073 ) -> np.ndarray: 

2074 """Takes xyz and unitcell information to apply the topological calculations on. 

2075 

2076 When this method is not provided with any input, it will take the 

2077 traj_container provided as `traj` in the `__init__()` method and transforms 

2078 this trajectory. The argument `xyz` can be the xyz coordinates in nanometer 

2079 of a trajectory with identical topology as `self.traj`. If `periodic` was 

2080 set to True, `unitcell_vectors` and `unitcell_info` should also be provided. 

2081 

2082 Args: 

2083 xyz (Optional[np.ndarray]): A numpy array with shape (n_frames, n_atoms, 3) 

2084 in nanometer. If None is provided, the coordinates of `self.traj` 

2085 will be used. Otherwise, the topology of this set of xyz 

2086 coordinates should match the topology of `self.atom`. 

2087 Defaults to None. 

2088 unitcell_vectors (Optional[np.ndarray]): When periodic is set to 

2089 True, the `unitcell_vectors` are needed to calculate the 

2090 minimum image convention in a periodic space. This numpy 

2091 array should have the shape (n_frames, 3, 3). The rows of this 

2092 array correlate to the Bravais vectors a, b, and c. 

2093 unitcell_info (Optional[np.ndarray]): Basically identical to 

2094 `unitcell_vectors`. A numpy array of shape (n_frames, 6), where 

2095 the first three columns are the unitcell_lengths in nanometer. 

2096 The other three columns are the unitcell_angles in degrees. 

2097 

2098 Returns: 

2099 np.ndarray: The result of the computation with shape (n_frames, n_indexes). 

2100 

2101 """ 

2102 dists = super(ContactFeature, self).transform( 

2103 xyz, unitcell_vectors, unitcell_info 

2104 ) 

2105 res = np.zeros( 

2106 (len(self.traj), self.distance_indexes.shape[0]), dtype=np.float32 

2107 ) 

2108 I = np.argwhere(dists <= self.threshold) # noqa: E741 

2109 res[I[:, 0], I[:, 1]] = 1.0 

2110 if self.count_contacts: 

2111 return res.sum(axis=1, keepdims=True) 

2112 else: 

2113 return res 

2114 

2115 

2116class BackboneTorsionFeature(DihedralFeature): 

2117 def __init__( 

2118 self, 

2119 traj: SingleTraj, 

2120 selstr: Optional[str] = None, 

2121 deg: bool = False, 

2122 cossin: bool = False, 

2123 periodic: bool = True, 

2124 delayed: bool = False, 

2125 ) -> None: 

2126 self.traj = traj 

2127 indices = self.traj.indices_phi 

2128 self.selstr = selstr 

2129 

2130 if not selstr: 

2131 self._phi_inds = indices 

2132 else: 

2133 self._phi_inds = indices[ 

2134 np.in1d(indices[:, 1], self.traj.top.select(selstr), assume_unique=True) 

2135 ] 

2136 

2137 indices = self.traj.indices_psi 

2138 if not selstr: 

2139 self._psi_inds = indices 

2140 else: 

2141 self._psi_inds = indices[ 

2142 np.in1d(indices[:, 1], self.traj.top.select(selstr), assume_unique=True) 

2143 ] 

2144 

2145 # alternate phi, psi pairs (phi_1, psi_1, ..., phi_n, psi_n) 

2146 dih_indexes = np.array( 

2147 list(phi_psi for phi_psi in zip(self._phi_inds, self._psi_inds)) 

2148 ).reshape(-1, 4) 

2149 

2150 super(BackboneTorsionFeature, self).__init__( 

2151 self.traj, 

2152 dih_indexes, 

2153 deg=deg, 

2154 cossin=cossin, 

2155 periodic=periodic, 

2156 delayed=delayed, 

2157 ) 

2158 

2159 def describe(self) -> list[str]: 

2160 """Gives a list of strings describing this feature's feature-axis. 

2161 

2162 A feature computes a collective variable (CV). A CV is aligned with an MD 

2163 trajectory on the time/frame-axis. The feature axis is unique for every 

2164 feature. A feature describing the backbone torsions (phi, omega, psi) would 

2165 have a feature axis with the size 3*n-3, where n is the number of residues. 

2166 The end-to-end distance of a linear protein in contrast would just have 

2167 a feature axis with length 1. This `describe()` method will label these 

2168 values unambiguously. A backbone torsion feature's `describe()` could be 

2169 ['phi_1', 'omega_1', 'psi_1', 'phi_2', 'omega_2', ..., 'psi_n-1']. 

2170 The end-to-end distance feature could be described by 

2171 ['distance_between_MET1_and_LYS80']. 

2172 

2173 Returns: 

2174 list[str]: The labels of this feature. The length 

2175 is determined by the `dih_indexes` and the `cossin` argument 

2176 in the `__init__()` method. If `cossin` is false, then 

2177 `len(describe()) == self.angle_indexes[-1]`, else `len(describe())` 

2178 is twice as long. 

2179 

2180 """ 

2181 top = self.traj.top 

2182 getlbl = lambda at: "%i %s %i" % ( 

2183 at.residue.chain.index, 

2184 at.residue.name, 

2185 at.residue.resSeq, 

2186 ) 

2187 

2188 if self.cossin: 

2189 sin_cos = ("COS(PHI %s)", "SIN(PHI %s)") 

2190 labels_phi = [ 

2191 ( 

2192 sin_cos[0] % getlbl(top.atom(ires[1])), 

2193 sin_cos[1] % getlbl(top.atom(ires[1])), 

2194 ) 

2195 for ires in self._phi_inds 

2196 ] 

2197 sin_cos = ("COS(PSI %s)", "SIN(PSI %s)") 

2198 labels_psi = [ 

2199 ( 

2200 sin_cos[0] % getlbl(top.atom(ires[1])), 

2201 sin_cos[1] % getlbl(top.atom(ires[1])), 

2202 ) 

2203 for ires in self._psi_inds 

2204 ] 

2205 # produce the same ordering as the given indices (phi_1, psi_1, ..., phi_n, psi_n) 

2206 # or (cos(phi_1), sin(phi_1), cos(psi_1), sin(psi_1), ..., cos(phi_n), sin(phi_n), cos(psi_n), sin(psi_n)) 

2207 res = list( 

2208 itertools.chain.from_iterable( 

2209 itertools.chain.from_iterable(zip(labels_phi, labels_psi)) 

2210 ) 

2211 ) 

2212 else: 

2213 labels_phi = [ 

2214 "PHI %s" % getlbl(top.atom(ires[1])) for ires in self._phi_inds 

2215 ] 

2216 labels_psi = [ 

2217 "PSI %s" % getlbl(top.atom(ires[1])) for ires in self._psi_inds 

2218 ] 

2219 res = list(itertools.chain.from_iterable(zip(labels_phi, labels_psi))) 

2220 return res 

2221 

2222 

2223class ResidueMinDistanceFeature(DistanceFeature): 

2224 _nonstandard_transform_args: list[str] = [ 

2225 "threshold", 

2226 "count_contacts", 

2227 "scheme", 

2228 "top", 

2229 ] 

2230 

2231 def __init__( 

2232 self, 

2233 traj: SingleTraj, 

2234 contacts: np.ndarray, 

2235 scheme: Literal["ca", "closest", "closest-heavy"], 

2236 ignore_nonprotein: bool, 

2237 threshold: float, 

2238 periodic: bool, 

2239 count_contacts: bool = False, 

2240 delayed: bool = False, 

2241 ) -> None: 

2242 if count_contacts and threshold is None: 

2243 raise ValueError( 

2244 "Cannot count contacts when no contact threshold is supplied." 

2245 ) 

2246 

2247 self.contacts = contacts 

2248 self.scheme = scheme 

2249 self.threshold = threshold 

2250 self.prefix_label: str = "RES_DIST (%s)" % scheme 

2251 self.ignore_nonprotein = ignore_nonprotein 

2252 

2253 if count_contacts: 

2254 self.prefix_label: str = "counted " + self.prefix_label 

2255 self.count_contacts = count_contacts 

2256 

2257 dummy_traj = md.Trajectory(np.zeros((traj.top.n_atoms, 3)), traj.top) 

2258 dummy_dist, dummy_pairs = md.compute_contacts( 

2259 dummy_traj, 

2260 contacts=contacts, 

2261 scheme=scheme, 

2262 periodic=periodic, 

2263 ignore_nonprotein=ignore_nonprotein, 

2264 ) 

2265 

2266 dim = 1 if count_contacts else dummy_dist.shape[1] 

2267 super(ResidueMinDistanceFeature, self).__init__( 

2268 distance_indexes=dummy_pairs, 

2269 traj=traj, 

2270 periodic=periodic, 

2271 dim=dim, 

2272 delayed=delayed, 

2273 ) 

2274 

2275 def describe(self) -> list[str]: 

2276 """Gives a list of strings describing this feature's feature-axis. 

2277 

2278 A feature computes a collective variable (CV). A CV is aligned with an MD 

2279 trajectory on the time/frame-axis. The feature axis is unique for every 

2280 feature. A feature describing the backbone torsions (phi, omega, psi) would 

2281 have a feature axis with the size 3*n-3, where n is the number of residues. 

2282 The end-to-end distance of a linear protein in contrast would just have 

2283 a feature axis with length 1. This `describe()` method will label these 

2284 values unambiguously. A backbone torsion feature's `describe()` could be 

2285 ['phi_1', 'omega_1', 'psi_1', 'phi_2', 'omega_2', ..., 'psi_n-1']. 

2286 The end-to-end distance feature could be described by 

2287 ['distance_between_MET1_and_LYS80']. 

2288 

2289 Returns: 

2290 list[str]: The labels of this feature. 

2291 

2292 """ 

2293 labels = [] 

2294 for a, b in self.distance_indexes: 

2295 labels.append( 

2296 f"{self.prefix_label} {self.traj.top.residue(a)} - {self.traj.top.residue(b)}" 

2297 ) 

2298 return labels 

2299 

2300 @property 

2301 def dask_indices(self) -> str: 

2302 """str: The name of the delayed transformation to carry out with this feature.""" 

2303 return "contacts" 

2304 

2305 @staticmethod 

2306 @dask.delayed 

2307 def dask_transform( 

2308 indexes: np.ndarray, 

2309 top: md.Topology, 

2310 scheme: Literal["ca", "closest", "closest-heavy"], 

2311 periodic: bool, 

2312 threshold: float, 

2313 count_contacts: bool, 

2314 xyz: Optional[np.ndarray] = None, 

2315 unitcell_vectors: Optional[np.ndarray] = None, 

2316 unitcell_info: Optional[np.ndarray] = None, 

2317 ) -> dask.delayed: 

2318 """The same as `transform()` but without the need to pickle `traj`. 

2319 

2320 When dask delayed concurrencies are distributed, required python objects 

2321 are pickled. Thus, every feature needs to have its own pickled traj. 

2322 That defeats the purpose of dask distributed. Thus, this method implements 

2323 the same calculations as `transform` as a more barebones approach. 

2324 It foregoes the checks for periodicity and unit-cell shape and just 

2325 takes xyz, unitcell vectors, and unitcell info. Furthermore, it is a 

2326 staticmethod, so it doesn't require `self` to function. However, it 

2327 needs the indexes in `self.indexes`. That's why the `dask_indices` 

2328 property informs the scheduler to also pickle and pass this object to 

2329 the workers. 

2330 

2331 Args: 

2332 indexes (np.ndarray): For this special feature, the indexes argument 

2333 in the @staticmethod dask_transform is `self.contacts`. 

2334 periodic (bool): Whether to observe the minimum image convention 

2335 and respect proteins breaking over the periodic boundary 

2336 condition as a whole (True). In this case, the trajectory container 

2337 in `traj` needs to have unitcell information. Defaults to True. 

2338 threshold (float): The threshold in nm, under which a distance is 

2339 considered to be a contact. Defaults to 5.0 nm. 

2340 count_contacts (bool): When True, return an integer of the number of contacts 

2341 instead of returning the array of regular contacts. 

2342 xyz (Optional[np.ndarray]): A numpy array with shape (n_frames, n_atoms, 3) 

2343 in nanometer. If None is provided, the coordinates of `self.traj` 

2344 will be used. Otherwise, the topology of this set of xyz 

2345 coordinates should match the topology of `self.atom`. 

2346 Defaults to None. 

2347 unitcell_vectors (Optional[np.ndarray]): When periodic is set to 

2348 True, the `unitcell_vectors` are needed to calculate the 

2349 minimum image convention in a periodic space. This numpy 

2350 array should have the shape (n_frames, 3, 3). The rows of this 

2351 array correlate to the Bravais vectors a, b, and c. 

2352 unitcell_info (Optional[np.ndarray]): Basically identical to 

2353 `unitcell_vectors`. A numpy array of shape (n_frames, 6), where 

2354 the first three columns are the unitcell_lengths in nanometer. 

2355 The other three columns are the unitcell_angles in degrees. 

2356 

2357 """ 

2358 # create a dummy traj, with the appropriate topology 

2359 traj = md.Trajectory( 

2360 xyz=xyz, 

2361 topology=top, 

2362 unitcell_lengths=unitcell_info[:, :3], 

2363 unitcell_angles=unitcell_info[:, 3:], 

2364 ) 

2365 

2366 # We let mdtraj compute the contacts with the input scheme 

2367 D = md.compute_contacts( 

2368 traj, 

2369 contacts=indexes, 

2370 scheme=scheme, 

2371 periodic=periodic, 

2372 )[0] 

2373 

2374 res = np.zeros_like(D) 

2375 # Do we want binary? 

2376 if threshold is not None: 

2377 I = np.argwhere(D <= threshold) 

2378 res[I[:, 0], I[:, 1]] = 1.0 

2379 else: 

2380 res = D 

2381 

2382 if count_contacts and threshold is not None: 

2383 return res.sum(axis=1, keepdims=True) 

2384 else: 

2385 return res 

2386 

2387 def transform( 

2388 self, 

2389 xyz: Optional[np.ndarray] = None, 

2390 unitcell_vectors: Optional[np.ndarray] = None, 

2391 unitcell_info: Optional[np.ndarray] = None, 

2392 ) -> np.ndarray: 

2393 """Takes xyz and unitcell information to apply the topological calculations on. 

2394 

2395 When this method is not provided with any input, it will take the 

2396 traj_container provided as `traj` in the `__init__()` method and transforms 

2397 this trajectory. The argument `xyz` can be the xyz coordinates in nanometer 

2398 of a trajectory with identical topology as `self.traj`. If `periodic` was 

2399 set to True, `unitcell_vectors` and `unitcell_info` should also be provided. 

2400 

2401 Args: 

2402 xyz (Optional[np.ndarray]): A numpy array with shape (n_frames, n_atoms, 3) 

2403 in nanometer. If None is provided, the coordinates of `self.traj` 

2404 will be used. Otherwise, the topology of this set of xyz 

2405 coordinates should match the topology of `self.atom`. 

2406 Defaults to None. 

2407 unitcell_vectors (Optional[np.ndarray]): When periodic is set to 

2408 True, the `unitcell_vectors` are needed to calculate the 

2409 minimum image convention in a periodic space. This numpy 

2410 array should have the shape (n_frames, 3, 3). The rows of this 

2411 array correlate to the Bravais vectors a, b, and c. 

2412 unitcell_info (Optional[np.ndarray]): Basically identical to 

2413 `unitcell_vectors`. A numpy array of shape (n_frames, 6), where 

2414 the first three columns are the unitcell_lengths in nanometer. 

2415 The other three columns are the unitcell_angles in degrees. 

2416 

2417 Returns: 

2418 np.ndarray: The result of the computation with shape (n_frames, n_indexes). 

2419 

2420 """ 

2421 ( 

2422 xyz, 

2423 unitcell_vectors, 

2424 unitcell_info, 

2425 ) = Feature.transform(self, xyz, unitcell_vectors, unitcell_info) 

2426 

2427 # create a dummy traj, with the appropriate topology 

2428 traj = md.Trajectory( 

2429 xyz=xyz, 

2430 topology=self.traj.top, 

2431 unitcell_lengths=unitcell_info[:, :3], 

2432 unitcell_angles=unitcell_info[:, 3:], 

2433 ) 

2434 

2435 # We let mdtraj compute the contacts with the input scheme 

2436 D = md.compute_contacts( 

2437 traj, 

2438 contacts=self.contacts, 

2439 scheme=self.scheme, 

2440 periodic=self.periodic, 

2441 )[0] 

2442 

2443 res = np.zeros_like(D) 

2444 # Do we want binary? 

2445 if self.threshold is not None: 

2446 I = np.argwhere(D <= self.threshold) 

2447 res[I[:, 0], I[:, 1]] = 1.0 

2448 else: 

2449 res = D 

2450 

2451 if self.count_contacts and self.threshold is not None: 

2452 return res.sum(axis=1, keepdims=True) 

2453 else: 

2454 return res 

2455 

2456 

2457class GroupCOMFeature(Feature): 

2458 """Cartesian coordinates of the center-of-mass (COM) of atom groups. 

2459 

2460 Groups can be defined as sequences of sequences of int. So a list of list of int 

2461 can be used to define groups of various sizes. The resulting array will have 

2462 the shape of (n_frames, n_groups ** 2). The xyz coordinates are flattended, 

2463 so the array can be rebuilt with `np.dstack()` 

2464 

2465 Examples: 

2466 >>> import encodermap as em 

2467 >>> import numpy as np 

2468 >>> traj = em.SingleTraj.from_pdb_id("1YUG") 

2469 >>> f = em.features.GroupCOMFeature( 

2470 ... traj=traj, 

2471 ... group_definitions=[ 

2472 ... [0, 1, 2], 

2473 ... [3, 4, 5, 6, 7], 

2474 ... [8, 9, 10], 

2475 ... ] 

2476 ... ) 

2477 >>> a = f.transform() 

2478 >>> a.shape # this array is flattened along the feature axis 

2479 (15, 9) 

2480 >>> a = np.dstack([ 

2481 ... a[..., ::3], 

2482 ... a[..., 1::3], 

2483 ... a[..., 2::3], 

2484 ... ]) 

2485 >>> a.shape # now the z, coordinate of the 2nd center of mass is a[:, 1, -1] 

2486 (15, 3, 3) 

2487 

2488 Note: 

2489 Centering (`ref_geom`) and imaging (`image_molecules=True`) can be time- 

2490 consuming. Consider doing this to your trajectory files prior to featurization. 

2491 

2492 """ 

2493 

2494 _nonstandard_transform_args: list[str] = [ 

2495 "top", 

2496 "ref_geom", 

2497 "image_molecules", 

2498 "masses_in_groups", 

2499 ] 

2500 

2501 def __init__( 

2502 self, 

2503 traj: SingleTraj, 

2504 group_definitions: Sequence[Sequence[int]], 

2505 ref_geom: Optional[md.Trajectory] = None, 

2506 image_molecules: bool = False, 

2507 mass_weighted: bool = True, 

2508 delayed: bool = False, 

2509 ) -> None: 

2510 """Instantiate the GroupCOMFeature. 

2511 

2512 Args: 

2513 traj (SingleTraj): An instance of `SingleTraj`. 

2514 group_definitions (Sequence[Sequence[int]]): A sequence of sequences 

2515 of int defining the groups of which the COM should be calculated. 

2516 See the example for how to use this argument. 

2517 ref_geom (Optional[md.Trajectory]): The coordinates can be centered 

2518 to a reference geometry before computing the COM. Defaults to None. 

2519 image_molecules (bool): The method traj.image_molecules will be 

2520 called before computing averages. The method tries to correct 

2521 for molecules broken across periodic boundary conditions, 

2522 but can be time-consuming. See 

2523 http://mdtraj.org/latest/api/generated/mdtraj.Trajectory.html#mdtraj.Trajectory.image_molecules 

2524 for more details 

2525 mass_weighted (bool): Whether the COM should be calculated mass-weighted. 

2526 

2527 """ 

2528 self._raise_on_unitcell = False 

2529 if not (ref_geom is None or isinstance(ref_geom, md.Trajectory)): 

2530 raise ValueError( 

2531 f"argument ref_geom has to be either None or and " 

2532 f"mdtraj.Trajectory, got instead {type(ref_geom)}" 

2533 ) 

2534 

2535 self.ref_geom = ref_geom 

2536 self.traj = traj 

2537 self.top = traj.top 

2538 self.image_molecules = image_molecules 

2539 self.group_definitions = [np.asarray(gf) for gf in group_definitions] 

2540 self.atom_masses = np.array([aa.element.mass for aa in self.traj.top.atoms]) 

2541 

2542 if mass_weighted: 

2543 self.masses_in_groups = [ 

2544 self.atom_masses[aa_in_rr] for aa_in_rr in self.group_definitions 

2545 ] 

2546 else: 

2547 self.masses_in_groups = [ 

2548 np.ones_like(aa_in_rr) for aa_in_rr in self.group_definitions 

2549 ] 

2550 

2551 self.delayed = delayed 

2552 

2553 # Prepare and store the description 

2554 self._describe = [] 

2555 for group in self.group_definitions: 

2556 for coor in "xyz": 

2557 self._describe.append( 

2558 f"COM-{coor} of atom group [{group[:3]}..{group[-3:]}]" 

2559 ) 

2560 self.dimension = 3 * len(self.group_definitions) 

2561 

2562 @property 

2563 def dask_indices(self) -> str: 

2564 """str: The name of the delayed transformation to carry out with this feature.""" 

2565 return "group_definitions" 

2566 

2567 def describe(self) -> list[str]: 

2568 """Gives a list of strings describing this feature's feature-axis. 

2569 

2570 A feature computes a collective variable (CV). A CV is aligned with an MD 

2571 trajectory on the time/frame-axis. The feature axis is unique for every 

2572 feature. A feature describing the backbone torsions (phi, omega, psi) would 

2573 have a feature axis with the size 3*n-3, where n is the number of residues. 

2574 The end-to-end distance of a linear protein in contrast would just have 

2575 a feature axis with length 1. This `describe()` method will label these 

2576 values unambiguously. A backbone torsion feature's `describe()` could be 

2577 ['phi_1', 'omega_1', 'psi_1', 'phi_2', 'omega_2', ..., 'psi_n-1']. 

2578 The end-to-end distance feature could be described by 

2579 ['distance_between_MET1_and_LYS80']. 

2580 

2581 Returns: 

2582 list[str]: The labels of this feature. 

2583 

2584 """ 

2585 return self._describe 

2586 

2587 @staticmethod 

2588 @dask.delayed 

2589 def dask_transform( 

2590 indexes: list[list[int]], 

2591 top: md.Topology, 

2592 ref_geom: Union[md.Trajectory, None], 

2593 image_molecules: bool, 

2594 masses_in_groups: list[float], 

2595 xyz: Optional[np.ndarray] = None, 

2596 unitcell_vectors: Optional[np.ndarray] = None, 

2597 unitcell_info: Optional[np.ndarray] = None, 

2598 ) -> dask.delayed: 

2599 """The same as `transform()` but without the need to pickle `traj`. 

2600 

2601 When dask delayed concurrencies are distributed, required python objects 

2602 are pickled. Thus, every feature needs to have its own pickled traj. 

2603 That defeats the purpose of dask distributed. Thus, this method implements 

2604 the same calculations as `transform` as a more barebones approach. 

2605 It foregoes the checks for periodicity and unit-cell shape and just 

2606 takes xyz, unitcell vectors, and unitcell info. Furthermore, it is a 

2607 staticmethod, so it doesn't require `self` to function. However, it 

2608 needs the indexes in `self.indexes`. That's why the `dask_indices` 

2609 property informs the scheduler to also pickle and pass this object to 

2610 the workers. 

2611 

2612 Args: 

2613 indexes (np.ndarray): For this special feature, the indexes argument 

2614 in the @staticmethod dask_transform is `self.group_definitions`. 

2615 periodic (bool): Whether to observe the minimum image convention 

2616 and respect proteins breaking over the periodic boundary 

2617 condition as a whole (True). In this case, the trajectory container 

2618 in `traj` needs to have unitcell information. Defaults to True. 

2619 threshold (float): The threshold in nm, under which a distance is 

2620 considered to be a contact. Defaults to 5.0 nm. 

2621 count_contacts (bool): When True, return an integer of the number of contacts 

2622 instead of returning the array of regular contacts. 

2623 xyz (Optional[np.ndarray]): A numpy array with shape (n_frames, n_atoms, 3) 

2624 in nanometer. If None is provided, the coordinates of `self.traj` 

2625 will be used. Otherwise, the topology of this set of xyz 

2626 coordinates should match the topology of `self.atom`. 

2627 Defaults to None. 

2628 unitcell_vectors (Optional[np.ndarray]): When periodic is set to 

2629 True, the `unitcell_vectors` are needed to calculate the 

2630 minimum image convention in a periodic space. This numpy 

2631 array should have the shape (n_frames, 3, 3). The rows of this 

2632 array correlate to the Bravais vectors a, b, and c. 

2633 unitcell_info (Optional[np.ndarray]): Basically identical to 

2634 `unitcell_vectors`. A numpy array of shape (n_frames, 6), where 

2635 the first three columns are the unitcell_lengths in nanometer. 

2636 The other three columns are the unitcell_angles in degrees. 

2637 

2638 """ 

2639 # create a dummy traj, with the appropriate topology 

2640 traj = md.Trajectory( 

2641 xyz=xyz, 

2642 topology=top, 

2643 unitcell_lengths=unitcell_info[:, :3], 

2644 unitcell_angles=unitcell_info[:, 3:], 

2645 ) 

2646 COM_xyz = [] 

2647 if ref_geom is not None: 

2648 traj = traj.superpose(ref_geom) 

2649 if image_molecules: 

2650 traj = traj.image_molecules() 

2651 for aas, mms in zip(indexes, masses_in_groups): 

2652 COM_xyz.append( 

2653 np.average( 

2654 traj.xyz[ 

2655 :, 

2656 aas, 

2657 ], 

2658 axis=1, 

2659 weights=mms, 

2660 ) 

2661 ) 

2662 return np.hstack(COM_xyz) 

2663 

2664 def transform( 

2665 self, 

2666 xyz: Optional[np.ndarray] = None, 

2667 unitcell_vectors: Optional[np.ndarray] = None, 

2668 unitcell_info: Optional[np.ndarray] = None, 

2669 ) -> np.ndarray: 

2670 """Takes xyz and unitcell information to apply the topological calculations on. 

2671 

2672 When this method is not provided with any input, it will take the 

2673 traj_container provided as `traj` in the `__init__()` method and transforms 

2674 this trajectory. The argument `xyz` can be the xyz coordinates in nanometer 

2675 of a trajectory with identical topology as `self.traj`. If `periodic` was 

2676 set to True, `unitcell_vectors` and `unitcell_info` should also be provided. 

2677 

2678 Args: 

2679 xyz (Optional[np.ndarray]): A numpy array with shape (n_frames, n_atoms, 3) 

2680 in nanometer. If None is provided, the coordinates of `self.traj` 

2681 will be used. Otherwise, the topology of this set of xyz 

2682 coordinates should match the topology of `self.atom`. 

2683 Defaults to None. 

2684 unitcell_vectors (Optional[np.ndarray]): When periodic is set to 

2685 True, the `unitcell_vectors` are needed to calculate the 

2686 minimum image convention in a periodic space. This numpy 

2687 array should have the shape (n_frames, 3, 3). The rows of this 

2688 array correlate to the Bravais vectors a, b, and c. 

2689 unitcell_info (Optional[np.ndarray]): Basically identical to 

2690 `unitcell_vectors`. A numpy array of shape (n_frames, 6), where 

2691 the first three columns are the unitcell_lengths in nanometer. 

2692 The other three columns are the unitcell_angles in degrees. 

2693 

2694 Returns: 

2695 np.ndarray: The result of the computation with shape (n_frames, n_indexes). 

2696 

2697 """ 

2698 ( 

2699 xyz, 

2700 unitcell_vectors, 

2701 unitcell_info, 

2702 ) = Feature.transform(self, xyz, unitcell_vectors, unitcell_info) 

2703 # create a dummy traj, with the appropriate topology 

2704 traj = md.Trajectory( 

2705 xyz=xyz, 

2706 topology=self.traj.top, 

2707 unitcell_lengths=( 

2708 unitcell_info[:, :3] if unitcell_info is not None else None 

2709 ), 

2710 unitcell_angles=unitcell_info[:, 3:] if unitcell_info is not None else None, 

2711 ) 

2712 COM_xyz = [] 

2713 if self.ref_geom is not None: 

2714 traj = traj.superpose(self.ref_geom) 

2715 if self.image_molecules: 

2716 traj = traj.image_molecules() 

2717 for aas, mms in zip(self.group_definitions, self.masses_in_groups): 

2718 COM_xyz.append( 

2719 np.average( 

2720 traj.xyz[ 

2721 :, 

2722 aas, 

2723 ], 

2724 axis=1, 

2725 weights=mms, 

2726 ) 

2727 ) 

2728 return np.hstack(COM_xyz) 

2729 

2730 

2731class ResidueCOMFeature(GroupCOMFeature): 

2732 def __init__( 

2733 self, 

2734 traj: SingleTraj, 

2735 residue_indices: Sequence[int], 

2736 residue_atoms: np.ndarray, 

2737 scheme: Literal["all", "backbone", "sidechain"] = "all", 

2738 ref_geom: Optional[md.Trajectory] = None, 

2739 image_molecules: bool = False, 

2740 mass_weighted: bool = True, 

2741 delayed: bool = False, 

2742 ) -> None: 

2743 """Instantiate the ResidueCOMFeature. 

2744 

2745 Args: 

2746 residue_indices (Sequence[int]): The residue indices for which the 

2747 COM will be computed. These are always zero-indexed that are not 

2748 necessarily the residue sequence record of the topology (resSeq). 

2749 resSeq indices start at least at 1 but can depend on the topology. 

2750 Furthermore, resSeq numbers can be duplicated across chains; residue 

2751 indices are always unique. 

2752 

2753 

2754 """ 

2755 super(ResidueCOMFeature, self).__init__( 

2756 traj, 

2757 residue_atoms, 

2758 mass_weighted=mass_weighted, 

2759 ref_geom=ref_geom, 

2760 image_molecules=image_molecules, 

2761 delayed=delayed, 

2762 ) 

2763 

2764 self.residue_indices = residue_indices 

2765 self.scheme = scheme 

2766 

2767 self._describe = [] 

2768 for ri in self.residue_indices: 

2769 for coor in "xyz": 

2770 self._describe.append( 

2771 f"{self.traj.top.residue(ri)} COM-{coor} ({self.scheme})" 

2772 ) 

2773 

2774 

2775class SideChainTorsions(DihedralFeature): 

2776 options = ("chi1", "chi2", "chi3", "chi4", "chi5") 

2777 

2778 def __init__( 

2779 self, 

2780 traj: Union[SingleTraj, TrajEnsemble], 

2781 selstr: Optional[str] = None, 

2782 deg: bool = False, 

2783 cossin: bool = False, 

2784 periodic: bool = True, 

2785 which: Union[ 

2786 Literal["all"], Sequence[Literal["chi1", "chi2", "chi3", "chi4", "chi5"]] 

2787 ] = "all", 

2788 delayed: bool = False, 

2789 ) -> None: 

2790 if not isinstance(which, (tuple, list)): 

2791 which = [which] 

2792 if not set(which).issubset(set(self.options) | {"all"}): 

2793 raise ValueError( 

2794 'Argument "which" should only contain one of {}, but was {}'.format( 

2795 ["all"] + list(self.options), which 

2796 ) 

2797 ) 

2798 if "all" in which: 

2799 which = self.options 

2800 

2801 # get all dihedral index pairs 

2802 indices_dict = {k: getattr(traj, "indices_%s" % k) for k in which} 

2803 if selstr: 

2804 selection = traj.top.select(selstr) 

2805 truncated_indices_dict = {} 

2806 for k, inds in indices_dict.items(): 

2807 mask = np.in1d(inds[:, 1], selection, assume_unique=True) 

2808 truncated_indices_dict[k] = inds[mask] 

2809 indices_dict = truncated_indices_dict 

2810 

2811 valid = {k: indices_dict[k] for k in indices_dict if indices_dict[k].size > 0} 

2812 if not valid: 

2813 raise ValueError( 

2814 "Could not determine any side chain dihedrals for your topology!" 

2815 ) 

2816 self._prefix_label_lengths = np.array( 

2817 [len(indices_dict[k]) if k in which else 0 for k in self.options] 

2818 ) 

2819 indices = np.vstack(list(valid.values())) 

2820 

2821 super(SideChainTorsions, self).__init__( 

2822 traj=traj, 

2823 dih_indexes=indices, 

2824 deg=deg, 

2825 cossin=cossin, 

2826 periodic=periodic, 

2827 delayed=delayed, 

2828 ) 

2829 

2830 def describe(self) -> list[str]: 

2831 """Gives a list of strings describing this feature's feature-axis. 

2832 

2833 A feature computes a collective variable (CV). A CV is aligned with an MD 

2834 trajectory on the time/frame-axis. The feature axis is unique for every 

2835 feature. A feature describing the backbone torsions (phi, omega, psi) would 

2836 have a feature axis with the size 3*n-3, where n is the number of residues. 

2837 The end-to-end distance of a linear protein in contrast would just have 

2838 a feature axis with length 1. This `describe()` method will label these 

2839 values unambiguously. A backbone torsion feature's `describe()` could be 

2840 ['phi_1', 'omega_1', 'psi_1', 'phi_2', 'omega_2', ..., 'psi_n-1']. 

2841 The end-to-end distance feature could be described by 

2842 ['distance_between_MET1_and_LYS80']. 

2843 

2844 Returns: 

2845 list[str]: The labels of this feature. The length 

2846 is determined by the `dih_indexes` and the `cossin` argument 

2847 in the `__init__()` method. If `cossin` is false, then 

2848 `len(describe()) == self.angle_indexes[-1]`, else `len(describe())` 

2849 is twice as long. 

2850 

2851 """ 

2852 getlbl = lambda at: "%i %s %i" % ( 

2853 at.residue.chain.index, 

2854 at.residue.name, 

2855 at.residue.resSeq, 

2856 ) 

2857 prefixes = [] 

2858 for lengths, label in zip(self._prefix_label_lengths, self.options): 

2859 if self.cossin: 

2860 lengths *= 2 

2861 prefixes.extend([label.upper()] * lengths) 

2862 

2863 if self.cossin: 

2864 cossin = ("COS({dih} {res})", "SIN({dih} {res})") 

2865 labels = [ 

2866 s.format( 

2867 dih=prefixes[j + len(cossin) * i], 

2868 res=getlbl(self.top.atom(ires[1])), 

2869 ) 

2870 for i, ires in enumerate(self.angle_indexes) 

2871 for j, s in enumerate(cossin) 

2872 ] 

2873 else: 

2874 labels = [ 

2875 "{dih} {res}".format( 

2876 dih=prefixes[i], res=getlbl(self.top.atom(ires[1])) 

2877 ) 

2878 for i, ires in enumerate(self.angle_indexes) 

2879 ] 

2880 

2881 return labels 

2882 

2883 

2884class MinRmsdFeature(Feature): 

2885 _nonstandard_transform_args: list[str] = [ 

2886 "top", 

2887 "ref", 

2888 ] 

2889 

2890 def __init__( 

2891 self, 

2892 traj: SingleTraj, 

2893 ref: Union[md.Trajectory, SingleTraj], 

2894 ref_frame: int = 0, 

2895 atom_indices: Optional[np.ndarray] = None, 

2896 precentered: bool = False, 

2897 delayed: bool = False, 

2898 ) -> None: 

2899 # Encodermap imports 

2900 from encodermap.trajinfo.info_single import SingleTraj 

2901 

2902 self._raise_on_unitcell = False 

2903 self.traj = traj 

2904 self.top = self.traj.top 

2905 assert isinstance( 

2906 ref_frame, int 

2907 ), f"ref_frame has to be of type integer, and not {type(ref_frame)}" 

2908 

2909 if isinstance(ref, (md.Trajectory, SingleTraj)): 

2910 self.name = f"MinRmsdFeature_with_{ref.n_atoms}_atoms_in_reference" 

2911 else: 

2912 raise TypeError( 

2913 f"input reference has to be either `encodermap.SingleTraj` or " 

2914 f"a mdtraj.Trajectory object, and not of {ref}" 

2915 ) 

2916 

2917 self.ref = ref 

2918 self.ref_frame = ref_frame 

2919 self.atom_indices = atom_indices 

2920 self.precentered = precentered 

2921 self.dimension = 1 

2922 self.delayed = delayed 

2923 

2924 @property 

2925 def dask_indices(self) -> str: 

2926 """str: The name of the delayed transformation to carry out with this feature.""" 

2927 return "atom_indices" 

2928 

2929 def describe(self) -> list[str]: 

2930 """Gives a list of strings describing this feature's feature-axis. 

2931 

2932 A feature computes a collective variable (CV). A CV is aligned with an MD 

2933 trajectory on the time/frame-axis. The feature axis is unique for every 

2934 feature. A feature describing the backbone torsions (phi, omega, psi) would 

2935 have a feature axis with the size 3*n-3, where n is the number of residues. 

2936 The end-to-end distance of a linear protein in contrast would just have 

2937 a feature axis with length 1. This `describe()` method will label these 

2938 values unambiguously. A backbone torsion feature's `describe()` could be 

2939 ['phi_1', 'omega_1', 'psi_1', 'phi_2', 'omega_2', ..., 'psi_n-1']. 

2940 The end-to-end distance feature could be described by 

2941 ['distance_between_MET1_and_LYS80']. 

2942 

2943 Returns: 

2944 list[str]: The labels of this feature. 

2945 

2946 """ 

2947 label = "minrmsd to frame %u of %s" % (self.ref_frame, self.name) 

2948 if self.precentered: 

2949 label += ", precentered=True" 

2950 if self.atom_indices is not None: 

2951 label += ", subset of atoms " 

2952 return [label] 

2953 

2954 @property 

2955 def dask_indices(self): 

2956 """str: The name of the delayed transformation to carry out with this feature.""" 

2957 return "atom_indices" 

2958 

2959 @staticmethod 

2960 @dask.delayed 

2961 def dask_transform( 

2962 indexes: np.ndarray, 

2963 top: md.Topology, 

2964 ref: md.Trajectory, 

2965 xyz: Optional[np.ndarray] = None, 

2966 unitcell_vectors: Optional[np.ndarray] = None, 

2967 unitcell_info: Optional[np.ndarray] = None, 

2968 ) -> np.ndarray: 

2969 """Takes xyz and unitcell information to apply the topological calculations on. 

2970 

2971 Args: 

2972 xyz (Optional[np.ndarray]): A numpy array with shape (n_frames, n_atoms, 3) 

2973 in nanometer. If None is provided, the coordinates of `self.traj` 

2974 will be used. Otherwise, the topology of this set of xyz 

2975 coordinates should match the topology of `self.atom`. 

2976 Defaults to None. 

2977 unitcell_vectors (Optional[np.ndarray]): When periodic is set to 

2978 True, the `unitcell_vectors` are needed to calculate the 

2979 minimum image convention in a periodic space. This numpy 

2980 array should have the shape (n_frames, 3, 3). The rows of this 

2981 array correlate to the Bravais vectors a, b, and c. 

2982 unitcell_info (Optional[np.ndarray]): Basically identical to 

2983 `unitcell_vectors`. A numpy array of shape (n_frames, 6), where 

2984 the first three columns are the unitcell_lengths in nanometer. 

2985 The other three columns are the unitcell_angles in degrees. 

2986 

2987 Returns: 

2988 np.ndarray: The result of the computation with shape (n_frames, n_indexes). 

2989 

2990 """ 

2991 # create a dummy traj, with the appropriate topology 

2992 traj = md.Trajectory( 

2993 xyz=xyz, 

2994 topology=top, 

2995 unitcell_lengths=unitcell_info[:, :3], 

2996 unitcell_angles=unitcell_info[:, 3:], 

2997 ) 

2998 

2999 return np.array(md.rmsd(traj, ref, atom_indices=indexes), ndmin=2).T 

3000 

3001 def transform( 

3002 self, 

3003 xyz: Optional[np.ndarray] = None, 

3004 unitcell_vectors: Optional[np.ndarray] = None, 

3005 unitcell_info: Optional[np.ndarray] = None, 

3006 ) -> np.ndarray: 

3007 """Takes xyz and unitcell information to apply the topological calculations on. 

3008 

3009 When this method is not provided with any input, it will take the 

3010 traj_container provided as `traj` in the `__init__()` method and transforms 

3011 this trajectory. The argument `xyz` can be the xyz coordinates in nanometer 

3012 of a trajectory with identical topology as `self.traj`. If `periodic` was 

3013 set to True, `unitcell_vectors` and `unitcell_info` should also be provided. 

3014 

3015 Args: 

3016 xyz (Optional[np.ndarray]): A numpy array with shape (n_frames, n_atoms, 3) 

3017 in nanometer. If None is provided, the coordinates of `self.traj` 

3018 will be used. Otherwise, the topology of this set of xyz 

3019 coordinates should match the topology of `self.atom`. 

3020 Defaults to None. 

3021 unitcell_vectors (Optional[np.ndarray]): When periodic is set to 

3022 True, the `unitcell_vectors` are needed to calculate the 

3023 minimum image convention in a periodic space. This numpy 

3024 array should have the shape (n_frames, 3, 3). The rows of this 

3025 array correlate to the Bravais vectors a, b, and c. 

3026 unitcell_info (Optional[np.ndarray]): Basically identical to 

3027 `unitcell_vectors`. A numpy array of shape (n_frames, 6), where 

3028 the first three columns are the unitcell_lengths in nanometer. 

3029 The other three columns are the unitcell_angles in degrees. 

3030 

3031 Returns: 

3032 np.ndarray: The result of the computation with shape (n_frames, n_indexes). 

3033 

3034 """ 

3035 ( 

3036 xyz, 

3037 unitcell_vectors, 

3038 unitcell_info, 

3039 ) = Feature.transform(self, xyz, unitcell_vectors, unitcell_info) 

3040 

3041 # create a dummy traj, with the appropriate topology 

3042 traj = md.Trajectory( 

3043 xyz=xyz, 

3044 topology=self.traj.top, 

3045 unitcell_lengths=unitcell_info[:, :3], 

3046 unitcell_angles=unitcell_info[:, 3:], 

3047 ) 

3048 

3049 return np.array( 

3050 md.rmsd(traj, self.ref, atom_indices=self.atom_indices), ndmin=2 

3051 ).T 

3052 

3053 

3054################################################################################ 

3055# EncoderMap features 

3056################################################################################ 

3057 

3058 

3059class CentralDihedrals(DihedralFeature): 

3060 """Feature that collects all dihedrals in the backbone of a topology. 

3061 

3062 Attributes: 

3063 top (mdtraj.Topology): Topology of this feature. 

3064 indexes (np.ndarray): The numpy array returned from `top.select('all')`. 

3065 

3066 """ 

3067 

3068 def __init__( 

3069 self, 

3070 traj: Union[SingleTraj, TrajEnsemble], 

3071 selstr: Optional[str] = None, 

3072 deg: bool = False, 

3073 cossin: bool = False, 

3074 periodic: bool = True, 

3075 omega: bool = True, 

3076 generic_labels: bool = False, 

3077 check_aas: bool = True, 

3078 delayed: bool = False, 

3079 ): 

3080 """Instantiate this feature class. 

3081 

3082 Args: 

3083 traj (em.SingleTraj): A topology to build features from. 

3084 selstr (Optional[str]): A string, that limits the selection of dihedral angles. 

3085 Only dihedral angles which atoms are represented by the `selstr` argument 

3086 are considered. This selection string follows MDTraj's atom selection 

3087 language: https://mdtraj.org/1.9.3/atom_selection.html. Can also 

3088 be None, in which case all backbone dihedrals (also omega) are 

3089 considered. Defaults to None. 

3090 deg (bool): Whether to return the result in degree (`deg=True`) or in 

3091 radians (`deg=False`). Defaults to False (radians). 

3092 cossin (bool): If True, each angle will be returned as a pair of 

3093 (sin(x), cos(x)). This is useful, if you calculate the means 

3094 (e.g. TICA/PCA, clustering) in that space. Defaults to False. 

3095 periodic (bool): Whether to recognize periodic boundary conditions and 

3096 work under the minimum image convention. Defaults to True. 

3097 

3098 """ 

3099 self.traj = traj 

3100 self.selstr = selstr 

3101 self.omega = omega 

3102 

3103 indices = self.traj.indices_psi 

3104 if not selstr: 

3105 self._psi_inds = indices 

3106 else: 

3107 self._psi_inds = indices[ 

3108 np.in1d(indices[:, 1], self.top.select(selstr), assume_unique=True) 

3109 ] 

3110 

3111 self.omega = omega 

3112 if self.omega: 

3113 indices = self.traj.indices_omega 

3114 if not selstr: 

3115 self._omega_inds = indices 

3116 else: 

3117 self._omega_inds = indices[ 

3118 np.in1d(indices[:, 1], self.top.select(selstr), assume_unique=True) 

3119 ] 

3120 

3121 indices = self.traj.indices_phi 

3122 if not selstr: 

3123 self._phi_inds = indices 

3124 else: 

3125 self._phi_inds = indices[ 

3126 np.in1d(indices[:, 1], self.top.select(selstr), assume_unique=True) 

3127 ] 

3128 

3129 if self.omega: 

3130 zipped = list(zip(self._psi_inds, self._omega_inds, self._phi_inds)) 

3131 else: 

3132 zipped = list(zip(self._psi_inds, self._phi_inds)) 

3133 

3134 # alternate phi, psi , omega pairs (phi_1, psi_1, omega_1..., phi_n, psi_n, omega_n) 

3135 dih_indexes = np.array(zipped).reshape(-1, 4) 

3136 

3137 # set generic_labels for xarray 

3138 if generic_labels: 

3139 self.describe = self.generic_describe 

3140 

3141 super(CentralDihedrals, self).__init__( 

3142 self.traj, 

3143 dih_indexes, 

3144 deg=deg, 

3145 cossin=cossin, 

3146 periodic=periodic, 

3147 check_aas=check_aas, 

3148 delayed=delayed, 

3149 ) 

3150 

3151 @property 

3152 def name(self) -> str: 

3153 """str: The name of the class: "CentralDihedrals".""" 

3154 return "CentralDihedrals" 

3155 

3156 @property 

3157 def indexes(self) -> np.ndarray: 

3158 """np.ndarray: A (n_angles, 4) shaped numpy array giving the atom indices 

3159 of the dihedral angles to be calculated.""" 

3160 return self.angle_indexes.astype("int32") 

3161 

3162 def generic_describe(self) -> list[str]: 

3163 """Returns a list of generic labels, not containing residue names. 

3164 These can be used to stack tops of different topology. 

3165 

3166 Returns: 

3167 list[str]: A list of labels. 

3168 

3169 """ 

3170 if hasattr(self.traj, "clustal_w"): 

3171 clustal_w = np.array([*self.traj.clustal_w]) 

3172 count = len(np.where(clustal_w != "-")[0]) 

3173 assert count == self.traj.n_residues, ( 

3174 f"Provided clustal W alignment {self.traj.clustal_w} does not " 

3175 f"contain as many residues as traj {self.traj.n_residues}. Can not " 

3176 f"use this alignment." 

3177 ) 

3178 _psi_inds = (np.arange(len(clustal_w)) + 1)[clustal_w != "-"][ 

3179 :-1 

3180 ] # last residue can't have psi 

3181 _phi_inds = (np.arange(len(clustal_w)) + 1)[clustal_w != "-"][ 

3182 1: 

3183 ] # first residue can't have phi 

3184 if self.omega: 

3185 _omega_inds = (np.arange(len(clustal_w)) + 1)[clustal_w != "-"][ 

3186 :-1 

3187 ] # last residue can't have omega 

3188 assert len(_psi_inds) == len(self._psi_inds) 

3189 assert len(_phi_inds) == len(self._phi_inds) 

3190 if self.omega: 

3191 assert len(_omega_inds) == len(self._omega_inds) 

3192 else: 

3193 _psi_inds = np.arange(len(self._psi_inds)) + 1 

3194 _phi_inds = np.arange(len(self._phi_inds)) + 1 

3195 if self.omega: 

3196 _omega_inds = np.arange(len(self._omega_inds)) + 1 

3197 

3198 if self.cossin: 

3199 sin_cos = ("COS(PSI %s)", "SIN(PSI %s)") 

3200 labels_psi = [ 

3201 ( 

3202 sin_cos[0] % i, 

3203 sin_cos[1] % i, 

3204 ) 

3205 for i in _psi_inds 

3206 ] 

3207 if self.omega: 

3208 sin_cos = ("COS(OMEGA %s)", "SIN(OMEGA %s)") 

3209 labels_omega = [ 

3210 ( 

3211 sin_cos[0] % i, 

3212 sin_cos[1] % i, 

3213 ) 

3214 for i in _omega_inds 

3215 ] 

3216 sin_cos = ("COS(PHI %s)", "SIN(PHI %s)") 

3217 labels_phi = [ 

3218 ( 

3219 sin_cos[0] % i, 

3220 sin_cos[1] % i, 

3221 ) 

3222 for i in _phi_inds 

3223 ] 

3224 # produce the same ordering as the given indices (phi_1, psi_1, ..., phi_n, psi_n) 

3225 # or (cos(phi_1), sin(phi_1), cos(psi_1), sin(psi_1), ..., cos(phi_n), sin(phi_n), cos(psi_n), sin(psi_n)) 

3226 if self.omega: 

3227 zipped = zip(labels_psi, labels_omega, labels_phi) 

3228 else: 

3229 zipped = zip(labels_psi, labels_phi) 

3230 

3231 res = list( 

3232 itertools.chain.from_iterable(itertools.chain.from_iterable(zipped)) 

3233 ) 

3234 else: 

3235 labels_psi = [f"CENTERDIH PSI {i}" for i in _psi_inds] 

3236 if self.omega: 

3237 labels_omega = [f"CENTERDIH OMEGA {i}" for i in _omega_inds] 

3238 labels_phi = [f"CENTERDIH PHI {i}" for i in _phi_inds] 

3239 if self.omega: 

3240 zipped = zip(labels_psi, labels_omega, labels_phi) 

3241 else: 

3242 zipped = zip(labels_psi, labels_phi) 

3243 res = list(itertools.chain.from_iterable(zipped)) 

3244 

3245 return res 

3246 

3247 def describe(self) -> list[str]: 

3248 """Gives a list of strings describing this feature's feature-axis. 

3249 

3250 A feature computes a collective variable (CV). A CV is aligned with an MD 

3251 trajectory on the time/frame-axis. The feature axis is unique for every 

3252 feature. A feature describing the backbone torsions (phi, omega, psi) would 

3253 have a feature axis with the size 3*n-3, where n is the number of residues. 

3254 The end-to-end distance of a linear protein in contrast would just have 

3255 a feature axis with length 1. This `describe()` method will label these 

3256 values unambiguously. A backbone torsion feature's `describe()` could be 

3257 ['phi_1', 'omega_1', 'psi_1', 'phi_2', 'omega_2', ..., 'psi_n-1']. 

3258 The end-to-end distance feature could be described by 

3259 ['distance_between_MET1_and_LYS80']. 

3260 

3261 Returns: 

3262 list[str]: The labels of this feature. The length 

3263 is determined by the `dih_indexes` and the `cossin` argument 

3264 in the `__init__()` method. If `cossin` is false, then 

3265 `len(describe()) == self.angle_indexes[-1]`, else `len(describe())` 

3266 is twice as long. 

3267 

3268 """ 

3269 top = self.top 

3270 getlbl = ( 

3271 lambda at: f"RESID {at.residue.name}:{at.residue.resSeq:>4} CHAIN {at.residue.chain.index}" 

3272 ) 

3273 

3274 if self.cossin: 

3275 sin_cos = ("COS(PSI %s)", "SIN(PSI %s)") 

3276 labels_psi = [ 

3277 ( 

3278 sin_cos[0] % getlbl(top.atom(ires[1])), 

3279 sin_cos[1] % getlbl(top.atom(ires[1])), 

3280 ) 

3281 for ires in self._psi_inds 

3282 ] 

3283 if self.omega: 

3284 sin_cos = ("COS(OMEGA %s)", "SIN(OMEGA %s)") 

3285 labels_omega = [ 

3286 ( 

3287 sin_cos[0] % getlbl(top.atom(ires[1])), 

3288 sin_cos[1] % getlbl(top.atom(ires[1])), 

3289 ) 

3290 for ires in self._omega_inds 

3291 ] 

3292 sin_cos = ("COS(PHI %s)", "SIN(PHI %s)") 

3293 labels_phi = [ 

3294 ( 

3295 sin_cos[0] % getlbl(top.atom(ires[1])), 

3296 sin_cos[1] % getlbl(top.atom(ires[1])), 

3297 ) 

3298 for ires in self._phi_inds 

3299 ] 

3300 # produce the same ordering as the given indices (phi_1, psi_1, ..., phi_n, psi_n) 

3301 # or (cos(phi_1), sin(phi_1), cos(psi_1), sin(psi_1), ..., cos(phi_n), sin(phi_n), cos(psi_n), sin(psi_n)) 

3302 if self.omega: 

3303 zipped = zip(labels_psi, labels_omega, labels_phi) 

3304 else: 

3305 zip(labels_psi, labels_phi) 

3306 

3307 res = list( 

3308 itertools.chain.from_iterable(itertools.chain.from_iterable(zipped)) 

3309 ) 

3310 else: 

3311 labels_psi = [ 

3312 f"CENTERDIH PSI " + getlbl(top.atom(ires[1])) 

3313 for ires in self._psi_inds 

3314 ] 

3315 if self.omega: 

3316 labels_omega = [ 

3317 "CENTERDIH OMEGA " + getlbl(top.atom(ires[1])) 

3318 for ires in self._omega_inds 

3319 ] 

3320 labels_phi = [ 

3321 "CENTERDIH PHI " + getlbl(top.atom(ires[1])) 

3322 for ires in self._phi_inds 

3323 ] 

3324 if self.omega: 

3325 zipped = zip(labels_psi, labels_omega, labels_phi) 

3326 else: 

3327 zipped = zip(labels_psi, labels_phi) 

3328 res = list(itertools.chain.from_iterable(zipped)) 

3329 return res 

3330 

3331 

3332class SideChainDihedrals(DihedralFeature): 

3333 """Feature that collects all dihedrals in the backbone of a topology. 

3334 

3335 Attributes: 

3336 top (mdtraj.Topology): Topology of this feature. 

3337 indexes (np.ndarray): The numpy array returned from `top.select('all')`. 

3338 options (list[str]): A list of possible sidechain angles ['chi1' to 'chi5']. 

3339 

3340 """ 

3341 

3342 options: list[str] = ["chi1", "chi2", "chi3", "chi4", "chi5"] 

3343 

3344 def __init__( 

3345 self, 

3346 traj: Union[SingleTraj, TrajEnsemble], 

3347 selstr: Optional[str] = None, 

3348 deg: bool = False, 

3349 cossin: bool = False, 

3350 periodic: bool = True, 

3351 generic_labels: bool = False, 

3352 check_aas: bool = True, 

3353 delayed: bool = False, 

3354 ) -> None: 

3355 which = self.options 

3356 # get all dihedral index pairs 

3357 indices_dict = {k: getattr(traj, f"indices_{k}") for k in which} 

3358 if selstr: 

3359 selection = traj.top.select(selstr) 

3360 truncated_indices_dict = {} 

3361 for k, inds in indices_dict.items(): 

3362 mask = np.in1d(inds[:, 1], selection, assume_unique=True) 

3363 truncated_indices_dict[k] = inds[mask] 

3364 indices_dict = truncated_indices_dict 

3365 

3366 valid = {k: indices_dict[k] for k in indices_dict if indices_dict[k].size > 0} 

3367 if not valid: 

3368 # Third Party Imports 

3369 import mdtraj 

3370 

3371 try: 

3372 mdtraj.compute_chi1(traj) 

3373 except Exception as e: 

3374 raise ValueError( 

3375 "Could not determine any side chain dihedrals for your topology! " 

3376 "This is an error inside MDTraj. It errors with this message: " 

3377 f"{e}. You can try to provide a custom_topology " 

3378 f"for this protein to supersede MDTraj's sidechain recognition " 

3379 f"algorithm." 

3380 ) from e 

3381 else: 

3382 raise ValueError(f"No sidechain dihedrals for the trajectory {traj=}.") 

3383 

3384 # for proteins that don't have some chi angles we filter which 

3385 which = list( 

3386 filter( 

3387 lambda x: True if len(indices_dict[x]) > 0 else False, 

3388 indices_dict.keys(), 

3389 ) 

3390 ) 

3391 

3392 # change the sorting to be per-residue and not all chi1 and then all chi2 angles 

3393 self.per_res_dict = {} 

3394 for r in traj.top.residues: 

3395 arrs = [] 

3396 bools = [] 

3397 for k in which: 

3398 if np.any(np.in1d(valid[k], np.array([a.index for a in r.atoms]))): 

3399 where = np.where( 

3400 np.in1d( 

3401 valid[k].flatten(), np.array([a.index for a in r.atoms]) 

3402 ) 

3403 )[0] 

3404 arr = valid[k].flatten()[where] 

3405 bools.append(True) 

3406 arrs.append(arr) 

3407 else: 

3408 bools.append(False) 

3409 if any(bools): 

3410 self.per_res_dict[str(r)] = np.vstack(arrs) 

3411 

3412 self._prefix_label_lengths = np.array( 

3413 [len(indices_dict[k]) if k in which else 0 for k in self.options] 

3414 ) 

3415 indices = np.vstack([v for v in self.per_res_dict.values()]) 

3416 

3417 super(SideChainDihedrals, self).__init__( 

3418 traj=traj, 

3419 dih_indexes=indices, 

3420 deg=deg, 

3421 cossin=cossin, 

3422 periodic=periodic, 

3423 check_aas=check_aas, 

3424 delayed=delayed, 

3425 ) 

3426 

3427 if generic_labels: 

3428 self.describe = self.generic_describe 

3429 

3430 @property 

3431 def name(self) -> str: 

3432 """str: The name of the class: "SideChainDihedrals".""" 

3433 return "SideChainDihedrals" 

3434 

3435 @property 

3436 def indexes(self) -> np.ndarray: 

3437 """np.ndarray: A (n_angles, 4) shaped numpy array giving the atom indices 

3438 of the dihedral angles to be calculated.""" 

3439 return self.angle_indexes 

3440 

3441 def generic_describe(self) -> list[str]: 

3442 """Returns a list of generic labels, not containing residue names. 

3443 These can be used to stack tops of different topology. 

3444 

3445 Returns: 

3446 list[str]: A list of labels. 

3447 

3448 """ 

3449 if hasattr(self.traj, "clustal_w"): 

3450 residue_mapping = {} 

3451 i = 1 

3452 j = 1 

3453 for res in [*self.traj.clustal_w]: 

3454 if res == "-": 

3455 j += 1 

3456 continue 

3457 residue_mapping[i] = j 

3458 i += 1 

3459 j += 1 

3460 

3461 def getlbl(at: md.topology.Atom): 

3462 resSeq = at.residue.resSeq 

3463 resSeq = residue_mapping[resSeq] 

3464 r = f"RESID {at.residue.name}:{resSeq:>4} CHAIN {at.residue.chain.index}" 

3465 return r 

3466 

3467 else: 

3468 getlbl = ( 

3469 lambda at: f"RESID {at.residue.name}:{at.residue.resSeq:>4} CHAIN {at.residue.chain.index}" 

3470 ) 

3471 prefixes = [] 

3472 for lengths, label in zip(self._prefix_label_lengths, self.options): 

3473 if self.cossin: 

3474 lengths *= 2 

3475 prefixes.extend([label.upper()] * lengths) 

3476 prefixes = [] 

3477 for key, value in self.per_res_dict.items(): 

3478 if self.cossin: 

3479 prefixes.extend( 

3480 [opt.upper() for opt in self.options[: value.shape[0]]] * 2 

3481 ) 

3482 else: 

3483 prefixes.extend([opt.upper() for opt in self.options[: value.shape[0]]]) 

3484 

3485 if self.cossin: 

3486 cossin = ("COS({dih} {res})", "SIN({dih} {res})") 

3487 labels = [ 

3488 s.format( 

3489 dih=prefixes[j + len(cossin) * i], 

3490 res=getlbl(self.top.atom(ires[1])), 

3491 ) 

3492 for i, ires in enumerate(self.angle_indexes) 

3493 for j, s in enumerate(cossin) 

3494 ] 

3495 else: 

3496 labels = [ 

3497 "SIDECHDIH {dih} {res}".format( 

3498 dih=prefixes[i], res=getlbl(self.top.atom(ires[1])) 

3499 ) 

3500 for i, ires in enumerate(self.angle_indexes) 

3501 ] 

3502 labels = list(map(lambda x: x[:14] + x[27:31], labels)) 

3503 return labels 

3504 

3505 def describe(self) -> list[str]: 

3506 """Gives a list of strings describing this feature's feature-axis. 

3507 

3508 A feature computes a collective variable (CV). A CV is aligned with an MD 

3509 trajectory on the time/frame-axis. The feature axis is unique for every 

3510 feature. A feature describing the backbone torsions (phi, omega, psi) would 

3511 have a feature axis with the size 3*n-3, where n is the number of residues. 

3512 The end-to-end distance of a linear protein in contrast would just have 

3513 a feature axis with length 1. This `describe()` method will label these 

3514 values unambiguously. A backbone torsion feature's `describe()` could be 

3515 ['phi_1', 'omega_1', 'psi_1', 'phi_2', 'omega_2', ..., 'psi_n-1']. 

3516 The end-to-end distance feature could be described by 

3517 ['distance_between_MET1_and_LYS80']. 

3518 

3519 Returns: 

3520 list[str]: The labels of this feature. The length 

3521 is determined by the `dih_indexes` and the `cossin` argument 

3522 in the `__init__()` method. If `cossin` is false, then 

3523 `len(describe()) == self.angle_indexes[-1]`, else `len(describe())` 

3524 is twice as long. 

3525 

3526 """ 

3527 top = self.top 

3528 getlbl = ( 

3529 lambda at: f"RESID {at.residue.name}:{at.residue.resSeq:>4} CHAIN {at.residue.chain.index}" 

3530 ) 

3531 prefixes = [] 

3532 for lengths, label in zip(self._prefix_label_lengths, self.options): 

3533 if self.cossin: 

3534 lengths *= 2 

3535 prefixes.extend([label.upper()] * lengths) 

3536 prefixes = [] 

3537 for key, value in self.per_res_dict.items(): 

3538 if self.cossin: 

3539 prefixes.extend( 

3540 [opt.upper() for opt in self.options[: value.shape[0]]] * 2 

3541 ) 

3542 else: 

3543 prefixes.extend([opt.upper() for opt in self.options[: value.shape[0]]]) 

3544 

3545 if self.cossin: 

3546 cossin = ("COS({dih} {res})", "SIN({dih} {res})") 

3547 labels = [ 

3548 s.format( 

3549 dih=prefixes[j + len(cossin) * i], 

3550 res=getlbl(self.top.atom(ires[1])), 

3551 ) 

3552 for i, ires in enumerate(self.angle_indexes) 

3553 for j, s in enumerate(cossin) 

3554 ] 

3555 else: 

3556 labels = [ 

3557 "SIDECHDIH {dih} {res}".format( 

3558 dih=prefixes[i], res=getlbl(self.top.atom(ires[1])) 

3559 ) 

3560 for i, ires in enumerate(self.angle_indexes) 

3561 ] 

3562 

3563 return labels 

3564 

3565 

3566class AllCartesians(SelectionFeature): 

3567 """Feature that collects all cartesian positions of all atoms in the trajectory. 

3568 

3569 Note: 

3570 The order of the cartesians is not as in standard MD coordinates. 

3571 Rather than giving the positions of all atoms of the first residue, and 

3572 then all positions of the second, and so on, this feature gives all 

3573 central (backbone) cartesians first, followed by the cartesians of the 

3574 sidechains. This allows better and faster backmapping. See 

3575 `encodermap.misc.backmapping._full_backmapping_np` for mor info, 

3576 why this is easier. 

3577 

3578 Attributes: 

3579 top (mdtraj.Topology): Topology of this feature. 

3580 indexes (np.ndarray): The numpy array returned from `top.select('all')`. 

3581 prefix_label (str): A prefix for the labels. In this case, it is 'POSITION'. 

3582 

3583 """ 

3584 

3585 prefix_label: str = "POSITION " 

3586 

3587 def __init__( 

3588 self, 

3589 traj: SingleTraj, 

3590 check_aas: bool = True, 

3591 generic_labels: bool = False, 

3592 delayed: bool = False, 

3593 ) -> None: 

3594 """Instantiate the AllCartesians class. 

3595 

3596 Args: 

3597 traj (em.SingleTraj): A mdtraj topology. 

3598 

3599 """ 

3600 self.central_indices = CentralCartesians(traj).indexes 

3601 try: 

3602 indexes = np.concatenate( 

3603 [self.central_indices, SideChainCartesians(traj).indexes] 

3604 ) 

3605 except ValueError as e: 

3606 if "Could not determine" in str(e): 

3607 warnings.warn( 

3608 f"The topology of {traj} does not contain any sidechains. The " 

3609 f"`AllCartesians` feature will just contain backbone coordinates." 

3610 ) 

3611 indexes = CentralCartesians(traj).indexes 

3612 else: 

3613 raise e 

3614 super().__init__(traj, indexes=indexes, check_aas=check_aas, delayed=delayed) 

3615 if generic_labels: 

3616 self.describe = self.generic_describe 

3617 

3618 @property 

3619 def name(self) -> str: 

3620 """str: The name of this class: 'AllCartesians'""" 

3621 return "AllCartesians" 

3622 

3623 def generic_describe(self) -> list[str]: 

3624 """Returns a list of generic labels, not containing residue names. 

3625 These can be used to stack tops of different topology. 

3626 

3627 Returns: 

3628 list[str]: A list of labels. 

3629 

3630 """ 

3631 labels = [] 

3632 if hasattr(self.traj, "clustal_w"): 

3633 raise NotImplementedError( 

3634 f"SideChainCartesians can't currently handle alignments. The " 

3635 f"implementation below won't probably work." 

3636 ) 

3637 # clustal_w_ = [*self.traj.clustal_w] 

3638 # clustal_w = [None] * (len(clustal_w_) * 3) 

3639 # clustal_w[::3] = clustal_w_ 

3640 # clustal_w[1::3] = clustal_w_ 

3641 # clustal_w[2::3] = clustal_w_ 

3642 # clustal_w = np.array(clustal_w) 

3643 # indices = (np.arange(len(clustal_w)) + 1)[clustal_w != "-"] 

3644 # assert len(indices) == len( 

3645 # self.central_indexes 

3646 # ), f"{indices.shape=} {self.indexes.shape=} {clustal_w.shape=} {clustal_w[:20]}" 

3647 else: 

3648 indices = self.indexes 

3649 visited_residues = set() 

3650 for i in indices: 

3651 if i in self.central_indices: 

3652 position = "c" 

3653 else: 

3654 position = "s" 

3655 residx = self.traj.top.atom(i).residue.index + 1 

3656 rescode = str(residx) + position 

3657 if rescode not in visited_residues: 

3658 visited_residues.add(rescode) 

3659 atom_index = 1 

3660 for pos in ["X", "Y", "Z"]: 

3661 labels.append( 

3662 f"{self.prefix_label} {pos} {atom_index} {residx} {position}" 

3663 ) 

3664 atom_index += 1 

3665 return labels 

3666 

3667 def describe(self) -> list[str]: 

3668 """Gives a list of strings describing this feature's feature-axis. 

3669 

3670 A feature computes a collective variable (CV). A CV is aligned with an MD 

3671 trajectory on the time/frame-axis. The feature axis is unique for every 

3672 feature. A feature describing the backbone torsions (phi, omega, psi) would 

3673 have a feature axis with the size 3*n-3, where n is the number of residues. 

3674 The end-to-end distance of a linear protein in contrast would just have 

3675 a feature axis with length 1. This `describe()` method will label these 

3676 values unambiguously. A backbone torsion feature's `describe()` could be 

3677 ['phi_1', 'omega_1', 'psi_1', 'phi_2', 'omega_2', ..., 'psi_n-1']. 

3678 The end-to-end distance feature could be described by 

3679 ['distance_between_MET1_and_LYS80']. 

3680 

3681 Returns: 

3682 list[str]: The labels of this feature. This list has as many entries as atoms in `self.top`. 

3683 

3684 """ 

3685 getlbl = ( 

3686 lambda at: f"ATOM {at.name:>4}:{at.index:5} {at.residue.name}:{at.residue.resSeq:>4} CHAIN {at.residue.chain.index}" 

3687 ) 

3688 labels = [] 

3689 for i in self.indexes: 

3690 for pos in ["X", "Y", "Z"]: 

3691 labels.append( 

3692 f"{self.prefix_label} {pos} {getlbl(self.top.atom(i))}" 

3693 ) 

3694 return labels 

3695 

3696 

3697class CentralCartesians(SelectionFeature): 

3698 """Feature that collects all cartesian positions of the backbone atoms. 

3699 

3700 Examples: 

3701 >>> import encodermap as em 

3702 >>> from pprint import pprint 

3703 >>> traj = em.load_project("pASP_pGLU", 0)[0] 

3704 >>> traj # doctest: +ELLIPSIS 

3705 <encodermap.SingleTraj object...> 

3706 >>> feature = em.features.CentralCartesians(traj, generic_labels=False) 

3707 >>> pprint(feature.describe()) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE 

3708 ['CENTERPOS X ATOM N: 0 GLU: 1 CHAIN 0', 

3709 'CENTERPOS Y ATOM N: 0 GLU: 1 CHAIN 0', 

3710 'CENTERPOS Z ATOM N: 0 GLU: 1 CHAIN 0', 

3711 'CENTERPOS X ATOM CA: 3 GLU: 1 CHAIN 0', 

3712 'CENTERPOS Y ATOM CA: 3 GLU: 1 CHAIN 0', 

3713 'CENTERPOS Z ATOM CA: 3 GLU: 1 CHAIN 0', 

3714 '... 

3715 'CENTERPOS Z ATOM C: 65 GLU: 6 CHAIN 0'] 

3716 >>> feature = em.features.CentralCartesians(traj, generic_labels=True) 

3717 >>> pprint(feature.describe()) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE 

3718 ['CENTERPOS X 1', 

3719 'CENTERPOS Y 1', 

3720 'CENTERPOS Z 1', 

3721 'CENTERPOS X 2', 

3722 'CENTERPOS Y 2', 

3723 'CENTERPOS Z 2', 

3724 '... 

3725 'CENTERPOS Z 18'] 

3726 

3727 """ 

3728 

3729 prefix_label: str = "CENTERPOS" 

3730 

3731 def __init__( 

3732 self, 

3733 traj: SingleTraj, 

3734 generic_labels: bool = False, 

3735 check_aas: bool = True, 

3736 delayed: bool = False, 

3737 ) -> None: 

3738 """Instantiate the CentralCartesians class. 

3739 

3740 In contrary to PyEMMA (which has now been archived), this feature returns 

3741 a high-dimensional array along the feature axis. In PyEMMA's and now in 

3742 EncoderMap's `SelectionFeature`, the cartesian coordinates of the atoms are 

3743 returned as a list of [x1, y1, z1, x2, y2, z2, x3, ..., zn]. This feature 

3744 yields a (n_atoms, 3) array with an extra dimension (carteisan coordinate):: 

3745 

3746 [ 

3747 [x1, y1, z1], 

3748 [x2, y2, z2], 

3749 ..., 

3750 [xn, yn, zn], 

3751 ] 

3752 

3753 Args: 

3754 traj (SingleTraj): An instance of `encodermap.SingleTraj`. Using 

3755 `SingleTrajs` instead of `md.Topology` (as it was in PyEMMA), 

3756 offers access to EncoderMap's `CustomTopology`, which can be 

3757 used to adapt the featurization and NN training for a wide 

3758 range of protein and non-protein MD trajectories. 

3759 generic_labels (bool): Whether to use generic labels to describe the 

3760 feature. Generic labels can be used to align different topologies. 

3761 If False, the labels returned by this feature's `describe()` method 

3762 are topology-specific ("CENTERPOS X ATOM N: 0 ASP: 1 CHAIN 0"). 

3763 If True, the labels are generic ("CENTERPOS X 0") and can be 

3764 aligned with other Features, that contain topologies, of which ASP 

3765 might not be the first amino acid. 

3766 check_aas (bool): Whether to check if all residues in `traj` are 

3767 known, prior to computing. 

3768 

3769 """ 

3770 self.traj = traj 

3771 self.indexes = self.traj.top.select("name CA or name C or name N") 

3772 # filter out unwanted indexes 

3773 unwanted_resnames = [ 

3774 k for k, v in self.traj._custom_top.amino_acid_codes.items() if v is None 

3775 ] 

3776 self.indexes = np.array( 

3777 list( 

3778 filter( 

3779 lambda x: self.traj.top.atom(x).residue.name 

3780 not in unwanted_resnames, 

3781 self.indexes, 

3782 ) 

3783 ) 

3784 ) 

3785 super().__init__( 

3786 self.traj, indexes=self.indexes, check_aas=check_aas, delayed=delayed 

3787 ) 

3788 self.dimension = 3 * len(self.indexes) 

3789 

3790 if generic_labels: 

3791 self.describe = self.generic_describe 

3792 

3793 def generic_describe(self) -> list[str]: 

3794 """Returns a list of generic labels, not containing residue names. 

3795 These can be used to stack tops of different topology. 

3796 

3797 Returns: 

3798 list[str]: A list of labels. 

3799 

3800 """ 

3801 labels = [] 

3802 if hasattr(self.traj, "clustal_w"): 

3803 clustal_w_ = [*self.traj.clustal_w] 

3804 clustal_w = [None] * (len(clustal_w_) * 3) 

3805 clustal_w[::3] = clustal_w_ 

3806 clustal_w[1::3] = clustal_w_ 

3807 clustal_w[2::3] = clustal_w_ 

3808 clustal_w = np.array(clustal_w) 

3809 indices = (np.arange(len(clustal_w)) + 1)[clustal_w != "-"] 

3810 assert len(indices) == len( 

3811 self.indexes 

3812 ), f"{indices.shape=} {self.indexes.shape=} {clustal_w.shape=} {clustal_w[:20]}" 

3813 else: 

3814 indices = np.arange(len(self.indexes)) + 1 

3815 for i in indices: 

3816 for pos in ["X", "Y", "Z"]: 

3817 labels.append(f"{self.prefix_label} {pos} {i}") 

3818 return labels 

3819 

3820 def describe(self) -> list[str]: 

3821 """Gives a list of strings describing this feature's feature-axis. 

3822 

3823 A feature computes a collective variable (CV). A CV is aligned with an MD 

3824 trajectory on the time/frame-axis. The feature axis is unique for every 

3825 feature. A feature describing the backbone torsions (phi, omega, psi) would 

3826 have a feature axis with the size 3*n-3, where n is the number of residues. 

3827 The end-to-end distance of a linear protein in contrast would just have 

3828 a feature axis with length 1. This `describe()` method will label these 

3829 values unambiguously. A backbone torsion feature's `describe()` could be 

3830 ['phi_1', 'omega_1', 'psi_1', 'phi_2', 'omega_2', ..., 'psi_n-1']. 

3831 The end-to-end distance feature could be described by 

3832 ['distance_between_MET1_and_LYS80']. 

3833 

3834 Returns: 

3835 list[str]: The labels of this feature. 

3836 

3837 """ 

3838 getlbl = ( 

3839 lambda at: f"ATOM {at.name:>4}:{at.index:5} {at.residue.name}:{at.residue.resSeq:>4} CHAIN {at.residue.chain.index}" 

3840 ) 

3841 labels = [] 

3842 for i in self.indexes: 

3843 for pos in ["X", "Y", "Z"]: 

3844 labels.append( 

3845 f"{self.prefix_label} {pos} {getlbl(self.top.atom(i))}" 

3846 ) 

3847 return labels 

3848 

3849 @property 

3850 def name(self) -> str: 

3851 """str: The name of the class: "CentralCartesians".""" 

3852 return "CentralCartesians" 

3853 

3854 

3855class SideChainCartesians(SelectionFeature): 

3856 """Feature that collects all cartesian positions of all non-backbone atoms. 

3857 

3858 Attributes: 

3859 top (mdtraj.Topology): Topology of this feature. 

3860 indexes (np.ndarray): The numpy array returned from `top.select('all')`. 

3861 prefix_label (str): A prefix for the labels. In this case it is 'SIDECHPOS'. 

3862 

3863 """ 

3864 

3865 prefix_label: str = "SIDECHPOS" 

3866 

3867 def __init__( 

3868 self, 

3869 traj: SingleTraj, 

3870 check_aas: bool = True, 

3871 generic_labels: bool = False, 

3872 delayed: bool = False, 

3873 ) -> None: 

3874 """Instantiate the `SideChainCartesians feature. 

3875 

3876 Uses MDTraj's 'not backbone' topology selector. Is not guaranteed to 

3877 work with the better tested `SideChainDihedrals`. 

3878 

3879 """ 

3880 self.traj = traj 

3881 dihe_indices = np.unique(SideChainDihedrals(traj=traj).angle_indexes.flatten()) 

3882 backbone_indices = CentralCartesians(traj=traj).indexes 

3883 indexes = np.setdiff1d(dihe_indices, backbone_indices) 

3884 assert indexes[0] in dihe_indices and indexes[0] not in backbone_indices 

3885 super().__init__( 

3886 self.traj, indexes=indexes, check_aas=check_aas, delayed=delayed 

3887 ) 

3888 self.dimension = 3 * len(self.indexes) 

3889 if generic_labels: 

3890 self.describe = self.generic_describe 

3891 

3892 def generic_describe(self) -> list[str]: 

3893 """Returns a list of generic labels, not containing residue names. 

3894 These can be used to stack tops of different topology. 

3895 

3896 Returns: 

3897 list[str]: A list of labels. 

3898 

3899 """ 

3900 labels = [] 

3901 if hasattr(self.traj, "clustal_w"): 

3902 raise NotImplementedError( 

3903 f"SideChainCartesians can't currently handle alignments. The " 

3904 f"implementation below won't probably work." 

3905 ) 

3906 # clustal_w_ = [*self.traj.clustal_w] 

3907 # clustal_w = [None] * (len(clustal_w_) * 3) 

3908 # clustal_w[::3] = clustal_w_ 

3909 # clustal_w[1::3] = clustal_w_ 

3910 # clustal_w[2::3] = clustal_w_ 

3911 # clustal_w = np.array(clustal_w) 

3912 # indices = (np.arange(len(clustal_w)) + 1)[clustal_w != "-"] 

3913 # assert len(indices) == len( 

3914 # self.central_indexes 

3915 # ), f"{indices.shape=} {self.indexes.shape=} {clustal_w.shape=} {clustal_w[:20]}" 

3916 else: 

3917 indices = self.indexes 

3918 visited_residues = set() 

3919 for i in indices: 

3920 residx = self.traj.top.atom(i).residue.index + 1 

3921 if residx not in visited_residues: 

3922 visited_residues.add(residx) 

3923 atom_index = 1 

3924 for pos in ["X", "Y", "Z"]: 

3925 labels.append(f"{self.prefix_label} {pos} {atom_index} {residx}") 

3926 atom_index += 1 

3927 return labels 

3928 

3929 @property 

3930 def name(self): 

3931 """str: The name of the class: "SideChainCartesians".""" 

3932 return "SideChainCartesians" 

3933 

3934 def describe(self) -> list[str]: 

3935 """Gives a list of strings describing this feature's feature-axis. 

3936 

3937 A feature computes a collective variable (CV). A CV is aligned with an MD 

3938 trajectory on the time/frame-axis. The feature axis is unique for every 

3939 feature. A feature describing the backbone torsions (phi, omega, psi) would 

3940 have a feature axis with the size 3*n-3, where n is the number of residues. 

3941 The end-to-end distance of a linear protein in contrast would just have 

3942 a feature axis with length 1. This `describe()` method will label these 

3943 values unambiguously. A backbone torsion feature's `describe()` could be 

3944 ['phi_1', 'omega_1', 'psi_1', 'phi_2', 'omega_2', ..., 'psi_n-1']. 

3945 The end-to-end distance feature could be described by 

3946 ['distance_between_MET1_and_LYS80']. 

3947 

3948 Returns: 

3949 list[str]: The labels of this feature. 

3950 

3951 """ 

3952 getlbl = ( 

3953 lambda at: f"ATOM {at.name:>4}:{at.index:5} {at.residue.name}:{at.residue.resSeq:>4} CHAIN {at.residue.chain.index}" 

3954 ) 

3955 labels = [] 

3956 for i in self.indexes: 

3957 for pos in ["X", "Y", "Z"]: 

3958 labels.append( 

3959 f"{self.prefix_label} {pos} {getlbl(self.top.atom(i))}" 

3960 ) 

3961 return labels 

3962 

3963 

3964class AllBondDistances(DistanceFeature): 

3965 """Feature that collects all bonds in a topology. 

3966 

3967 Attributes: 

3968 top (mdtraj.Topology): Topology of this feature. 

3969 indexes (np.ndarray): The numpy array returned from `top.select('all')`. 

3970 prefix_label (str): A prefix for the labels. In this case it is 'DISTANCE'. 

3971 

3972 """ 

3973 

3974 prefix_label: str = "DISTANCE " 

3975 

3976 def __init__( 

3977 self, 

3978 traj: SingleTraj, 

3979 distance_indexes: Optional[np.ndarray] = None, 

3980 periodic: bool = True, 

3981 check_aas: bool = True, 

3982 delayed: bool = False, 

3983 ) -> None: 

3984 self.distance_indexes = distance_indexes 

3985 if self.distance_indexes is None: 

3986 self.traj = traj 

3987 self.distance_indexes = np.vstack( 

3988 [[b[0].index, b[1].index] for b in self.traj.top.bonds] 

3989 ) 

3990 # print(self.distance_indexes, len(self.distance_indexes)) 

3991 super().__init__( 

3992 self.traj, 

3993 self.distance_indexes, 

3994 periodic, 

3995 check_aas=check_aas, 

3996 delayed=delayed, 

3997 ) 

3998 else: 

3999 super().__init__( 

4000 self.traj, 

4001 self.distance_indexes, 

4002 periodic, 

4003 check_aas=check_aas, 

4004 delayed=delayed, 

4005 ) 

4006 # print(self.distance_indexes, len(self.distance_indexes)) 

4007 

4008 def generic_describe(self) -> list[str]: 

4009 """Returns a list of generic labels, not containing residue names. 

4010 These can be used to stack tops of different topology. 

4011 

4012 Returns: 

4013 list[str]: A list of labels. 

4014 

4015 """ 

4016 if hasattr(self.traj, "clustal_w"): 

4017 raise NotImplementedError( 

4018 f"AllBondDistances can currently not align disjoint sequences." 

4019 ) 

4020 else: 

4021 indices = np.arange(len(self.distance_indexes)) + 1 

4022 labels = [] 

4023 for i in indices: 

4024 labels.append(f"{self.prefix_label}{i}") 

4025 return labels 

4026 

4027 def describe(self) -> list[str]: 

4028 """Gives a list of strings describing this feature's feature-axis. 

4029 

4030 A feature computes a collective variable (CV). A CV is aligned with an MD 

4031 trajectory on the time/frame-axis. The feature axis is unique for every 

4032 feature. A feature describing the backbone torsions (phi, omega, psi) would 

4033 have a feature axis with the size 3*n-3, where n is the number of residues. 

4034 The end-to-end distance of a linear protein in contrast would just have 

4035 a feature axis with length 1. This `describe()` method will label these 

4036 values unambiguously. A backbone torsion feature's `describe()` could be 

4037 ['phi_1', 'omega_1', 'psi_1', 'phi_2', 'omega_2', ..., 'psi_n-1']. 

4038 The end-to-end distance feature could be described by 

4039 ['distance_between_MET1_and_LYS80']. 

4040 

4041 Returns: 

4042 list[str]: The labels of this feature. 

4043 

4044 """ 

4045 getlbl = ( 

4046 lambda at: f"ATOM {at.name:>4}:{at.index:5} {at.residue.name}:{at.residue.resSeq:>4}" 

4047 ) 

4048 labels = [] 

4049 for i, j in self.distance_indexes: 

4050 i, j = self.top.atom(i), self.top.atom(j) 

4051 labels.append( 

4052 f"{self.prefix_label}{getlbl(i)} DIST {getlbl(j)} CHAIN {int(np.unique([a.residue.chain.index for a in [i, j]])[0])}" 

4053 ) 

4054 return labels 

4055 

4056 @property 

4057 def name(self) -> str: 

4058 """str: The name of the class: "AllBondDistances".""" 

4059 return "AllBondDistances" 

4060 

4061 @property 

4062 def indexes(self) -> np.ndarray: 

4063 """np.ndarray: A (n_angles, 2) shaped numpy array giving the atom indices 

4064 of the distances to be calculated.""" 

4065 return self.distance_indexes 

4066 

4067 

4068class CentralBondDistances(AllBondDistances): 

4069 """Feature that collects all bonds in the backbone of a topology. 

4070 

4071 Attributes: 

4072 top (mdtraj.Topology): Topology of this feature. 

4073 indexes (np.ndarray): The numpy array returned from `top.select('all')`. 

4074 prefix_label (str): A prefix for the labels. In this case, it is 'CENTERDISTANCE'. 

4075 

4076 """ 

4077 

4078 prefix_label: str = "CENTERDISTANCE " 

4079 

4080 def __init__( 

4081 self, 

4082 traj: SingleTraj, 

4083 distance_indexes: Optional[np.ndarray] = None, 

4084 periodic: bool = True, 

4085 generic_labels: bool = False, 

4086 check_aas: bool = True, 

4087 delayed: bool = False, 

4088 ) -> None: 

4089 self.traj = traj 

4090 select = traj.top.select("name CA or name C or name N") 

4091 

4092 if distance_indexes is None: 

4093 distance_indexes = [] 

4094 

4095 for b in traj.top.bonds: 

4096 if np.all([np.isin(x.index, select) for x in b]): 

4097 distance_indexes.append([x.index for x in b]) 

4098 distance_indexes = np.sort(distance_indexes, axis=0) 

4099 

4100 if generic_labels: 

4101 self.describe = self.generic_describe 

4102 

4103 super().__init__( 

4104 self.traj, distance_indexes, periodic, check_aas=check_aas, delayed=delayed 

4105 ) 

4106 

4107 def generic_describe(self) -> list[str]: 

4108 """Returns a list of generic labels, not containing residue names. 

4109 These can be used to stack tops of different topology. 

4110 

4111 Returns: 

4112 list[str]: A list of labels. 

4113 

4114 """ 

4115 if hasattr(self.traj, "clustal_w"): 

4116 indices = [] 

4117 clustal_w_ = [*self.traj.clustal_w] 

4118 clustal_w = [None] * (len(clustal_w_) * 3) 

4119 clustal_w[::3] = clustal_w_ 

4120 clustal_w[1::3] = clustal_w_ 

4121 clustal_w[2::3] = clustal_w_ 

4122 i = 0 

4123 for a, b in zip(clustal_w[:-1], clustal_w[1:]): 

4124 i += 1 

4125 if a == "-": 

4126 continue 

4127 indices.append(i) 

4128 indices = np.array(indices) 

4129 else: 

4130 indices = np.arange(len(self.distance_indexes)) + 1 

4131 labels = [] 

4132 for i in indices: 

4133 labels.append(f"{self.prefix_label}{i}") 

4134 return labels 

4135 

4136 @property 

4137 def name(self) -> str: 

4138 """str: The name of the class: "CentralBondDistances".""" 

4139 return "CentralBondDistances" 

4140 

4141 @property 

4142 def indexes(self) -> np.ndarray: 

4143 """np.ndarray: A (n_angles, 2) shaped numpy array giving the atom indices 

4144 of the distances to be calculated.""" 

4145 return self.distance_indexes 

4146 

4147 

4148class SideChainBondDistances(AllBondDistances): 

4149 """Feature that collects all bonds not in the backbone of a topology. 

4150 

4151 Attributes: 

4152 top (mdtraj.Topology): Topology of this feature. 

4153 indexes (np.ndarray): The numpy array returned from `top.select('all')`. 

4154 prefix_label (str): A prefix for the labels. In this case it is 'SIDECHDISTANCE'. 

4155 

4156 """ 

4157 

4158 prefix_label: str = "SIDECHDISTANCE " 

4159 

4160 def __init__( 

4161 self, 

4162 traj: SingleTraj, 

4163 periodic: bool = True, 

4164 check_aas: bool = True, 

4165 generic_labels: bool = False, 

4166 delayed: bool = False, 

4167 ) -> None: 

4168 self.traj = traj 

4169 

4170 which = ["chi1", "chi2", "chi3", "chi4", "chi5"] 

4171 indices_dict = {k: getattr(self.traj, f"indices_{k}") for k in which} 

4172 # flat_list = [ 

4173 # item 

4174 # for sublist in indices_dict.values() 

4175 # for item in sublist.flatten().tolist() 

4176 # ] 

4177 # atoms_in_sidechain_dihedrals = set(flat_list) 

4178 

4179 distance_indexes = [] 

4180 for angle, indices in indices_dict.items(): 

4181 for index in indices: 

4182 if angle == "chi1": 

4183 distance_indexes.append([index[1], index[2]]) 

4184 distance_indexes.append([index[2], index[3]]) 

4185 else: 

4186 distance_indexes.append([index[2], index[3]]) 

4187 distance_indexes = np.sort(distance_indexes, axis=0) 

4188 super().__init__( 

4189 self.traj, 

4190 distance_indexes=distance_indexes, 

4191 periodic=periodic, 

4192 check_aas=check_aas, 

4193 delayed=delayed, 

4194 ) 

4195 if generic_labels: 

4196 self.describe = self.generic_describe 

4197 

4198 def generic_describe(self) -> list[str]: 

4199 """Returns a list of generic labels, not containing residue names. 

4200 These can be used to stack tops of different topology. 

4201 

4202 Returns: 

4203 list[str]: A list of labels. 

4204 

4205 """ 

4206 labels = [] 

4207 if hasattr(self.traj, "clustal_w"): 

4208 raise NotImplementedError 

4209 # indices = [] 

4210 # clustal_w_ = [*self.traj.clustal_w] 

4211 # clustal_w = [None] * (len(clustal_w_) * 3) 

4212 # clustal_w[::3] = clustal_w_ 

4213 # clustal_w[1::3] = clustal_w_ 

4214 # clustal_w[2::3] = clustal_w_ 

4215 # i = 0 

4216 # for a, b in zip(clustal_w[:-1], clustal_w[1:]): 

4217 # i += 1 

4218 # if a == "-": 

4219 # continue 

4220 # indices.append(i) 

4221 # indices = np.array(indices) 

4222 else: 

4223 indices = self.distance_indexes 

4224 visited_residues = set() 

4225 for a, b in indices: 

4226 residx_a = self.traj.top.atom(a).residue.index + 1 

4227 residx_b = self.traj.top.atom(b).residue.index + 1 

4228 assert residx_a == residx_b, ( 

4229 f"The sidechain distance between atom {self.traj.top.atom(a)} and " 

4230 f"{self.traj.top.atom(b)} describes a distance between two residues " 

4231 f"({residx_a} and {residx_b})." 

4232 f"As sidechains belong always to a single residue, something is off. " 

4233 ) 

4234 if residx_a not in visited_residues: 

4235 visited_residues.add(residx_a) 

4236 distance_index = 1 

4237 labels.append(f"{self.prefix_label} {distance_index} {residx_a}") 

4238 distance_index += 1 

4239 return labels 

4240 

4241 @property 

4242 def name(self): 

4243 """str: The name of the class: "SideChainBondDistances".""" 

4244 return "SideChainBondDistances" 

4245 

4246 @property 

4247 def indexes(self): 

4248 """np.ndarray: A (n_angles, 2) shaped numpy array giving the atom indices 

4249 of the distances to be calculated.""" 

4250 return self.distance_indexes 

4251 

4252 

4253class CentralAngles(AngleFeature): 

4254 """Feature that collects all angles in the backbone of a topology. 

4255 

4256 Attributes: 

4257 top (mdtraj.Topology): Topology of this feature. 

4258 indexes (np.ndarray): The numpy array returned from `top.select('all')`. 

4259 prefix_label (str): A prefix for the labels. In this case it is 'CENTERANGLE'. 

4260 

4261 """ 

4262 

4263 prefix_label: str = "CENTERANGLE " 

4264 

4265 def __init__( 

4266 self, 

4267 traj: Union[SingleTraj, TrajEnsemble], 

4268 deg: bool = False, 

4269 cossin: bool = False, 

4270 periodic: bool = True, 

4271 generic_labels: bool = False, 

4272 check_aas: bool = True, 

4273 delayed: bool = False, 

4274 ) -> None: 

4275 self.traj = traj 

4276 select = traj.top.select("name CA or name C or name N") 

4277 # add 4 bonds in KAC 

4278 # if any([r.name == "KAC" for r in top.residues]): 

4279 # self.top = add_KAC_backbone_bonds(self.top) 

4280 bonds = np.vstack([[x.index for x in b] for b in traj.top.bonds]) 

4281 bond_names = np.vstack([[x for x in b] for b in traj.top.bonds]) 

4282 angle_indexes = [] 

4283 for a in select: 

4284 where = np.where(bonds == a) 

4285 possible_bonds = bonds[where[0], :] 

4286 possible_bond_names = bond_names[where[0], :] 

4287 where = np.isin(possible_bonds, select) 

4288 hits = np.count_nonzero(np.all(where, axis=1)) 

4289 if hits <= 1: 

4290 # atom is not part of any angles 

4291 continue 

4292 elif hits == 2: 

4293 where = np.all(where, axis=1) 

4294 these = np.unique( 

4295 [traj.top.atom(i).index for i in possible_bonds[where, :].flatten()] 

4296 ) 

4297 angle_indexes.append(these) 

4298 elif hits == 3: 

4299 a = traj.top.atom(a) 

4300 bonds = "\n".join( 

4301 [ 

4302 f"BOND {str(i):<10}-{str(j):>10}" 

4303 for i, j in traj.top.bonds 

4304 if i == a or j == a 

4305 ] 

4306 ) 

4307 raise Exception( 

4308 f"The atom {a} takes part in three possible angles defined " 

4309 f"by the C, CA, and N atoms:\n{bonds}." 

4310 ) 

4311 elif hits == 4: 

4312 raise Exception( 

4313 "Can't deal with these angles. One atom is part of four possible angles" 

4314 ) 

4315 else: 

4316 raise Exception( 

4317 "Can't deal with these angles. One atom is part of more than three angles" 

4318 ) 

4319 

4320 angle_indexes = np.vstack(angle_indexes) 

4321 angle_indexes = np.unique(angle_indexes, axis=0) 

4322 if generic_labels: 

4323 self.describe = self.generic_describe 

4324 super().__init__( 

4325 traj, angle_indexes, deg, cossin, periodic, check_aas, delayed=delayed 

4326 ) 

4327 

4328 def generic_describe(self) -> list[str]: 

4329 """Returns a list of generic labels, not containing residue names. 

4330 These can be used to stack tops of different topology. 

4331 

4332 Returns: 

4333 list[str]: A list of labels. 

4334 

4335 """ 

4336 if hasattr(self.traj, "clustal_w"): 

4337 indices = [] 

4338 clustal_w_ = [*self.traj.clustal_w] 

4339 clustal_w = [None] * (len(clustal_w_) * 3) 

4340 clustal_w[::3] = clustal_w_ 

4341 clustal_w[1::3] = clustal_w_ 

4342 clustal_w[2::3] = clustal_w_ 

4343 i = 0 

4344 for a, b, c in zip(clustal_w[:-2], clustal_w[1:-1], clustal_w[2:]): 

4345 i += 1 

4346 if a == "-": 

4347 continue 

4348 indices.append(i) 

4349 indices = np.array(indices) 

4350 else: 

4351 indices = np.arange(len(self.angle_indexes)) + 1 

4352 labels = [] 

4353 for i in indices: 

4354 labels.append(f"{self.prefix_label}{i}") 

4355 return labels 

4356 

4357 def describe(self) -> list[str]: 

4358 """Gives a list of strings describing this feature's feature-axis. 

4359 

4360 A feature computes a collective variable (CV). A CV is aligned with an MD 

4361 trajectory on the time/frame-axis. The feature axis is unique for every 

4362 feature. A feature describing the backbone torsions (phi, omega, psi) would 

4363 have a feature axis with the size 3*n-3, where n is the number of residues. 

4364 The end-to-end distance of a linear protein in contrast would just have 

4365 a feature axis with length 1. This `describe()` method will label these 

4366 values unambiguously. A backbone torsion feature's `describe()` could be 

4367 ['phi_1', 'omega_1', 'psi_1', 'phi_2', 'omega_2', ..., 'psi_n-1']. 

4368 The end-to-end distance feature could be described by 

4369 ['distance_between_MET1_and_LYS80']. 

4370 

4371 Returns: 

4372 list[str]: The labels of this feature. 

4373 

4374 """ 

4375 getlbl = ( 

4376 lambda at: f"ATOM {at.name:>4}:{at.index:>5} {at.residue.name}:{at.residue.resSeq:>4}" 

4377 ) 

4378 labels = [] 

4379 for i, j, k in self.angle_indexes: 

4380 i, j, k = self.top.atom(i), self.top.atom(j), self.top.atom(k) 

4381 labels.append( 

4382 f"{self.prefix_label}{getlbl(i)} ANGLE {getlbl(j)} ANGLE " 

4383 f"{getlbl(k)} CHAIN " 

4384 f"{int(np.unique([a.residue.chain.index for a in [i, j, k]])[0])}" 

4385 ) 

4386 return labels 

4387 

4388 @property 

4389 def name(self) -> str: 

4390 """str: The name of the class: "CentralAngles".""" 

4391 return "CentralAngles" 

4392 

4393 @property 

4394 def indexes(self) -> np.ndarray: 

4395 """np.ndarray: A (n_angles, 3) shaped numpy array giving the atom indices 

4396 of the angles to be calculated.""" 

4397 return self.angle_indexes 

4398 

4399 

4400class SideChainAngles(AngleFeature): 

4401 """Feature that collects all angles not in the backbone of a topology. 

4402 

4403 Attributes: 

4404 top (mdtraj.Topology): Topology of this feature. 

4405 indexes (np.ndarray): The numpy array returned from `top.select('all')`. 

4406 prefix_label (str): A prefix for the labels. In this case it is 'SIDECHANGLE'. 

4407 

4408 """ 

4409 

4410 prefix_label: str = "SIDECHANGLE " 

4411 

4412 def __init__( 

4413 self, 

4414 traj: SingleTraj, 

4415 deg: bool = False, 

4416 cossin: bool = False, 

4417 periodic: bool = True, 

4418 check_aas: bool = True, 

4419 generic_labels: bool = False, 

4420 delayed: bool = False, 

4421 ) -> None: 

4422 self.traj = traj 

4423 angle_indexes = [] 

4424 for residue, ind in traj._custom_top.sidechain_indices_by_residue(): 

4425 ind = np.vstack( 

4426 [ 

4427 ind[:-2], 

4428 ind[1:-1], 

4429 ind[2:], 

4430 ] 

4431 ).T 

4432 angle_indexes.append(ind) 

4433 angle_indexes = np.vstack(angle_indexes) 

4434 super().__init__( 

4435 self.traj, angle_indexes, deg, cossin, periodic, check_aas, delayed=delayed 

4436 ) 

4437 if generic_labels: 

4438 self.describe = self.generic_describe 

4439 

4440 def generic_describe(self) -> list[str]: 

4441 """Returns a list of generic labels, not containing residue names. 

4442 These can be used to stack tops of different topology. 

4443 

4444 Returns: 

4445 list[str]: A list of labels. 

4446 

4447 """ 

4448 labels = [] 

4449 if hasattr(self.traj, "clustal_w"): 

4450 raise NotImplementedError 

4451 # indices = [] 

4452 # clustal_w_ = [*self.traj.clustal_w] 

4453 # clustal_w = [None] * (len(clustal_w_) * 3) 

4454 # clustal_w[::3] = clustal_w_ 

4455 # clustal_w[1::3] = clustal_w_ 

4456 # clustal_w[2::3] = clustal_w_ 

4457 # i = 0 

4458 # for a, b, c in zip(clustal_w[:-2], clustal_w[1:-1], clustal_w[2:]): 

4459 # i += 1 

4460 # if a == "-": 

4461 # continue 

4462 # indices.append(i) 

4463 # indices = np.array(indices) 

4464 else: 

4465 indices = self.angle_indexes 

4466 visited_residues = set() 

4467 for a, b, c in indices: 

4468 residx_a = self.traj.top.atom(a).residue.index + 1 

4469 residx_b = self.traj.top.atom(b).residue.index + 1 

4470 residx_c = self.traj.top.atom(c).residue.index + 1 

4471 assert residx_a == residx_b == residx_c, ( 

4472 f"The sidechain distance between atom {self.traj.top.atom(a)}, " 

4473 f"{self.traj.top.atom(b)}, and {self.traj.top.atom(c)} describes " 

4474 f"an angle between two or more residues ({residx_a}, {residx_b}, and {residx_c})." 

4475 f"As sidechains belong always to a single residue, something is off. " 

4476 ) 

4477 if residx_a not in visited_residues: 

4478 visited_residues.add(residx_a) 

4479 angle_index = 1 

4480 labels.append(f"{self.prefix_label} {angle_index} {residx_a}") 

4481 angle_index += 1 

4482 return labels 

4483 

4484 def describe(self): 

4485 """Gives a list of strings describing this feature's feature-axis. 

4486 

4487 A feature computes a collective variable (CV). A CV is aligned with an MD 

4488 trajectory on the time/frame-axis. The feature axis is unique for every 

4489 feature. A feature describing the backbone torsions (phi, omega, psi) would 

4490 have a feature axis with the size 3*n-3, where n is the number of residues. 

4491 The end-to-end distance of a linear protein in contrast would just have 

4492 a feature axis with length 1. This `describe()` method will label these 

4493 values unambiguously. A backbone torsion feature's `describe()` could be 

4494 ['phi_1', 'omega_1', 'psi_1', 'phi_2', 'omega_2', ..., 'psi_n-1']. 

4495 The end-to-end distance feature could be described by 

4496 ['distance_between_MET1_and_LYS80']. 

4497 

4498 Returns: 

4499 list[str]: The labels of this feature. 

4500 

4501 """ 

4502 getlbl = ( 

4503 lambda at: f"ATOM {at.name:>4}:{at.index:5} {at.residue.name}:{at.residue.resSeq:>4}" 

4504 ) 

4505 labels = [] 

4506 for i, j, k in self.angle_indexes: 

4507 i, j, k = self.top.atom(i), self.top.atom(j), self.top.atom(k) 

4508 labels.append( 

4509 f"{self.prefix_label}{getlbl(i)} ANGLE {getlbl(j)} ANGLE {getlbl(k)} CHAIN {int(np.unique([a.residue.chain.index for a in [i, j, k]])[0])}" 

4510 ) 

4511 return labels 

4512 

4513 @property 

4514 def name(self): 

4515 """str: The name of the class: "SideChainAngles".""" 

4516 return "SideChainAngles" 

4517 

4518 @property 

4519 def indexes(self): 

4520 """np.ndarray: A (n_angles, 3) shaped numpy array giving the atom indices 

4521 of the angles to be calculated.""" 

4522 return self.angle_indexes