Coverage for encodermap/trajinfo/info_single.py: 79%

499 statements  

« prev     ^ index     » next       coverage.py v7.1.0, created at 2023-02-07 11:05 +0000

1# -*- coding: utf-8 -*- 

2# encodermap/trajinfo/info_single.py 

3################################################################################ 

4# Encodermap: A python library for dimensionality reduction. 

5# 

6# Copyright 2019-2022 University of Konstanz and the Authors 

7# 

8# Authors: 

9# Kevin Sawade 

10# 

11# Encodermap is free software: you can redistribute it and/or modify 

12# it under the terms of the GNU Lesser General Public License as 

13# published by the Free Software Foundation, either version 2.1 

14# of the License, or (at your option) any later version. 

15# This package is distributed in the hope that it will be useful to other 

16# researches. IT DOES NOT COME WITH ANY WARRANTY WHATSOEVER; without even the 

17# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 

18# See the GNU Lesser General Public License for more details. 

19# 

20# See <http://www.gnu.org/licenses/>. 

21################################################################################ 

22"""Classes to work with ensembles of trajectories. 

23 

24The statistics of a protein can be better described by an ensemble of proteins, 

25rather than a single long trajectory. Treating a protein in such a way opens great 

26possibilities and changes the way one can treat molecular dynamics data. 

27Trajectory ensembles allow: 

28 * Faster convergence via adaptive sampling. 

29 * Better anomaly detection of unique structural states. 

30 

31 

32This subpackage contains two classes which are containers of trajectory data.

33The SingleTraj class contains information about a single trajectory.

34The TrajEnsemble class contains information about multiple trajectories. This adds 

35a new dimension to MD data. The time and atom dimension are already established. 

36Two frames can be appended along the time axis to get a trajectory with multiple 

37frames. If they are appended along the atom axis, the new frame contains the 

38atoms of these two. The trajectory works in a similar fashion. Adding two trajectories 

39along the trajectory axis returns a trajectory ensemble, represented as a TrajEnsemble

40class in this package. 

41 

42See also: 

43 http://statisticalbiophysicsblog.org/?p=92 

44 

45""" 

46 

47################################################################################ 

48# Imports 

49################################################################################ 

50 

51 

52from __future__ import annotations 

53 

54import errno 

55import os 

56import sys 

57import warnings 

58from io import StringIO 

59from pathlib import Path 

60 

61import numpy as np 

62 

63from .._optional_imports import _optional_import 

64from ..misc.errors import BadError 

65from ..misc.misc import _TOPOLOGY_EXTS 

66from ..misc.xarray import construct_xarray_from_numpy 

67from ..misc.xarray_save_wrong_hdf5 import save_netcdf_alongside_mdtraj 

68from .info_all import TrajEnsemble 

69from .load_traj import _load_traj 

70 

71################################################################################ 

72# Optional Imports 

73################################################################################ 

74 

75 

76md = _optional_import("mdtraj") 

77mda = _optional_import("MDAnalysis") 

78h5 = _optional_import("h5py") 

79xr = _optional_import("xarray") 

80 

81 

82################################################################################ 

83# Typing 

84################################################################################ 

85 

86 

87from typing import TYPE_CHECKING, Callable, Literal, Optional, Union 

88 

89if TYPE_CHECKING: 

90 import h5py as h5 

91 import MDAnalysis as mda 

92 import mdtraj as md 

93 import xarray as xr 

94 

95 from .trajinfo_utils import SingleTrajFeatureType 

96 

97 

98################################################################################ 

99# Globals 

100################################################################################ 

101 

102 

103__all__ = ["SingleTraj"] 

104 

105 

106################################################################################ 

107# Utilities 

108################################################################################ 

109 

110 

def combine_attrs(attrs, context):
    """Merge several attribute dictionaries into a single dictionary.

    The result always starts out with the unit entries ``length_units``,
    ``time_units`` and ``angle_units``. Values of the keys ``full_path``,
    ``topology_file`` and ``feature_axis`` are collected into a list when the
    key occurs more than once; for every other key the last value wins.

    NOTE(review): the ``(attrs, context)`` signature presumably matches the
    callable form of xarray's ``combine_attrs`` option -- confirm at the
    call sites.

    Args:
        attrs (Iterable[dict]): The attribute dictionaries to merge, in order.
        context: Not used by this function.

    Returns:
        dict: The merged attributes.

    """
    collected_keys = ("full_path", "topology_file", "feature_axis")
    out = {"length_units": "nm", "time_units": "ps", "angle_units": "rad"}
    for attr in attrs:
        for key, val in attr.items():
            if key in collected_keys and key in out:
                if isinstance(out[key], list):
                    out[key].append(val)
                else:
                    out[key] = [out[key], val]
            elif key in collected_keys and isinstance(val, list):
                # Store a copy: later appends must not modify the list that
                # still belongs to the input dictionary.
                out[key] = list(val)
            else:
                out[key] = val
    return out

126 

127 

class Capturing(list):
    """List that collects everything written to ``sys.stdout`` inside a ``with`` block.

    On entering the context ``sys.stdout`` is replaced by an in-memory buffer.
    On leaving it, the buffered text is split into lines, the lines are
    appended to this list and the previous ``sys.stdout`` is put back.

    Examples:
        >>> # write a function
        >>> def my_func(arg='argument'):
        ...     print(arg)
        ...     return('fin')
        >>> # use capturing context manager
        >>> with Capturing() as output:
        ...     my_func('new_argument')
        >>> print(output)
        ['new_argument', "'fin'"]

    """

    def __enter__(self):
        """Redirect ``sys.stdout`` into a fresh buffer and return the list itself."""
        buffer = StringIO()
        self._stdout = sys.stdout
        sys.stdout = buffer
        self._stringio = buffer
        return self

    def __exit__(self, *args):
        """Store the captured lines and restore the previous ``sys.stdout``."""
        captured_text = self._stringio.getvalue()
        self.extend(captured_text.splitlines())
        del self._stringio  # free up some memory
        sys.stdout = self._stdout

153 

154 

def _hash_numpy_array(x: np.ndarray) -> int:
    """Combine the hashes of an array's shape, strides and raw bytes.

    Args:
        x (np.ndarray): The array to hash.

    Returns:
        int: The XOR of ``hash(x.shape)``, ``hash(x.strides)`` and the hash
            of the array's bytes.

    """
    digest = 0
    for component in (x.shape, x.strides, x.data.tobytes()):
        digest ^= hash(component)
    return digest

160 

161 

162############################################################################## 

163# Classes 

164############################################################################## 

165 

166 

167class SingleTraj: 

168 """This class contains the info about a single trajectory. 

169 

170 This class contains many of the attributes and methods of mdtraj's Trajectory. 

171 It is meant to be used as a single trajectory in a ensemble defined in the 

172 TrajEnsemble class. Other than the standard mdtraj Trajectory this class loads the 

173 MD data only when needed. The location of the file and other attributes like 

174 a single integer index (single frame of trajectory) or a list of integers 

175 (multiple frames of the same traj) are stored until the traj is accessed via the 

176 `SingleTraj.traj` attribute. The returned traj is a mdtraj Trajectory 

177 with the correct number of frames in the correct sequence. 

178 

179 Furthermore this class keeps track of your collective variables. Oftentimes 

180 the raw xyz data of a trajectory is not needed and suitable CVs are selected 

181 to represent a protein via internal coordinates (torsions, pairwise distances, etc.). 

182 This class keeps track of your CVs. Whether you call them `highd` or

183 `torsions`, this class keeps track of everything and returns the values when 

184 you need them. 

185 

186 SingleTraj supports fancy indexing, so you can extract one or more frames 

187 from a Trajectory as a separate trajectory. For example, to form a 

188 trajectory with every other frame, you can slice with `traj[::2]`. 

189 

190 SingleTraj uses the nanometer, degree & picosecond unit system. 

191 

192 Attributes: 

193 backend (str): Current state of loading. If backend == 'no_load' xyz data 

194 will be loaded from disk, if accessed. If backend == 'mdtraj', the 

195 data is already in RAM. 

196 common_str (str): Substring of traj_file. Used to group multiple 

197 trajectories together based on common topology files. If traj files 

198 protein1_traj1.xtc and protein1_traj2.xtc share the same protein1.pdb

199 common_str can be set to group them together. 

200 index (Union[int, list, np.array, slice]): Fancy slices of the 

201 trajectory. When file is loaded from disk, the fancy indexes will 

202 be applied. 

203 traj_num (int): Integer to identify a SingleTraj class in a TrajEnsemble class. 

204 traj_file (str): Trajectory file used to create this class. 

205 top_file (str): Topology file used to create this class. If a .h5 trajectory 

206 was used traj_file and top_file are identical. If a mdtraj.Trajectory was 

207 used to create SingleTraj, these strings are empty. 

208 

209 Examples: 

210 >>> # load a pdb file with 14 frames from rcsb.org 

211 >>> import encodermap as em 

212 >>> traj = em.SingleTraj("https://files.rcsb.org/view/1GHC.pdb") 

213 >>> print(traj) 

214 encodermap.SingleTraj object. Current backend is no_load. Basename is 1GHC. Not containing any CVs. 

215 >>> traj.n_frames 

216 14 

217 

218 >>> # advanced slicing 

219 >>> traj = em.SingleTraj("https://files.rcsb.org/view/1GHC.pdb")[-1:7:-2] 

220 >>> print([frame.id for frame in traj]) 

221 [13, 11, 9] 

222 

223 >>> # Build a trajectory ensemble from multiple trajs 

224 >>> traj1 = em.SingleTraj("https://files.rcsb.org/view/1YUG.pdb") 

225 >>> traj2 = em.SingleTraj("https://files.rcsb.org/view/1YUF.pdb") 

226 >>> trajs = traj1 + traj2 

227 >>> print(trajs.n_trajs, trajs.n_frames, [traj.n_frames for traj in trajs]) 

228 2 31 [15, 16] 

229 

230 """ 

231 

232 _mdtraj_attr = [ 

233 "n_frames", 

234 "n_atoms", 

235 "n_chains", 

236 "n_residues", 

237 "openmm_boxes", 

238 "openmm_positions", 

239 "time", 

240 "timestep", 

241 "xyz", 

242 "unitcell_vectors", 

243 "unitcell_lengths", 

244 "unitcell_angles", 

245 "_check_valid_unitcell", 

246 "_distance_unit", 

247 "_have_unitcell", 

248 "_rmsd_traces", 

249 "_savers", 

250 "_string_summary_basic", 

251 "_time", 

252 "_time_default_to_arange", 

253 "_topology", 

254 "_unitcell_angles", 

255 "_unitcell_lengths", 

256 "_xyz", 

257 ] 

258 

def __init__(
    self,
    traj: Union[str, Path, md.Trajectory],
    top: Optional[Union[str, Path, md.Topology]] = None,
    common_str: str = "",
    backend: Literal["no_load", "mdtraj"] = "no_load",
    index: Optional[Union[int, list[int], np.ndarray, slice]] = None,
    traj_num: Optional[int] = None,
    basename_fn: Optional[Callable] = None,
) -> None:
    """Initialize the SingleTraj object with a trajectory and a topology.

    Args:
        traj (Union[str, Path, mdtraj.Trajectory]): The trajectory. Either the
            filename (or URI) of a trajectory file or an mdtraj.Trajectory.
        top (Union[str, Path, mdtraj.Topology], optional): The topology file
            or an mdtraj.Topology. If omitted, the trajectory file itself is
            used as topology when it is a URI or has a topology extension.
            Defaults to None.
        common_str (str, optional): A string to group trajectories of similar
            topology. Defaults to ''.
        backend (Literal['no_load', 'mdtraj'], optional): Chooses the backend
            to load trajectories.
                * 'mdtraj' loads the trajectory into RAM right away.
                * 'no_load' stores the file locations only.
            Defaults to 'no_load'.
        index (Union[int, list[int], np.ndarray, slice], optional): Frame
            selection. It is stored as a tuple of consecutive selections in
            `self.index`. If None is provided the trajectory is used as is.
            Defaults to None.
        traj_num (Optional[int], optional): An integer identifying this
            trajectory among several trajectories. Defaults to None.
        basename_fn (Optional[Callable]): A function applied to `traj_file`
            to obtain `basename`. If None is provided,
            `lambda x: os.path.basename(x).split('.')[0]` is used.
            Defaults to None.

    Raises:
        ValueError: If `traj` is neither str, Path nor mdtraj.Trajectory.
        BadError: If `traj` looks like a topology file that differs from
            `top`, or if the 'CVs' group of an .h5 file can't be read.

    """
    # defaults
    self.backend = backend
    self.common_str = common_str
    self.index = index if isinstance(index, tuple) else (index,)
    self.traj_num = traj_num
    self._loaded_once = backend != "no_load"
    self._orig_frames = np.array([])
    self._CVs = xr.Dataset()
    if basename_fn is None:
        basename_fn = lambda x: os.path.basename(x).split(".")[0]
    self.basename_fn = basename_fn

    # save the filename
    if isinstance(traj, str):
        # URIs stay plain strings, local files become Path objects
        if self._validate_uri(traj):
            self._traj_file = traj
        else:
            self._traj_file = Path(traj)
    elif isinstance(traj, Path):
        self._traj_file = traj
    elif isinstance(traj, md.Trajectory):
        self._traj_file = Path("")
        self._top_file = Path("")
        self.backend = "mdtraj"
        self.trajectory = traj
        self._loaded_once = True
        self._orig_frames = np.arange(traj.n_frames)
    else:
        raise ValueError(
            f"Argument `traj` takes either str, Path, or "
            f"mdtraj.Trajectory. You supplied: {type(traj)}."
        )

    # NOTE(review): the nesting of the two branches below was reconstructed
    # from a listing without indentation -- confirm against the repository.
    if top is not None:
        if isinstance(top, md.Topology):
            self.topology = top
            self._top_file = Path("")
        else:
            self._top_file = Path(top)
            if isinstance(self._traj_file, Path):
                if (
                    self._traj_file.suffix in _TOPOLOGY_EXTS
                    and self._traj_file != self._top_file
                ):
                    raise BadError(
                        f"You probably mixed up the input. Normally you "
                        f"want to instantiate with `SingleTraj(traj, top)`. Based on "
                        f"the files and the extensions you provided "
                        f"(traj={self._traj_file.name} and top="
                        f"{self._top_file.name}), you want to change the "
                        f"order of the arguments, or use keyword arguments."
                    )
    else:
        if isinstance(self._traj_file, Path):
            if self._traj_file.suffix in _TOPOLOGY_EXTS:
                self._top_file = self._traj_file
        else:
            # a URI serves as trajectory and topology at the same time
            self._top_file = self._traj_file

    if self.backend == "no_load":
        self.trajectory = False
        self.topology = False
    else:
        if isinstance(self._traj_file, str) and self._validate_uri(self._traj_file):
            traj = md.load_pdb(str(self.traj_file))
        elif self._traj_file != Path(""):
            traj = md.load(str(self._traj_file), top=str(self._top_file))
        # if neither branch ran, `traj` is still the provided md.Trajectory
        self.trajectory = traj
        self.topology = self.trajectory.top
        self._loaded_once = True
        self._orig_frames = np.arange(traj.n_frames)

    # maybe load CVs from h5 file
    if isinstance(self._traj_file, Path) and self._traj_file.suffix == ".h5":
        with h5.File(self.traj_file, "r") as file:
            CVs_in_file = "CVs" in file.keys()
        if CVs_in_file:
            try:
                self._CVs = xr.open_dataset(
                    self.traj_file,
                    group="CVs",
                    engine="h5netcdf",
                    backend_kwargs={"phony_dims": "access"},
                )
            # bad formatted h5 file
            except OSError:
                # The handle used for the 'CVs' check is closed by now, so
                # the file has to be reopened to read the raw datasets.
                with h5.File(self.traj_file, "r") as file:
                    DAs = {
                        k: construct_xarray_from_numpy(self, i[()], k)
                        for k, i in file["CVs"].items()
                    }
                DS = xr.Dataset(DAs)
                self._CVs.update(DS)
            # other exceptions probably due to formatting
            except Exception as e:
                raise BadError(
                    f"The formatting of the data in the file "
                    f"{self.traj_file} is off. Xarray could "
                    f"not load the group 'CVs' and failed with {e}"
                ) from e

            # get the original frame indices from the dataset
            # this is the only case where we want to overwrite
            # this variable
            if not self._loaded_once:
                self._loaded_once = True
            self._orig_frames = self._CVs["frame_num"].values

            # iteratively apply index
            index = self._orig_frames
            for ind in self.index:
                if ind is not None:
                    index = index[ind]

            # set the _CVs accordingly
            self._CVs = self._CVs.loc[{"frame_num": index}]

421 

@classmethod
def from_pdb_id(cls, pdb_id: str) -> SingleTraj:
    """Alternate constructor for the SingleTraj class.

    Builds the rcsb.org download URL for `pdb_id` (converted to upper case)
    and instantiates the class with it.

    Args:
        pdb_id (str): The 4-letter pdb id.

    Returns:
        SingleTraj: An instance created from the pdb URL.

    """
    return cls(f"https://files.rcsb.org/view/{pdb_id.upper()}.pdb")

437 

@property
def _original_frame_indices(self):
    """The stored original frame indices; calls `load_traj` first if nothing was loaded yet."""
    if not self._loaded_once:
        self.load_traj()
    return self._orig_frames

445 

@property
def traj_file(self) -> str:
    """str: The trajectory file converted to a string (rather than a `pathlib.Path`)."""
    location = self._traj_file
    return str(location)

450 

@property
def top_file(self) -> str:
    """str: The topology file converted to a string (rather than a `pathlib.Path`)."""
    location = self._top_file
    return str(location)

455 

@property
def traj(self) -> md.Trajectory:
    """mdtraj.Trajectory: The content of `self.trajectory`.

    If `backend` is 'no_load', the trajectory is loaded via `load_traj`,
    remembered for the return value and `unload` is called again before
    returning, so the instance itself stays in the 'no_load' state.

    """
    if self.backend != "no_load":
        return self.trajectory
    self.load_traj()
    loaded = self.trajectory
    self.unload()
    return loaded

469 

@property
def _traj(self):
    """Getter that only exists so that the setter below can be defined.

    Always returns None, because the setter stores its result in
    `self.trajectory`.

    """
    return None

@_traj.setter
def _traj(self, traj_file):
    """Load `traj_file` with `_load_traj` and store the result in `self.trajectory`.

    `self.index` and `self._top_file` are forwarded to `_load_traj`. The
    second return value of `_load_traj` is stored as original frame indices,
    but only the first time anything gets loaded.

    """
    loaded, frame_indices = _load_traj(
        *self.index, traj_file=traj_file, top_file=self._top_file
    )
    self.trajectory = loaded
    if self._loaded_once:
        return
    self._loaded_once = True
    self._orig_frames = frame_indices

486 

@property
def basename(self) -> str:
    """str: The result of applying `basename_fn` to `traj_file`.

    None is returned if `traj_file` is empty or equal to '.'.

    """
    location = self.traj_file
    if not location or str(location) == ".":
        return None
    return self.basename_fn(location)

495 

@property
def extension(self) -> str:
    """str: The file extension of the trajectory file, including the leading dot.

    For a `pathlib.Path` this is its suffix. For anything else (a string)
    it is the part after the last dot.

    """
    source = self._traj_file
    if not isinstance(source, Path):
        return "." + source.split(".")[-1]
    return source.suffix

503 

@property
def id(self) -> np.ndarray:
    """np.ndarray: Identifiers of the frames in this SingleTraj object.

    The original frame indices are narrowed down by every entry of
    `self.index` that is not None. If `traj_num` is None, the result is the
    one-dimensional array of remaining frame indices. Otherwise the result
    has two columns: `self.id[:, 0]` is filled with `traj_num` and
    `self.id[:, 1]` contains the frames.

    """
    scalar_types = (int, np.integer)
    frames = self._original_frame_indices

    # a scalar can't be indexed any further
    if not isinstance(frames, scalar_types):
        for selection in self.index:
            if selection is not None:
                frames = frames[selection]

    # reduced all the way to a single frame
    if isinstance(frames, scalar_types):
        if self.traj_num is None:
            return np.array([frames])
        return np.array([[self.traj_num, frames]])

    if self.traj_num is None:
        return frames
    traj_column = np.full(len(frames), self.traj_num)
    return np.array([traj_column, frames]).T

539 

@property
def n_frames(self) -> int:
    """int: Number of frames in traj.

    For .h5 and .xtc files only the frame count is read from the file. For
    every other extension the trajectory is loaded into memory. Be aware.

    As a side effect the original frame indices are stored, if nothing has
    been loaded before.

    """
    scalar_types = (int, np.integer)

    # A scalar index always selects exactly one frame, no file access needed.
    scalar_indices = [ind for ind in self.index if isinstance(ind, scalar_types)]
    if scalar_indices:
        self._loaded_once = True
        self._orig_frames = scalar_indices[0]
        return 1

    # `extension` also works when the trajectory is a URI held as a plain
    # string, which has no `.suffix` attribute.
    extension = self.extension
    if extension == ".h5":
        with h5.File(self.traj_file, "r") as file:
            n_total = file["coordinates"].shape[0]
        if not self._loaded_once:
            self._orig_frames = np.arange(n_total)
    elif extension == ".xtc":
        reader = mda.coordinates.XTC.XTCReader(self.traj_file)
        try:
            n_total = reader.n_frames
        finally:
            # don't leave the file handle of the reader open
            reader.close()
        if not self._loaded_once:
            self._loaded_once = True
            self._orig_frames = np.arange(n_total)
    else:
        self.load_traj()
        return self.traj.n_frames

    # Apply the stored selections one after another. Entries are compared
    # with `is not None`, because `==` is ambiguous for numpy arrays.
    frames = np.arange(n_total)
    for ind in self.index:
        if ind is not None:
            frames = frames[ind]

    # a selection that collapsed to a scalar is one single frame
    if isinstance(frames, scalar_types):
        return 1
    return len(frames)

595 

@property
def _n_frames_base_h5_file(self) -> int:
    """int: Length of the first axis of the 'coordinates' dataset in the file `traj_file`."""
    with h5.File(self.traj_file, "r") as h5_file:
        coordinates_shape = h5_file["coordinates"].shape
        return coordinates_shape[0]

601 

@property
def CVs_in_file(self) -> bool:
    """bool: Is True, if `traj_file` has the extension .h5 and contains the key 'CVs'."""
    if self.extension != ".h5":
        return False
    with h5.File(self.traj_file, "r") as h5_file:
        return "CVs" in h5_file.keys()

610 

@property
def n_atoms(self) -> int:
    """int: Number of atoms in traj.

    For .h5 files the number is taken from the second axis of the
    'coordinates' dataset. For all other extensions the trajectory is
    loaded into memory. Be aware.

    """
    if self.extension != ".h5":
        self.load_traj()
        return self.traj.n_atoms
    with h5.File(self.traj_file, "r") as h5_file:
        return h5_file["coordinates"].shape[1]

624 

@property
def n_residues(self) -> int:
    """int: Number of residues, taken from the topology `self.top`."""
    topology = self.top
    return topology.n_residues

629 

@property
def n_chains(self) -> int:
    """int: Number of chains, taken from the topology `self.top`."""
    topology = self.top
    return topology.n_chains

634 

@property
def top(self) -> md.Topology:
    """mdtraj.Topology: The topology belonging to this trajectory.

    The topology is looked up in this order:
        * If `top_file` is a URI, it is loaded with `md.load_pdb`.
        * If the backend is 'no_load' or the trajectory is a .h5 file,
          it is read from `top_file` with `md.load_topology`.
        * If the backend is 'mdtraj', the topology of `self.traj` is used.
        * Otherwise the stored `self.topology` is returned.

    Raises:
        FileNotFoundError: If `top_file` is set, is not a URI and does
            not exist on disk.

    """
    top_file = self.top_file
    if top_file and top_file != ".":
        if self._validate_uri(top_file):
            return md.load_pdb(top_file).top
        if not os.path.isfile(top_file):
            raise FileNotFoundError(
                errno.ENOENT, os.strerror(errno.ENOENT), top_file
            )

    # either condition means the topology has to come from the file
    if self.backend == "no_load" or self.extension == ".h5":
        return md.load_topology(top_file)
    if self.backend == "mdtraj":
        return self.traj.top
    return self.topology

671 

@property
def CVs(self) -> dict[str, np.ndarray]:
    """dict: A plain dict built from the data variables of the `self._CVs` dataset.

    Every data variable is converted to a numpy array whose first axis is
    squeezed away. If a variable contains NaNs, `dropna` is applied along
    the dimension named in its 'feature_axis' attribute beforehand.
    If `self._CVs` is empty, an empty dict is returned.

    """
    if not self._CVs:
        return {}

    plain = {}
    for name, data_array in self._CVs.data_vars.items():
        if np.any(np.isnan(data_array)):
            data_array = data_array.dropna(data_array.attrs["feature_axis"])
        plain[name] = data_array.values.squeeze(0)
    return plain

690 

def _validate_uri(self, uri: str) -> bool:
    """Pass `uri` (converted to str) to `encodermap.misc.misc._validate_uri` and return its result."""
    from encodermap.misc.misc import _validate_uri as check_uri

    return check_uri(str(uri))

696 

def load_traj(
    self,
    new_backend: Literal["no_load", "mdtraj"] = "mdtraj",
) -> None:
    """Switches the trajectory to a new backend.

    Switching from 'no_load' to 'mdtraj' assigns `traj_file` to the `_traj`
    setter and afterwards stores `self.top` in `self.topology`. If the
    assignment raises, the backend is reset to 'no_load' and the exception
    is re-raised. Switching from 'mdtraj' to 'no_load' calls `unload`.
    Nothing happens if the backend is already `new_backend`.

    Args:
        new_backend (str, optional): Can either be:
            * `mdtraj` to load the trajectory using mdtraj.
            * `no_load` to not load the traj (unload).
            Defaults to `mdtraj`.

    """
    current = self.backend
    if current == new_backend:
        return

    if current == "mdtraj" and new_backend == "no_load":
        self.unload()
    elif current == "no_load" and new_backend == "mdtraj":
        self.backend = new_backend
        # call the setter again
        try:
            self._traj = self.traj_file
        except Exception:
            # loading failed, so the instance is still not loaded
            self.backend = "no_load"
            raise
        self.topology = self.top

726 

def select(
    self,
    sel_str: str = "all",
) -> np.ndarray:
    """Execute a selection against the topology.

    The selection string is forwarded to the `select` method of `self.top`.

    Args:
        sel_str (str, optional): What to select. Defaults to 'all'.

    Returns:
        np.ndarray: Whatever `self.top.select` returns for `sel_str`.

    See also:
        https://mdtraj.org/1.9.4/atom_selection.html

    Examples:
        >>> import encodermap as em
        >>> traj = em.SingleTraj("https://files.rcsb.org/view/1GHC.pdb")
        >>> select = traj.select("name CA and resSeq 1")
        >>> select
        array([1])

        >>> traj = em.SingleTraj("https://files.rcsb.org/view/1GHC.pdb")
        >>> select = traj.select("name CA and resSeq 1")
        >>> traj.top.atom(select[0])
        MET1-CA

    """
    topology = self.top
    return topology.select(sel_str)

753 

def unload(
    self,
    CVs: bool = False,
) -> None:
    """Clears up RAM by deleting the trajectory info and optionally the CV data.

    Sets the backend to 'no_load', removes all names listed in
    `_mdtraj_attr` from the instance dictionary and sets `trajectory` and
    `topology` to False. Nothing happens if the backend already is 'no_load'.

    Args:
        CVs (bool, optional): Whether to also replace the loaded CVs with an
            empty dataset. Defaults to False.

    """
    if self.backend == "no_load":
        return

    self.backend = "no_load"
    instance_dict = self.__dict__
    for name in self._mdtraj_attr:
        # attributes that were never set are simply skipped
        instance_dict.pop(name, None)
    if CVs:
        self._CVs = xr.Dataset()
    self.trajectory = False
    self.topology = False

777 

def _gen_ensemble(self) -> TrajEnsemble:
    """Creates a TrajEnsemble with this traj in it.

    The ensemble is built from the file locations if a trajectory file is
    set (`traj_file` is not '.'), otherwise from `self.traj` and `self.top`.
    The CVs and the `traj_num` of this object are handed over to the first
    trajectory of the ensemble. The ensemble is also stored in
    `self.info_all`.

    Returns:
        TrajEnsemble: The ensemble containing this trajectory.

    """
    if self.traj_file != ".":
        trajs, tops = [self._traj_file], [self._top_file]
    else:
        trajs, tops = [self.traj], [self.top]

    self.info_all = TrajEnsemble(
        trajs,
        tops,
        backend=self.backend,
        common_str=[self.common_str],
    )
    first_traj = self.info_all.trajs[0]
    first_traj._CVs = self._CVs
    first_traj.traj_num = self.traj_num
    return self.info_all

803 

def _add_along_traj(self, y: SingleTraj) -> TrajEnsemble:
    """Puts self and y into a TrajEnsemble object.

    Both objects are converted with `_gen_ensemble` and the two results
    are added, so the trajectories are not appended along the time axis but
    rather along the `trajectory` axis.

    Args:
        y (SingleTraj): The other SingleTraj trajectory.

    Returns:
        TrajEnsemble: The sum of both single-trajectory ensembles.

    """
    own_ensemble = self._gen_ensemble()
    other_ensemble = y._gen_ensemble()
    return own_ensemble + other_ensemble

818 

def get_single_frame(self, key: int) -> SingleTraj:
    """Returns a single frame from the trajectory.

    This is a thin wrapper around `self.__getitem__`.

    Args:
        key (Union[int, np.integer]): Index of the frame.

    Returns:
        SingleTraj: Whatever `self.__getitem__(key)` returns.

    Examples:
        >>> # Load traj from pdb
        >>> import encodermap as em
        >>> traj = em.SingleTraj("https://files.rcsb.org/view/1GHC.pdb")
        >>> traj.n_frames
        14

        >>> # Load the same traj and give it a number for recognition in a set of multiple trajs
        >>> traj = em.SingleTraj("https://files.rcsb.org/view/1GHC.pdb", traj_num=5)
        >>> frame = traj.get_single_frame(2)
        >>> frame.id
        array([[5, 2]])

    """
    pick_frame = self.__getitem__
    return pick_frame(key)

840 

def show_traj(self, gui: bool = True) -> nglview.view:
    """Returns an nglview view object of `self.traj`.

    Args:
        gui (bool, optional): Forwarded to `nglview.show_mdtraj` as its
            `gui` argument. Defaults to True.

    Returns:
        view (nglview.widget): The nglview widget object.

    """
    import nglview

    return nglview.show_mdtraj(self.traj, gui=gui)

852 

def load_CV(
    self,
    data: SingleTrajFeatureType,
    attr_name: Optional[str] = None,
    cols: Optional[list[int]] = None,
    labels: Optional[list[str]] = None,
    override: bool = False,
) -> None:
    """Load CVs into traj. Many options are possible. Provide xarray,
    numpy array, em.loading.feature, em.featurizer, and even string!

    This method loads CVs into the SingleTraj class. Many ways of doing so are available:
        * np.ndarray: The easiest way. Provide a np array and a name for the array and the data
            will be saved as an instance variable, accessible via instance.name.
        * xarray.DataArray: You can load a multidimensional xarray as data into the class. Please
            refer to xarray's own documentation if you want to create one yourself.
        * xarray.Dataset: You can add another dataset to the existing _CVs.
        * em.loading.feature: If you provide one of the features from em.loading.features the resulting
            features will be loaded and also placed under the provided name.
        * em.Featurizer: If you provide a full featurizer, the data will be generated and put as an
            instance variable as the provided name.
        * str: If a string is provided, the data will be loaded from a .txt, .npy, or NetCDF / HDF5 .nc file.

    Args:
        data (Union[str, np.ndarray, xr.DataArray, em.loading.feature, em.Featurizer]):
            The CV to load. Either as numpy array, xarray DataArray, encodermap or pyemma feature, or full
            encodermap Featurizer.
        attr_name (Union[None, str], optional): The name under which the CV should be found in the class.
            Is needed, if a raw numpy array is passed, otherwise the name will be generated from the filename
            (if data == str), the DataArray.name (if data == xarray.DataArray), or the feature name.
        cols (Union[list, None], optional): A list specifying the columns to use for the highD data.
            If your highD data contains (x,y,z,...)-errors or has an enumeration
            column at col=0 this can be used to remove this unwanted data.
        labels (Union[list, str, None], optional): If you want to label the data you provided pass a list of str.
            If set to None, the features in this dimension will be labelled as
            [f"{attr_name.upper()} FEATURE {i}" for i in range(self.n_frames)]. If a str is provided, the features
            will be labelled as [f"{attr_name.upper()} {label.upper()} {i}" for i in range(self.n_frames)]. If a list of str
            is provided it needs to have the same length as the traj has frames. Defaults to None.
        override (bool): Whether to overwrite existing CVs. The method will also
            print a message which CVs have been overwritten.

    Examples:
        >>> # Load the backbone torsions from a time-resolved NMR ensemble from the pdb
        >>> import encodermap as em
        >>> traj = em.SingleTraj("https://files.rcsb.org/view/1GHC.pdb")
        >>> central_dihedrals = em.loading.features.CentralDihedrals(traj.top)
        >>> traj.load_CV(central_dihedrals)
        >>> traj.central_dihedrals.shape
        (1, 14, 222)
        >>> # The values are stored in an xarray Dataset to track every possible datafield
        >>> traj = em.SingleTraj("https://files.rcsb.org/view/1GHC.pdb")
        >>> traj.load_CV(em.loading.features.CentralDihedrals(traj.top))
        >>> print(traj._CVs['central_dihedrals']['CENTRALDIHEDRALS'].values[:2])
        ['CENTERDIH PSI RESID MET:   1 CHAIN 0'
         'CENTERDIH OMEGA RESID MET:   1 CHAIN 0']

    Raises:
        FileNotFoundError: When the file given by `data` does not exist.
        IOError: When the provided filename does not have .txt, .npy or .nc extension.
        TypeError: When `data` does not match the specified input types.
        Exception: When a numpy array has been passed as `data` and no `attr_name` has been provided.
            Also raised here when the number of frames of the new CVs does not
            match the trajectory, or when merging fails and `override` is False.
        BadError: When the provided `attr_name` is str, but can not be a python identifier.

    """
    from .trajinfo_utils import load_CVs_singletraj

    new_CVs = load_CVs_singletraj(data, self, attr_name, cols, labels)
    if self.traj_num is not None:
        # The message is a real string now. The original passed `print(...)`,
        # which evaluates to None and left the AssertionError without text.
        assert new_CVs.coords["traj_num"] == np.array([self.traj_num]), (
            f"The CVs loaded from {data} carry traj_num "
            f"{new_CVs.coords['traj_num'].values}, but this trajectory has "
            f"traj_num {self.traj_num}."
        )

    # check the sizes: compare against already loaded CVs if there are any,
    # otherwise against the trajectory itself
    len_CVs = new_CVs.coords["frame_num"].shape[0]
    if self._CVs:
        n_frames = self._CVs.coords["frame_num"].shape[0]
    else:
        n_frames = self.n_frames
    if n_frames != len_CVs:
        raise Exception(
            f"Loading the requested {data} CVs is not possible, as "
            f"they are not aligned with the number of frames in the "
            f"trajectory. The CVs have {len_CVs} frames, the trajectory "
            f"{self._traj_file} has {n_frames} frames."
        )

    # check the keys and whether they get overwritten
    if hasattr(new_CVs, "keys"):
        new_keys = new_CVs.keys()
    else:
        new_keys = {new_CVs.name}

    if override:
        if overwritten_keys := self._CVs.keys() & new_keys:
            print(
                f"Overwriting the following CVs with new values: "
                f"{overwritten_keys}."
            )
        # new_CVs comes first, so with compat="override" its variables win
        self._CVs = xr.merge(
            [new_CVs, self._CVs],
            combine_attrs="override",
            compat="override",
            join="left",
        )
    else:
        try:
            # NOTE(review): `combine_attrs` is not defined in this method and is
            # presumably a module-level setting - confirm it exists.
            self._CVs = xr.merge([self._CVs, new_CVs], combine_attrs=combine_attrs)
        except xr.core.merge.MergeError as e:
            msg = (
                f"Could not add the CV `{attr_name}` to the CVs of the traj "
                f"likely due to it being already in the CVs "
                f"({list(self.CVs.keys())}). Set `override` to True to "
                f"overwrite these CVs."
            )
            raise Exception(msg) from e

967 

def save_CV_as_numpy(
    self,
    attr_name: str,
    fname: Optional[str] = None,
    overwrite: bool = False,
) -> None:
    """Write one CV of this traj to a `.npy` file.

    This got its own method for parallelization purposes.

    Args:
        attr_name (str): Name of the CV to save.
        fname (str, optional): Can be either a filename or an existing
            directory. For a directory, the file is placed inside it as
            f"{self.basename}_{attr_name}.npy". If None, that same name is
            used as a relative path. Defaults to None.
        overwrite (bool, opt): Whether to overwrite the file. Defaults to False.

    Raises:
        IOError: When the file already exists and overwrite is set to False.

    """
    target = fname
    if target is None:
        target = f"{self.basename}_{attr_name}.npy"
    if os.path.isdir(target):
        # a directory was given, put the default filename into it
        target = os.path.join(target, f"{self.basename}_{attr_name}.npy")
    already_there = os.path.isfile(target)
    if already_there and not overwrite:
        raise IOError(f"{target} already exists. Set overwrite=True to overwrite.")
    np.save(target, self.CVs[attr_name])

994 

def atom_slice(
    self,
    atom_indices: np.ndarray,
    inplace: bool = False,
) -> Union[None, SingleTraj]:
    """Create a new trajectory from a subset of atoms.

    A warning is issued if CVs are loaded, because the returned
    trajectory is built without them.

    Args:
        atom_indices (Union[list, np.array]): The indices of the
            atoms to keep.
        inplace (bool, optional): Whether to overwrite the current instance,
            or return a new instance. Defaults to False.

    Returns:
        Union[None, SingleTraj]: The sliced trajectory if `inplace` is
            False, otherwise None.

    """
    if self._CVs:
        warnings.warn(
            "Dropping CVs from trajectory. Defining CVs for atom slice is currently not possible."
        )
    sliced = self.traj.atom_slice(atom_indices, inplace=inplace)
    new = SingleTraj(
        sliced,
        common_str=self.common_str,
        backend="mdtraj",
        index=self.index,
        traj_num=self.traj_num,
    )
    if not inplace:
        return new
    # NOTE(review): rebinding the local name `self` does not alter the
    # caller's object. With inplace=True only whatever
    # `self.traj.atom_slice(..., inplace=True)` did to the underlying traj
    # persists; the CVs and metadata of this instance stay as they were.
    # Confirm whether that is the intended in-place behavior.
    self = new
    return None

1024 

def join(self, other: SingleTraj) -> md.Trajectory:
    """Join two trajectories together along the time/frame axis.

    `other` may also be a plain mdtraj.Trajectory, in which case it is
    used directly instead of its `.traj` attribute.

    Returns a mdtraj.Trajectory and thus loses CVs, filenames, etc.

    """
    is_raw_mdtraj = isinstance(other, md.core.trajectory.Trajectory)
    return self.traj.join(other if is_raw_mdtraj else other.traj)

1034 

def stack(self, other: SingleTraj) -> md.Trajectory:
    """Stack two trajectories along the atom axis.

    `other` may also be a plain mdtraj.Trajectory, in which case it is
    used directly instead of its `.traj` attribute.

    Returns a mdtraj.Trajectory and thus loses CVs, filenames, etc.

    """
    is_raw_mdtraj = isinstance(other, md.core.trajectory.Trajectory)
    return self.traj.stack(other if is_raw_mdtraj else other.traj)

1044 

def superpose(
    self,
    reference: Union[md.Trajectory, SingleTraj],
    frame: int = 0,
    atom_indices: Optional[np.ndarray] = None,
    ref_atom_indices: Optional[np.ndarray] = None,
    parallel: bool = True,
) -> SingleTraj:
    """Superpose each conformation in this trajectory upon a reference.

    Args:
        reference (Union[mdtraj.Trajectory, SingleTraj]): The reference frame to align to.
        frame (int, optional): Align to this frame in reference. Defaults to 0.
        atom_indices (Union[np.array, None], optional): Indices in self, used to calculate
            RMS values. Defaults to None, which means all atoms will be used.
        ref_atom_indices (Union[np.array, None], optional): Indices in reference, used to calculate
            RMS values. Defaults to None, which means all atoms will be used.
        parallel (bool, optional): Use OpenMP to run the superposition in parallel over multiple cores.

    Returns:
        SingleTraj: A new aligned trajectory.

    Raises:
        NotImplementedError: When this trajectory has CVs loaded, because
            they can not be carried over to the new trajectory yet.

    """
    # Unwrap the reference exactly once. The original had no `else`, so a
    # plain mdtraj.Trajectory fell through to `reference.traj` and failed
    # with an AttributeError.
    if isinstance(reference, md.core.trajectory.Trajectory):
        ref_traj = reference
    else:
        ref_traj = reference.traj

    new = SingleTraj(
        self.traj.superpose(
            ref_traj, frame, atom_indices, ref_atom_indices, parallel
        )
    )
    if self.traj_file:
        new.traj_file = self.traj_file
    if self.top_file:
        new.top_file = self.top_file
    if self._CVs:
        raise NotImplementedError("CV inheritance not implemented yet.")
    return new

1086 

def save(
    self,
    fname: str,
    CVs: Union[str, list[str]] = "all",
    overwrite: bool = False,
) -> None:
    """Save the trajectory as HDF5 fileformat to disk.

    Args:
        fname (str): The filename.
        CVs (Union[List, str], optional): Either provide a list of strings
            of the CVs you would like to save to disk, the name of a single
            CV, or set to 'all' to save all CVs. Defaults to 'all'.
        overwrite (bool, optional): Whether to force overwrite an existing file.
            Defaults to False.

    Raises:
        IOError: When the file already exists and overwrite is False.

    """
    if os.path.isfile(fname) and not overwrite:
        raise IOError(f"{fname} already exists. Set overwrite=True to overwrite.")

    self.traj.save_hdf5(fname, force_overwrite=overwrite)
    if CVs == "all":
        save_netcdf_alongside_mdtraj(fname, self._CVs)
        return

    # A single CV name must not be iterated character by character.
    if isinstance(CVs, str):
        CVs = [CVs]

    if self._CVs and CVs:
        with h5.File(fname, "a") as file:
            if "CVs" in file.keys():
                grp = file["CVs"]
            else:
                grp = file.create_group("CVs")
            for key in CVs:
                value = self._CVs[key]
                # the frame axis is expected at position 1 of every CV
                assert self.n_frames == value.shape[1]
                grp.create_dataset(name=key, data=value)

1124 

1125 def __copy__(self): 

1126 cls = self.__class__ 

1127 result = cls.__new__(cls) 

1128 result.__dict__.update(self.__dict__) 

1129 return result 

1130 

1131 def __deepcopy__(self, memo): 

1132 from copy import deepcopy 

1133 

1134 cls = self.__class__ 

1135 result = cls.__new__(cls) 

1136 memo[id(self)] = result 

1137 for k, v in self.__dict__.items(): 

1138 setattr(result, k, deepcopy(v, memo)) 

1139 return result 

1140 

def __hash__(self) -> int:
    """Hash the trajectory data.

    XOR-combines the hash of the topology with the hashes of the
    coordinate and time arrays and, if they are set, of the unitcell
    lengths and angles.
    """
    digest = hash(self.top)
    # combine with hashes of arrays
    digest ^= _hash_numpy_array(self._xyz)
    digest ^= _hash_numpy_array(self.time)
    for name in ("_unitcell_lengths", "_unitcell_angles"):
        cell = getattr(self, name)
        if cell is not None:
            digest ^= _hash_numpy_array(cell)
    return digest

1151 

def __eq__(self, other: SingleTraj) -> bool:
    """Two SingleTraj objects are the same, when the trajectories are the same,
    the files are the same and the loaded CVs are the same.

    Comparing against anything that is not a SingleTraj returns
    `NotImplemented`, so Python falls back to its default handling instead
    of failing with an AttributeError.
    """
    if not isinstance(other, SingleTraj):
        return NotImplemented
    # Cheap comparisons first. The hash goes over the full coordinate
    # arrays and is only needed when files and CVs already agree.
    if self._traj_file != other._traj_file:
        return False
    if not self._CVs.equals(other._CVs):
        return False
    return self.__hash__() == other.__hash__()

1159 

1160 def __reversed__(self) -> SingleTraj: 

1161 """Reverses the frame order of the traj. Same as traj[::-1]""" 

1162 return self[::-1] 

1163 

1164 def __enter__(self): 

1165 """Enters context manager. Inside context manager, the traj stays loaded.""" 

1166 self.load_traj() 

1167 return self 

1168 

1169 def __exit__(self, type, value, traceback): 

1170 """Exits the context manager and deletes unwanted variables.""" 

1171 self.unload() 

1172 

1173 def __iter__(self): 

1174 """Iterate over frames in this class. Returns the correct 

1175 CVs along with the frame of the trajectory.""" 

1176 self._index = 0 

1177 if len(self) == 0 and self.index is None: 1177 ↛ 1178line 1177 didn't jump to line 1178, because the condition on line 1177 was never true

1178 self.load_traj() 

1179 return self 

1180 

1181 def __next__(self): 

1182 if len(self.id) == 1: 1182 ↛ 1183line 1182 didn't jump to line 1183, because the condition on line 1182 was never true

1183 return self 

1184 if self._index >= self.n_frames: 

1185 raise StopIteration 

1186 else: 

1187 self._index += 1 

1188 return self[self._index - 1] 

1189 

1190 def __getitem__(self, key): 

1191 """This method returns another trajectory as an SingleTraj class. 

1192 

1193 Args: 

1194 key (Union[int, list[int], np.ndarray, slice]): Indexing the trajectory 

1195 can be done by int (returns a traj with 1 frame), lists of int or 

1196 np.ndarray (returns a new traj with len(traj) == len(key)), or 

1197 slice ([::3]), which returns a new traj with the correct number of 

1198 frames. 

1199 

1200 Returns: 

1201 Info_Single: An SingleTraj object with this frame in it. 

1202 

1203 """ 

1204 if not isinstance(key, (int, np.int_, list, np.ndarray, slice)): 

1205 raise TypeError( 

1206 f"Indexing of `SingleTraj` requires the index to " 

1207 f"be one of the following types: (int, np.int, " 

1208 f"list, np.ndarray, slice), you provided {type(key)}." 

1209 ) 

1210 

1211 if any([isinstance(i, (int, np.integer)) for i in self.index]) and key != 0: 

1212 raise IndexError("SingleTraj index out of range for traj with 1 frame.") 

1213 

1214 if isinstance(key, (int, np.integer)): 

1215 if key > self.n_frames: 1215 ↛ 1216line 1215 didn't jump to line 1216, because the condition on line 1215 was never true

1216 raise IndexError( 

1217 f"Index {key} out of range for traj with " 

1218 f"{self.n_frames} frames." 

1219 ) 

1220 if isinstance(key, (list, np.ndarray)): 

1221 if any([k > self.n_frames for k in key]): 1221 ↛ 1222line 1221 didn't jump to line 1222, because the condition on line 1221 was never true

1222 raise IndexError( 

1223 f"At least one index in {key} out of range for" 

1224 f"traj with {self.n_frames} frames." 

1225 ) 

1226 

1227 # append the index to the list of "transformations" 

1228 new_index = (*self.index, key) 

1229 

1230 # build a new traj from that 

1231 if self.backend == "no_load": 

1232 traj_out = SingleTraj( 

1233 self.traj_file, 

1234 self.top_file, 

1235 backend=self.backend, 

1236 common_str=self.common_str, 

1237 index=new_index, 

1238 traj_num=self.traj_num, 

1239 basename_fn=self.basename_fn, 

1240 ) 

1241 else: 

1242 traj_out = SingleTraj( 

1243 self.trajectory[key], 

1244 self.topology, 

1245 backend=self.backend, 

1246 common_str=self.common_str, 

1247 index=new_index, 

1248 traj_num=self.traj_num, 

1249 basename_fn=self.basename_fn, 

1250 ) 

1251 traj_out._traj_file = self._traj_file 

1252 traj_out._top_file = self._top_file 

1253 assert traj_out._traj_file == self._traj_file 

1254 

1255 # the original_frames 

1256 traj_out._orig_frames = self._orig_frames 

1257 traj_out._loaded_once = self._loaded_once 

1258 

1259 # last the CVs 

1260 if self._CVs: 

1261 traj_out._CVs = self._CVs.loc[{"frame_num": key}] 

1262 

1263 return traj_out 

1264 

1265 def __add__(self, y: SingleTraj) -> TrajEnsemble: 

1266 """Addition of two SingleTraj classes yields TrajEnsemble class. A `trajectory ensemble`. 

1267 

1268 Args: 

1269 y (encodermap.SingleTraj): The other traj, that will be added. 

1270 

1271 Returns: 

1272 encodermap.TrajEnsemble: The new trajs. 

1273 

1274 """ 

1275 return self._add_along_traj(y) 

1276 

1277 def __getattr__(self, attr): 

1278 """What to do when attributes can not be obtained in a normal way?. 

1279 

1280 This method allows access of the self.CVs dictionary's values as 

1281 instance variables. Furthermore, of a mdtraj variable is called, 

1282 the traj is loaded and the correct variable is returned. 

1283 

1284 """ 

1285 if attr in self._mdtraj_attr: 

1286 self.load_traj() 

1287 return getattr(self.traj, attr) 

1288 # if self.index == (None, ): 

1289 # return getattr(self.traj, attr) 

1290 # else: 

1291 # if len(self.id) == len(self.traj): 

1292 # return getattr(self.traj, attr) 

1293 # else: 

1294 # return getattr(self.traj[self.index], attr) 

1295 elif attr in self._CVs: 1295 ↛ 1305line 1295 didn't jump to line 1305, because the condition on line 1295 was never false

1296 # if self._already_indexed: 

1297 # return self._CVs[attr].values.squeeze() 

1298 # if self._original_frame_indices.size != 0: 

1299 # index = self._original_frame_indices 

1300 # elif self._original_frame_indices.size == 0 and self.index is not None: 

1301 # index = self.index 

1302 # else: 

1303 # return self._CVs[attr].values.squeeze() 

1304 return self._CVs[attr].values.squeeze() # [index] 

1305 elif attr == "traj": 

1306 self.__getattribute__(attr) 

1307 elif attr == "id": 

1308 self.__getattribute__(attr) 

1309 else: 

1310 raise AttributeError(f"'SingleTraj' object has no attribute '{attr}'") 

1311 

1312 def _string_summary(self) -> str: 

1313 """Returns a summary about the current instance. 

1314 

1315 Number of frames, index, loaded CVs. 

1316 

1317 """ 

1318 s = f"encodermap.SingleTraj object. Current backend is {self.backend}." 

1319 if self.basename: 1319 ↛ 1321line 1319 didn't jump to line 1321, because the condition on line 1319 was never false

1320 s += f" Basename is {self.basename}." 

1321 if self.index is not None: 1321 ↛ 1324line 1321 didn't jump to line 1324, because the condition on line 1321 was never false

1322 with np.printoptions(threshold=1, edgeitems=1): 

1323 s += f" At indices {self.index}." 

1324 if self._CVs: 

1325 for key, value in self._CVs.items(): 

1326 shape = value.shape 

1327 if not shape: 1327 ↛ 1328line 1327 didn't jump to line 1328, because the condition on line 1327 was never true

1328 shape = 1 

1329 s += f" CV {key} with shape {shape} loaded." 

1330 else: 

1331 s += " Not containing any CVs." 

1332 if "n_atoms" in self.__dict__.keys(): 1332 ↛ 1333line 1332 didn't jump to line 1333, because the condition on line 1332 was never true

1333 s += f" Containing {self.n_atoms} atoms." 

1334 if "n_frames" in self.__dict__.keys(): 1334 ↛ 1335line 1334 didn't jump to line 1335, because the condition on line 1334 was never true

1335 s += f" Containing {self.n_frames} frames." 

1336 if self.common_str: 1336 ↛ 1337line 1336 didn't jump to line 1337, because the condition on line 1336 was never true

1337 s += f" Common string is {self.common_str}." 

1338 return s 

1339 

1340 def __len__(self): 

1341 return self.n_frames 

1342 

1343 def __str__(self): 

1344 return self._string_summary() 

1345 

1346 def __repr__(self): 

1347 return f"<{self._string_summary()} Object at 0x{id(self):02x}>"