Static Code Examples#
These static code examples can also help you with your EncoderMap endeavors.
Cube Example#
This is the classic EncoderMap example. In this example, EncoderMap is used to project a 3D cube into 2D and back.
Cube Example
# Encodermap imports
import encodermap as em
# generating data:
high_d_data, ids = em.misc.random_on_cube_edges(10000, sigma=0.05)
# setting parameters:
parameters = em.Parameters()
parameters.main_path = em.misc.run_path("runs/cube/")
parameters.n_steps = 10000
parameters.dist_sig_parameters = (0.2, 3, 6, 1, 2, 6)
parameters.periodicity = float("inf")
# training:
e_map = em.EncoderMap(parameters, high_d_data)
e_map.train()
# projecting:
low_d_projection = e_map.encode(high_d_data)
generated = e_map.generate(low_d_projection)
#########################################################################
# Plotting:
#########################################################################
# Third Party Imports
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import ( # somehow this conflicts with tensorflow if imported earlier
Axes3D,
)
fig = plt.figure()
axe = fig.add_subplot(111, projection="3d")
axe.scatter(
high_d_data[:, 0],
high_d_data[:, 1],
high_d_data[:, 2],
c=ids,
marker="o",
linewidths=0,
cmap="tab10",
)
fig, axe = plt.subplots()
axe.scatter(
low_d_projection[:, 0],
low_d_projection[:, 1],
c=ids,
s=5,
marker="o",
linewidths=0,
cmap="tab10",
)
fig = plt.figure()
axe = fig.add_subplot(111, projection="3d")
axe.scatter(
generated[:, 0],
generated[:, 1],
generated[:, 2],
c=ids,
marker="o",
linewidths=0,
cmap="tab10",
)
plt.show()
Cube Distance Analysis#
In this example a way of visualizing the mapping of high-dimensional differences to low-dimensional distances is introduced.
Cube Distance Analysis
# Third Party Imports
import matplotlib.pyplot as plt
# Encodermap imports
import encodermap as em
data, ids = em.misc.random_on_cube_edges(1000, sigma=0.05)
dist_sig_parameters = (0.2, 3, 6, 1, 2, 6)
periodicity = float("inf")
axe = em.plot.distance_histogram(data, periodicity, dist_sig_parameters, bins=50)
plt.show()
Trp Cage#
In this example, you can work with actual MD data from simulations of the Trp-cage protein.
Trp Cage
# Standard Library Imports
import os
# Third Party Imports
import matplotlib.pyplot as plt
import numpy as np
# Encodermap imports
import encodermap as em
# setting parameters
data_path = "./data"
run_path = em.misc.run_path("./runs")
csv_path = os.path.join(data_path, "trp_cage.csv") # can be downloaded from:
# https://www.kaggle.com/tobiasle/trp-cage-dihedrals
parameters = em.Parameters()
parameters.main_path = run_path
parameters.n_steps = 50000
print("loading data ...")
data = np.loadtxt(csv_path, skiprows=1, delimiter=",")
dihedrals = data[:, 3:41]
print("training autoencoder ...")
e_map = em.EncoderMap(parameters, dihedrals)
e_map.train()
print("projecting data ...")
low_d_projection = e_map.encode(dihedrals)
print("plotting result ...")
fig, axe = plt.subplots()
caxe = axe.scatter(
low_d_projection[:, 0],
low_d_projection[:, 1],
c=data[:, -1],
s=0.1,
cmap="nipy_spectral",
marker="o",
linewidths=0,
)
cbar = fig.colorbar(caxe)
cbar.set_label("helix rmsd")
# generate structures along path
pdb_path = os.path.join(data_path, "trp_cage_extended.pdb")
generator = em.plot.PathGenerateDihedrals(axe, e_map, pdb_path)
plt.show()
Dihedral to Cartesian diUbi#
In this example, you can work with MD data from simulations of the M1-linked diUbi protein.
Data Generation
# Standard Library Imports
import copy
import locale
import os
# Third Party Imports
import matplotlib.pyplot as plt
import MDAnalysis as md
import numpy as np
import tensorflow as tf
# Encodermap imports
import encodermap as em
molname = "diubi"
# ######################### Load data #########################
# Data available at:
# https://www.kaggle.com/andrejberg/md-simulations-of-linear-ubiquitin-dimers
structure_path = "data/{}/01.pdb".format(molname)
trajectory_paths = ["data/{}/{:02d}.xtc".format(molname, i + 1) for i in range(12)]
uni = md.Universe(structure_path, trajectory_paths)
# select only atoms in the backbone or directly bound to the backbone. Atoms in the side chains are not supported (yet)
selected_atoms = uni.select_atoms(
"backbone or name H or name O1 or (name CD and resname PRO)"
)
# The MolData object takes care of calculating all the necessary information like dihedrals, angles, bondlengths ...
# You can provide a cache_path to store this information for the next time.
moldata = em.MolData(selected_atoms, cache_path="data/{}/cache".format(molname))
# ######################### Define parameters #########################
total_steps = 50000
# First we want to train without C_alpha cost.
# Finally, we want to activate the C_alpha cost to improve the long-range order of the generated conformations
parameters = em.ADCParameters()
parameters.main_path = em.misc.run_path("runs/{}".format(molname))
parameters.cartesian_cost_scale = 0
parameters.cartesian_cost_variant = "mean_abs"
parameters.cartesian_cost_scale_soft_start = (
int(total_steps / 10 * 9),
int(total_steps / 10 * 9) + 1000,
)
parameters.cartesian_pwd_start = (
1 # Calculate pairwise distances starting form the second backbone atom ...
)
parameters.cartesian_pwd_step = 3 # for every third atom. These are the C_alpha atoms
parameters.dihedral_cost_scale = 1
parameters.dihedral_cost_variant = "mean_abs"
parameters.distance_cost_scale = 0 # no distance cost in dihedral space
parameters.cartesian_distance_cost_scale = (
100 # instead we use distance cost in C_alpha distance space
)
parameters.cartesian_dist_sig_parameters = [400, 10, 5, 1, 2, 5]
parameters.checkpoint_step = max(1, int(total_steps / 10))
parameters.l2_reg_constant = 0.001
parameters.center_cost_scale = 0
parameters.id = molname
# ########### Check how distances are mapped with your choice of Sigmoid parameters ##################
pwd = em.misc.pairwise_dist(
moldata.central_cartesians[
::1000, parameters.cartesian_pwd_start :: parameters.cartesian_pwd_step
],
flat=True,
)
with tf.Session() as sess:
pwd = sess.run(pwd)
axes = em.plot.distance_histogram(
pwd, float("inf"), parameters.cartesian_dist_sig_parameters
)
plt.show()
# somehow matplotlib messes with this setting and causes problems in tensorflow
locale.setlocale(locale.LC_NUMERIC, "en_US.UTF-8")
# ######################### Get references from dummy model #########################
dummy_parameters = copy.deepcopy(parameters)
dummy_parameters.main_path = em.misc.create_dir(
os.path.join(parameters.main_path, "dummy")
)
dummy_parameters.n_steps = int(len(moldata.dihedrals) / parameters.batch_size)
dummy_parameters.summary_step = 1
e_map = em.AngleDihedralCartesianEncoderMapDummy(dummy_parameters, moldata)
e_map.train()
e_map.close()
e_map = None
costs = em.misc.read_from_log(
os.path.join(dummy_parameters.main_path, "train"),
["cost/angle_cost", "cost/dihedral_cost", "cost/cartesian_cost"],
)
means = []
for values in costs:
means.append(np.mean([i.value for i in values]))
parameters.angle_cost_reference = means[0]
parameters.dihedral_cost_reference = means[1]
parameters.cartesian_cost_reference = means[2]
np.savetxt(
os.path.join(dummy_parameters.main_path, "adc_cost_means.txt"), np.array(means)
)
# ######################### Run Training #########################
# First run without C_alpha cost
parameters.n_steps = parameters.cartesian_cost_scale_soft_start[0]
e_map = em.AngleDihedralCartesianEncoderMap(parameters, moldata)
e_map.train()
e_map.close()
e_map = None
# Now we turn on C_alpha cost and continue the training run
parameters.n_steps = total_steps - parameters.cartesian_cost_scale_soft_start[0]
parameters.cartesian_cost_scale = 1
ckpt_path = os.path.join(
parameters.main_path,
"checkpoints",
"step{}.ckpt".format(parameters.cartesian_cost_scale_soft_start[0]),
)
e_map = em.AngleDihedralCartesianEncoderMap(
parameters, moldata, checkpoint_path=ckpt_path
)
e_map.train()
e_map.close()
e_map = None
Data Analysis
# Standard Library Imports
import os
# Third Party Imports
import matplotlib.pyplot as plt
import MDAnalysis as md
import numpy as np
# Encodermap imports
import encodermap as em
molname = "diubi"
run_id = 3
step = 50000
selection_for_alignment = "resid 0:77"
main_path = "runs/{}/run{}".format(molname, run_id)
# ######################### Load data #########################
structure_path = "data/{}/01.pdb".format(molname)
trajectory_paths = ["data/{}/{:02d}.xtc".format(molname, i + 1) for i in range(12)]
uni = md.Universe(structure_path, trajectory_paths)
selected_atoms = uni.select_atoms(
"backbone or name H or name O1 or (name CD and resname PRO)"
)
moldata = em.MolData(selected_atoms, cache_path="data/{}/cache".format(molname))
# ######################### Load parameters and checkpoint #########################
parameters = em.ADCParameters.load(os.path.join(main_path, "parameters.json"))
e_map = em.AngleDihedralCartesianEncoderMap(
parameters,
moldata,
checkpoint_path=os.path.join(main_path, "checkpoints", "step{}.ckpt".format(step)),
read_only=True,
)
# ######################### Project Data to map #########################
projected = e_map.encode(moldata.dihedrals)
# ######################### Plot histogram with path generator and lasso Select #########################
hist, xedges, yedges = np.histogram2d(projected[:, 0], projected[:, 1], bins=500)
fig1, axe1 = plt.subplots()
caxe = axe1.imshow(
-np.log(hist.T),
origin="low",
extent=[xedges[0], xedges[-1], yedges[0], yedges[-1]],
aspect="auto",
)
cbar = fig1.colorbar(caxe)
cbar.set_label("-ln(p)", labelpad=0)
axe1.set_title("Path Generator")
generator = em.plot.PathGenerateCartesians(
axe1,
e_map,
moldata,
vmd_path="/home/soft/bin/vmd",
align_reference=moldata.sorted_atoms,
align_select=selection_for_alignment,
)
fig2, axe2 = plt.subplots()
caxe = axe2.imshow(
-np.log(hist.T),
origin="low",
extent=[xedges[0], xedges[-1], yedges[0], yedges[-1]],
aspect="auto",
)
cbar = fig2.colorbar(caxe)
cbar.set_label("-ln(p)", labelpad=0)
axe2.set_title("Selector")
selector = em.plot.PathSelect(
axe2,
projected,
moldata,
e_map.p.main_path,
vmd_path="/home/soft/bin/vmd",
align_reference=moldata.sorted_atoms,
align_select=selection_for_alignment,
)
plt.show()