Static Code Examples#

These static code examples can also help you with your EncoderMap endeavors.

Cube Example#

This is the classic EncoderMap example. In this example, EncoderMap is used to project a 3D cube into 2D and back.

Cube Example
# Encodermap imports
import encodermap as em


# generating data:
high_d_data, ids = em.misc.random_on_cube_edges(10000, sigma=0.05)

# setting parameters:
parameters = em.Parameters()
parameters.main_path = em.misc.run_path("runs/cube/")
parameters.n_steps = 10000
parameters.dist_sig_parameters = (0.2, 3, 6, 1, 2, 6)
parameters.periodicity = float("inf")

# training:
e_map = em.EncoderMap(parameters, high_d_data)
e_map.train()

# projecting:
low_d_projection = e_map.encode(high_d_data)
generated = e_map.generate(low_d_projection)


#########################################################################
# Plotting:
#########################################################################

# Third Party Imports
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import (  # somehow this conflicts with tensorflow if imported earlier
    Axes3D,
)


fig = plt.figure()
axe = fig.add_subplot(111, projection="3d")
axe.scatter(
    high_d_data[:, 0],
    high_d_data[:, 1],
    high_d_data[:, 2],
    c=ids,
    marker="o",
    linewidths=0,
    cmap="tab10",
)

fig, axe = plt.subplots()
axe.scatter(
    low_d_projection[:, 0],
    low_d_projection[:, 1],
    c=ids,
    s=5,
    marker="o",
    linewidths=0,
    cmap="tab10",
)

fig = plt.figure()
axe = fig.add_subplot(111, projection="3d")
axe.scatter(
    generated[:, 0],
    generated[:, 1],
    generated[:, 2],
    c=ids,
    marker="o",
    linewidths=0,
    cmap="tab10",
)

plt.show()

Cube Distance Analysis#

In this example a way of visualizing the mapping of high-dimensional differences to low-dimensional distances is introduced.

Cube Distance Analysis
# Third Party Imports
import matplotlib.pyplot as plt

# Encodermap imports
import encodermap as em


data, ids = em.misc.random_on_cube_edges(1000, sigma=0.05)

dist_sig_parameters = (0.2, 3, 6, 1, 2, 6)
periodicity = float("inf")

axe = em.plot.distance_histogram(data, periodicity, dist_sig_parameters, bins=50)
plt.show()

Trp Cage#

In this example, you can work with actual MD data from simulations of the Trp-cage protein.

Trp Cage
# Standard Library Imports
import os

# Third Party Imports
import matplotlib.pyplot as plt
import numpy as np

# Encodermap imports
import encodermap as em


# setting parameters
data_path = "./data"
run_path = em.misc.run_path("./runs")
csv_path = os.path.join(data_path, "trp_cage.csv")  # can be downloaded from:
# https://www.kaggle.com/tobiasle/trp-cage-dihedrals
parameters = em.Parameters()
parameters.main_path = run_path
parameters.n_steps = 50000


print("loading data ...")
data = np.loadtxt(csv_path, skiprows=1, delimiter=",")
dihedrals = data[:, 3:41]


print("training autoencoder ...")
e_map = em.EncoderMap(parameters, dihedrals)
e_map.train()


print("projecting data ...")
low_d_projection = e_map.encode(dihedrals)


print("plotting result ...")
fig, axe = plt.subplots()
caxe = axe.scatter(
    low_d_projection[:, 0],
    low_d_projection[:, 1],
    c=data[:, -1],
    s=0.1,
    cmap="nipy_spectral",
    marker="o",
    linewidths=0,
)
cbar = fig.colorbar(caxe)
cbar.set_label("helix rmsd")


# generate structures along path
pdb_path = os.path.join(data_path, "trp_cage_extended.pdb")
generator = em.plot.PathGenerateDihedrals(axe, e_map, pdb_path)

plt.show()

Dihedral to Cartesian diUbi#

In this example, you can work with MD data from simulations of the M1-linked diUbi protein.

Data Generation
# Standard Library Imports
import copy
import locale
import os

# Third Party Imports
import matplotlib.pyplot as plt
import MDAnalysis as md
import numpy as np
import tensorflow as tf

# Encodermap imports
import encodermap as em


molname = "diubi"


# ######################### Load data #########################

# Data available at:
# https://www.kaggle.com/andrejberg/md-simulations-of-linear-ubiquitin-dimers

structure_path = "data/{}/01.pdb".format(molname)
trajectory_paths = ["data/{}/{:02d}.xtc".format(molname, i + 1) for i in range(12)]

uni = md.Universe(structure_path, trajectory_paths)

# select only atoms in the backbone or directly bound to the backbone. Atoms in the side chains are not supported (yet)
selected_atoms = uni.select_atoms(
    "backbone or name H or name O1 or (name CD and resname PRO)"
)

# The MolData object takes care of calculating all the necessary information like dihedrals, angles, bondlengths ...
# You can provide a cache_path to store this information for the next time.
moldata = em.MolData(selected_atoms, cache_path="data/{}/cache".format(molname))


# ######################### Define parameters #########################

total_steps = 50000

# First we want to train without C_alpha cost.
# Finally, we want to activate the C_alpha cost to improve the long-range order of the generated conformations
parameters = em.ADCParameters()
parameters.main_path = em.misc.run_path("runs/{}".format(molname))

parameters.cartesian_cost_scale = 0
parameters.cartesian_cost_variant = "mean_abs"
parameters.cartesian_cost_scale_soft_start = (
    int(total_steps / 10 * 9),
    int(total_steps / 10 * 9) + 1000,
)
parameters.cartesian_pwd_start = (
    1  # Calculate pairwise distances starting form the second backbone atom ...
)
parameters.cartesian_pwd_step = 3  # for every third atom. These are the C_alpha atoms

parameters.dihedral_cost_scale = 1
parameters.dihedral_cost_variant = "mean_abs"

parameters.distance_cost_scale = 0  # no distance cost in dihedral space
parameters.cartesian_distance_cost_scale = (
    100  # instead we use distance cost in C_alpha distance space
)
parameters.cartesian_dist_sig_parameters = [400, 10, 5, 1, 2, 5]

parameters.checkpoint_step = max(1, int(total_steps / 10))
parameters.l2_reg_constant = 0.001
parameters.center_cost_scale = 0
parameters.id = molname


# ########### Check how distances are mapped with your choice of Sigmoid parameters ##################

pwd = em.misc.pairwise_dist(
    moldata.central_cartesians[
        ::1000, parameters.cartesian_pwd_start :: parameters.cartesian_pwd_step
    ],
    flat=True,
)
with tf.Session() as sess:
    pwd = sess.run(pwd)
axes = em.plot.distance_histogram(
    pwd, float("inf"), parameters.cartesian_dist_sig_parameters
)
plt.show()
# somehow matplotlib messes with this setting and causes problems in tensorflow
locale.setlocale(locale.LC_NUMERIC, "en_US.UTF-8")


# ######################### Get references from dummy model #########################
dummy_parameters = copy.deepcopy(parameters)
dummy_parameters.main_path = em.misc.create_dir(
    os.path.join(parameters.main_path, "dummy")
)
dummy_parameters.n_steps = int(len(moldata.dihedrals) / parameters.batch_size)
dummy_parameters.summary_step = 1

e_map = em.AngleDihedralCartesianEncoderMapDummy(dummy_parameters, moldata)
e_map.train()
e_map.close()
e_map = None

costs = em.misc.read_from_log(
    os.path.join(dummy_parameters.main_path, "train"),
    ["cost/angle_cost", "cost/dihedral_cost", "cost/cartesian_cost"],
)
means = []
for values in costs:
    means.append(np.mean([i.value for i in values]))
parameters.angle_cost_reference = means[0]
parameters.dihedral_cost_reference = means[1]
parameters.cartesian_cost_reference = means[2]

np.savetxt(
    os.path.join(dummy_parameters.main_path, "adc_cost_means.txt"), np.array(means)
)


# ######################### Run Training #########################

# First run without C_alpha cost
parameters.n_steps = parameters.cartesian_cost_scale_soft_start[0]
e_map = em.AngleDihedralCartesianEncoderMap(parameters, moldata)
e_map.train()
e_map.close()
e_map = None

# Now we turn on C_alpha cost and continue the training run
parameters.n_steps = total_steps - parameters.cartesian_cost_scale_soft_start[0]
parameters.cartesian_cost_scale = 1
ckpt_path = os.path.join(
    parameters.main_path,
    "checkpoints",
    "step{}.ckpt".format(parameters.cartesian_cost_scale_soft_start[0]),
)

e_map = em.AngleDihedralCartesianEncoderMap(
    parameters, moldata, checkpoint_path=ckpt_path
)
e_map.train()
e_map.close()
e_map = None
Data Analysis
# Standard Library Imports
import os

# Third Party Imports
import matplotlib.pyplot as plt
import MDAnalysis as md
import numpy as np

# Encodermap imports
import encodermap as em


molname = "diubi"
run_id = 3
step = 50000
selection_for_alignment = "resid 0:77"

main_path = "runs/{}/run{}".format(molname, run_id)


# ######################### Load data #########################

structure_path = "data/{}/01.pdb".format(molname)
trajectory_paths = ["data/{}/{:02d}.xtc".format(molname, i + 1) for i in range(12)]

uni = md.Universe(structure_path, trajectory_paths)
selected_atoms = uni.select_atoms(
    "backbone or name H or name O1 or (name CD and resname PRO)"
)
moldata = em.MolData(selected_atoms, cache_path="data/{}/cache".format(molname))


# ######################### Load parameters and checkpoint #########################

parameters = em.ADCParameters.load(os.path.join(main_path, "parameters.json"))
e_map = em.AngleDihedralCartesianEncoderMap(
    parameters,
    moldata,
    checkpoint_path=os.path.join(main_path, "checkpoints", "step{}.ckpt".format(step)),
    read_only=True,
)


# ######################### Project Data to map #########################

projected = e_map.encode(moldata.dihedrals)


# ######################### Plot histogram with path generator and lasso Select #########################

hist, xedges, yedges = np.histogram2d(projected[:, 0], projected[:, 1], bins=500)

fig1, axe1 = plt.subplots()

caxe = axe1.imshow(
    -np.log(hist.T),
    origin="low",
    extent=[xedges[0], xedges[-1], yedges[0], yedges[-1]],
    aspect="auto",
)
cbar = fig1.colorbar(caxe)
cbar.set_label("-ln(p)", labelpad=0)
axe1.set_title("Path Generator")
generator = em.plot.PathGenerateCartesians(
    axe1,
    e_map,
    moldata,
    vmd_path="/home/soft/bin/vmd",
    align_reference=moldata.sorted_atoms,
    align_select=selection_for_alignment,
)

fig2, axe2 = plt.subplots()
caxe = axe2.imshow(
    -np.log(hist.T),
    origin="low",
    extent=[xedges[0], xedges[-1], yedges[0], yedges[-1]],
    aspect="auto",
)
cbar = fig2.colorbar(caxe)
cbar.set_label("-ln(p)", labelpad=0)
axe2.set_title("Selector")
selector = em.plot.PathSelect(
    axe2,
    projected,
    moldata,
    e_map.p.main_path,
    vmd_path="/home/soft/bin/vmd",
    align_reference=moldata.sorted_atoms,
    align_select=selection_for_alignment,
)

plt.show()