Dihedral to cartesian diubi#

import copy
import locale
import os

import matplotlib.pyplot as plt
import MDAnalysis as md
import numpy as np
import tensorflow as tf

import encodermap as em

molname = "diubi"


# ######################### Load data #########################

# Data available at:
# https://www.kaggle.com/andrejberg/md-simulations-of-linear-ubiquitin-dimers

structure_path = "data/{}/01.pdb".format(molname)
trajectory_paths = ["data/{}/{:02d}.xtc".format(molname, i + 1) for i in range(12)]

uni = md.Universe(structure_path, trajectory_paths)

# select only atoms in the backbone or directly bound to the backbone. Atoms in the side chains are not supported (yet)
selected_atoms = uni.select_atoms(
    "backbone or name H or name O1 or (name CD and resname PRO)"
)

# The MolData object takes care of calculating all the necessary information like dihedrals, angles, bondlengths ...
# You can provide a cache_path to store this information for the next time.
moldata = em.MolData(selected_atoms, cache_path="data/{}/cache".format(molname))


# ######################### Define parameters #########################

total_steps = 50000

# First we want to train without C_alpha cost.
# Finally, we want to activate the C_alpha cost to improve the long-range order of the generated conformations
parameters = em.ADCParameters()
parameters.main_path = em.misc.run_path("runs/{}".format(molname))

parameters.cartesian_cost_scale = 0
parameters.cartesian_cost_variant = "mean_abs"
parameters.cartesian_cost_scale_soft_start = (
    int(total_steps / 10 * 9),
    int(total_steps / 10 * 9) + 1000,
)
parameters.cartesian_pwd_start = (
    1  # Calculate pairwise distances starting form the second backbone atom ...
)
parameters.cartesian_pwd_step = 3  # for every third atom. These are the C_alpha atoms

parameters.dihedral_cost_scale = 1
parameters.dihedral_cost_variant = "mean_abs"

parameters.distance_cost_scale = 0  # no distance cost in dihedral space
parameters.cartesian_distance_cost_scale = (
    100  # instead we use distance cost in C_alpha distance space
)
parameters.cartesian_dist_sig_parameters = [400, 10, 5, 1, 2, 5]

parameters.checkpoint_step = max(1, int(total_steps / 10))
parameters.l2_reg_constant = 0.001
parameters.center_cost_scale = 0
parameters.id = molname


# ########### Check how distances are mapped with your choice of Sigmoid parameters ##################

pwd = em.misc.pairwise_dist(
    moldata.central_cartesians[
        ::1000, parameters.cartesian_pwd_start :: parameters.cartesian_pwd_step
    ],
    flat=True,
)
with tf.Session() as sess:
    pwd = sess.run(pwd)
axes = em.plot.distance_histogram(
    pwd, float("inf"), parameters.cartesian_dist_sig_parameters
)
plt.show()
# somehow matplotlib messes with this setting and causes problems in tensorflow
locale.setlocale(locale.LC_NUMERIC, "en_US.UTF-8")


# ######################### Get references from dummy model #########################
dummy_parameters = copy.deepcopy(parameters)
dummy_parameters.main_path = em.misc.create_dir(
    os.path.join(parameters.main_path, "dummy")
)
dummy_parameters.n_steps = int(len(moldata.dihedrals) / parameters.batch_size)
dummy_parameters.summary_step = 1

e_map = em.AngleDihedralCartesianEncoderMapDummy(dummy_parameters, moldata)
e_map.train()
e_map.close()
e_map = None

costs = em.misc.read_from_log(
    os.path.join(dummy_parameters.main_path, "train"),
    ["cost/angle_cost", "cost/dihedral_cost", "cost/cartesian_cost"],
)
means = [np.mean(cost[:, 2]) for cost in costs]
parameters.angle_cost_reference = means[0]
parameters.dihedral_cost_reference = means[1]
parameters.cartesian_cost_reference = means[2]

np.savetxt(
    os.path.join(dummy_parameters.main_path, "adc_cost_means.txt"), np.array(means)
)


# ######################### Run Training #########################

# First run without C_alpha cost
parameters.n_steps = parameters.cartesian_cost_scale_soft_start[0]
e_map = em.AngleDihedralCartesianEncoderMap(parameters, moldata)
e_map.train()
e_map.close()
e_map = None

# Now we turn on C_alpha cost and continue the training run
parameters.n_steps = total_steps - parameters.cartesian_cost_scale_soft_start[0]
parameters.cartesian_cost_scale = 1
ckpt_path = os.path.join(
    parameters.main_path,
    "checkpoints",
    "step{}.ckpt".format(parameters.cartesian_cost_scale_soft_start[0]),
)

e_map = em.AngleDihedralCartesianEncoderMap(
    parameters, moldata, checkpoint_path=ckpt_path
)
e_map.train()
e_map.close()
e_map = None