from math import exp, log
# OpenMM utilities
import mdtraj as md
import numpy as np
import pymbar
from matplotlib import pyplot as plt
from openmm import unit
from pymbar import timeseries
from scipy import interpolate
from scipy.optimize import Bounds, LinearConstraint, minimize, minimize_scalar
kB = (unit.MOLAR_GAS_CONSTANT_R).in_units_of(unit.kilojoule / (unit.kelvin * unit.mole))
[docs]def bin_samples(sample_kn, n_bins):
"""
"""
max_value, min_value = None, None
for index_1 in range(len(sample_kn)):
for index_2 in range(len(sample_kn[index_1])):
sample = sample_kn[index_1][index_2]
if max_value == None:
max_value = sample
if min_value == None:
min_value = sample
if sample > max_value:
max_value = sample
if sample < min_value:
min_value = sample
bin_size = (max_value - min_value) / (n_bins + 1)
bins = np.array(
[[min_value + i * bin_size, min_value + (i + 1) * bin_size] for i in range(n_bins)]
)
bin_counts = np.zeros((len(sample_kn), len(bins)))
for index_1 in range(len(sample_kn)):
for index_2 in range(len(sample_kn[index])):
sample = sample_kn[index_1][index_2]
return (bins, bin_counts)
[docs]def get_entropy_differences(mbar):
"""
Given an `MBAR <https://pymbar.readthedocs.io/en/latest/mbar.html>`_ class object, this function computes the entropy differences for the states defined within.
:param mbar: An MBAR() class object (from the 'pymbar' package)
:type mbar: `MBAR <https://pymbar.readthedocs.io/en/latest/mbar.html>`_ class object
:returns:
- Delta_s ( np.array( n_mbar_states x n_thermo_states ) - Entropy differences for the thermodynamic states in 'mbar'
- dDelta_s ( np.array( n_mbar_states x n_thermo_states ) - Uncertainty in the entropy differences for the thermodynamic states in 'mbar'
"""
results = mbar.computeEntropyAndEnthalpy()
results = {
"Delta_f": results[0],
"dDelta_f": results[1],
"Delta_u": results[2],
"dDelta_u": results[3],
"Delta_s": results[4],
"dDelta_s": results[5],
}
Delta_s = results["Delta_s"]
dDelta_s = results["dDelta_s"]
return (Delta_s, dDelta_s)
[docs]def get_enthalpy_differences(mbar):
"""
Given an `MBAR <https://pymbar.readthedocs.io/en/latest/mbar.html>`_ class object, this function computes the enthalpy differences for the states defined within.
:param mbar: An MBAR() class object (from the 'pymbar' package)
:type mbar: `MBAR <https://pymbar.readthedocs.io/en/latest/mbar.html>`_ class object
:returns:
- Delta_u ( np.array( n_mbar_states x n_thermo_states ) - Enthalpy differences for the thermodynamic states in 'mbar'
- dDelta_u ( np.array( n_mbar_states x n_thermo_states ) - Uncertainty in the enthalpy differences for the thermodynamic states in 'mbar'
"""
results = mbar.computeEntropyAndEnthalpy()
results = {
"Delta_f": results[0],
"dDelta_f": results[1],
"Delta_u": results[2],
"dDelta_u": results[3],
"Delta_s": results[4],
"dDelta_s": results[5],
}
Delta_u = results["Delta_u"]
dDelta_u = results["dDelta_u"]
return (Delta_u, dDelta_u)
[docs]def get_free_energy_differences(mbar):
"""
Given an `MBAR <https://pymbar.readthedocs.io/en/latest/mbar.html>`_ class object, this function computes the free energy differences for the states defined within.
:param mbar: An MBAR() class object (from the 'pymbar' package)
:type mbar: `MBAR <https://pymbar.readthedocs.io/en/latest/mbar.html>`_ class object
:returns:
- df_ij ( np.array( n_mbar_states x n_thermo_states ) - Free energy differences for the thermodynamic states in 'mbar'
- ddf_ij ( np.array( n_mbar_states x n_thermo_states ) - Uncertainty in the free energy differences for the thermodynamic states in 'mbar'
"""
results = mbar.getFreeEnergyDifferences()
results = {"Delta_f": results[0], "dDelta_f": results[1]}
df_ij = results["Delta_f"]
ddf_ij = results["dDelta_f"]
return (df_ij, ddf_ij)
[docs]def get_temperature_list(min_temp, max_temp, num_replicas):
"""
Given the parameters to define a temperature range as input, this function uses logarithmic spacing to generate a list of intermediate temperatures.
:param min_temp: The minimum temperature in the temperature list.
:type min_temp: `Quantity() <http://docs.openmm.org/development/api-python/generated/simtk.unit.quantity.Quantity.html>`_
:param max_temp: The maximum temperature in the temperature list.
:type max_temp: `Quantity() <http://docs.openmm.org/development/api-python/generated/simtk.unit.quantity.Quantity.html>`_
:param num_replicas: The number of temperatures in the list.
:type num_replicas: int
:returns:
- temperature_list ( 1D numpy array ( float * simtk.unit.temperature ) ) - List of temperatures
"""
T_unit = min_temp.unit
temperature_list = np.logspace(
np.log10(min_temp.value_in_unit(T_unit)),
np.log10(max_temp.value_in_unit(T_unit)),
num=num_replicas
)
# Reassign units:
temperature_list *= T_unit
return temperature_list
[docs]def get_opt_temperature_list(temperature_list_init, C_v, number_intermediate_states=0, plotfile=None, verbose=True):
"""
Given an initial temperature list, and heat capacity curve that resulted from a replica exchange simulation
using those temperatures, computes a revised temperature list satisfying the constant entropy increase (CEI) method
:param temperature_list_init: List of temperatures for initial replica exchange run
:type temperature_list_init: 1D numpy array ( float * simtk.unit.temperature )
:param C_v: List of heat capacities evaluated at each temperature in temperature_list_init
:type C_v: 1D numpy array [ Quantity ]
:param number_intermediate_states: number of unsampled states between each pair of sampled states (default=0)
:type number_intermediate_states: int
:param plotfile: path to filename for plotting spline fit to C_v/T vs. T (default=None)
:type plotfile: str
:param verbose: option to print final output of scipy optimization routines
:type verbose: bool
:returns:
- T_opt_list ( 1D numpy array ( float * simtk.unit.temperature ) ) - New optimally spaced temperature list
- deltaS_list ( 1D numpy array ( float * simtk.unit ) ) - Actual entropy increases for adjacent temperatures in T_opt_list
"""
# Process initial temperature list
# Check for intermediate states from pymbar
# (more intermediate states will give better estimate of new temperatures)
T_init_sampled = temperature_list_init[::(number_intermediate_states+1)]
T_init_sampled_val = np.zeros((len(T_init_sampled)))
Tunit = T_init_sampled[0].unit
for i in range(len(T_init_sampled)):
T_init_sampled_val[i] = T_init_sampled[i].value_in_unit(Tunit)
T_init_sampled = T_init_sampled_val
# First and last temps are fixed bounds:
T0 = T_init_sampled[0]
TN = T_init_sampled[-1]
# Fit C_v/T vs. T data to spline
xdata = temperature_list_init
ydata = C_v / temperature_list_init
Cv_unit = C_v[0].unit
# Strip units off quantities:
if type(xdata[0]) == unit.quantity.Quantity:
xdata_val = np.zeros((len(xdata)))
xunit = xdata[0].unit
for i in range(len(xdata)):
xdata_val[i] = xdata[i].value_in_unit(xunit)
xdata = xdata_val
if type(ydata[0]) == unit.quantity.Quantity:
ydata_val = np.zeros((len(ydata)))
yunit = ydata[0].unit
for i in range(len(ydata)):
ydata_val[i] = ydata[i].value_in_unit(yunit)
ydata = ydata_val
# Fit cubic spline to data, no smoothing
spline_tck = interpolate.splrep(xdata, ydata, s=0)
if plotfile is not None:
# Plot the spline fit:
figure = plt.figure()
line1 = plt.plot(xdata,ydata,'ok',fillstyle='none',label='simulation_data')
xspline = np.linspace(xdata[0],xdata[-1],1000)
yspline = interpolate.splev(xspline, spline_tck, der=0)
line2 = plt.plot(xspline,yspline,'-b',label='spline fit')
plt.xlabel('Temperature (K)')
plt.ylabel('C_v / T (kJ/mol/K^2)')
plt.legend()
plt.savefig(f'{plotfile}')
plt.close()
# Fix the first and last temps, with intermediate temps varied in a minimization,
# With constraint that the temperature spacings must sum to the original temp range.
# The objective function is the standard deviation of the entropy differences between all adjacent temps.
def entropy_stdev(T_deltas):
# Compute standard deviation of entropy differences
T_opt_list = np.zeros(len(T_deltas)+1)
deltaS_list = np.zeros(len(T_deltas))
T_opt_list[0] = T0
for i in range(len(T_deltas)-1):
T_opt_list[i+1] = T_opt_list[i]+T_deltas[i]
T_opt_list[-1] = TN
for i in range(len(T_deltas)):
deltaS_list[i] = interpolate.splint(T_opt_list[i],T_opt_list[i+1], spline_tck)
return np.std(deltaS_list)
# For initial guess, use the original spacing:
T_delta0 = T_init_sampled[1::]-T_init_sampled[0:-1]
# Set up linear equality constraint:
constraint = LinearConstraint(
np.ones(len(T_init_sampled)-1),
lb=(T_init_sampled[-1]-T_init_sampled[0]),
ub=(T_init_sampled[-1]-T_init_sampled[0]),
)
bounds = Bounds(np.ones(len(T_delta0))*1E-3,np.ones(len(T_delta0))*500)
opt_results = minimize(
entropy_stdev,
T_delta0,
bounds=bounds,
constraints=constraint,
method='SLSQP',
options={
'ftol':1E-12,
'maxiter':1E6}
)
if not opt_results.success:
print('Error: CEI optimization did not converge')
print(f'Constant entropy increase optimization results:\n{opt_results}')
exit()
if verbose:
print(f'Constant entropy increase optimization results:\n{opt_results}')
# Retreive final temperature list and entropy diff list:
T_deltas_opt = opt_results.x
T_opt_list = np.zeros(len(T_deltas_opt)+1)
deltaS_list = np.zeros(len(T_deltas_opt))
T_opt_list[0] = T0
for i in range(len(T_deltas_opt)):
T_opt_list[i+1] = T_opt_list[i]+T_deltas_opt[i]
for i in range(len(T_deltas_opt)):
deltaS_list[i] = interpolate.splint(T_opt_list[i],T_opt_list[i+1], spline_tck)
return T_opt_list*unit.kelvin, deltaS_list*Cv_unit
[docs]def get_mbar_expectation(E_kln, temperature_list, NumIntermediates, output=None, mbar=None):
"""
Given a properly-formatted matrix of energies with associated temperatures this function reweights with MBAR (if 'mbar'=None), and can also compute the expectation value for any property of interest.
.. warning:: This function accepts an input matrix thtat has either 'E_kln' or 'E_kn' format, but always provides an 'E_kn'-formatted matrix in return.
:param E_kln: A matrix of energies or samples for a property that we would like to use to make predictions with MBAR.
:type E_kln: List( List( float * simtk.unit.energy for simulation_steps ) for num_replicas ) OR List( List( List( float * simtk.unit.energy for simulation_steps ) for num_replicas ) for num_replicas )
:param temperature_list: List of temperatures for the simulation data.
:type temperature_list: List( float * simtk.unit.temperature )
:param NumIntermediates: The number of states to insert between existing states in 'T_from_file'
:type NumIntermediates: int
:param output: The 'output' option to use when calling MBAR, default = 'differences'
:type output: str
:param mbar: An MBAR() class object (from the 'pymbar' package), default = None
:type mbar: `MBAR <https://pymbar.readthedocs.io/en/latest/mbar.html>`_ class object
:returns:
- mbar ( `MBAR <https://pymbar.readthedocs.io/en/latest/mbar.html>`_ ) - An MBAR() class object (from the 'pymbar' package)
- E_kn ( List( List( float * simtk.unit.energy for num_samples ) for num_replicas ) ) - A matrix of energies or samples for a property that we would like to use to make predictions with MBAR.
- result ( List( List( float for num_samples ) for num_replicas ) - The MBAR expectation value for the energies and/or other samples that were provided.
- dresult ( List( List( float for num_samples ) for num_replicas ) - The MBAR expectation value for the energies and/or other samples that were provided.
- Temp_k ( List( float * simtk.unit.temperature ) ) - A new list of temperatures that includes the inserted intermediates.
"""
if mbar == None:
NumTemps = len(temperature_list) # Last TEMP # + 1 (start counting at 1)
T_from_file = np.array([temperature._value for temperature in temperature_list])
E_from_file = E_kln
originalK = len(T_from_file)
N_k = np.zeros(originalK, np.int32)
g = np.zeros(originalK, np.float64)
for k in range(originalK): # subsample the energies
g[k] = pymbar.timeseries.statisticalInefficiency(E_from_file[k][k])
indices = np.array(
pymbar.timeseries.subsampleCorrelatedData(E_from_file[k][k], g=g[k])
) # indices of uncorrelated samples
N_k[k] = len(indices)
E_from_file[k, k, 0 : N_k[k]] = E_from_file[k, k, indices]
if NumIntermediates > 0:
Temp_k = get_intermediate_temperatures(temperature_list, NumIntermediates)
else:
Temp_k = np.array([temperature._value for temperature in temperature_list])
# Update number of states
K = len(Temp_k)
# Loop, inserting E's into blank matrix (leaving blanks only where new Ts are inserted)
Nall_k = np.zeros(
[K], np.int32
) # Number of samples (n) for each state (k) = number of iterations/energies
try:
E_kn = np.zeros([K, len(E_from_file[0][0])], np.float64)
for k in range(originalK - 1):
E_kn[k + k * NumIntermediates, 0 : N_k[k]] = E_from_file[k, k, 0 : N_k[k]]
Nall_k[k + k * NumIntermediates] = N_k[k]
E_kn[-1][0 : N_k[-1]] = E_from_file[-1][-1][0 : N_k[-1]]
Nall_k[-1] = N_k[-1]
except:
E_kn = np.zeros([K, len(E_from_file[0])], np.float64)
for k in range(originalK):
E_kn[k + k * NumIntermediates, 0 : N_k[k]] = E_from_file[k, 0 : N_k[k]]
Nall_k[k + k * NumIntermediates] = N_k[k]
beta_k = 1 / (kB._value * Temp_k)
allE_expect = np.zeros([K], np.float64)
allE2_expect = np.zeros([K], np.float64)
dE_expect = np.zeros([K], np.float64)
u_kn = np.zeros(
[K, sum(Nall_k)], np.float64
) # u_kln is reduced pot. ener. of segment n of temp k evaluated at temp l
# index = 0
for k in range(K):
index = 0
for l in range(K):
u_kn[k, index : index + Nall_k[l]] = beta_k[k] * E_kn[l, 0 : Nall_k[l]]
index = index + Nall_k[l]
# ------------------------------------------------------------------------
# Initialize MBAR
# ------------------------------------------------------------------------
print("Initializing MBAR:")
print("--K = number of Temperatures with data = %d" % (originalK))
print("--L = number of total Temperatures = %d" % (K))
print("--N = number of Energies per Temperature = %d" % (np.max(Nall_k)))
mbar = pymbar.MBAR(u_kn, Nall_k, verbose=False, relative_tolerance=1e-12, initial_f_k=None)
E_kn = u_kn # not a copy, we are going to write over it, but we don't need it any more.
for k in range(K):
E_kn[k, :] *= beta_k[k] ** (
-1
) # get the 'unreduced' potential -- we can't take differences of reduced potentials because the beta is different; math is much more confusing with derivatives of the reduced potentials.
else:
E_kn = E_kln
Temp_k = temperature_list
if output != None:
results = mbar.computeExpectations(E_kn, output="differences", state_dependent=True)
results = {"mu": results[0], "sigma": results[1]}
result = results["mu"]
dresult = results["sigma"]
else:
results = mbar.computeExpectations(E_kn, state_dependent=True)
results = {"mu": results[0], "sigma": results[1]}
result = results["mu"]
dresult = results["sigma"]
return (mbar, E_kn, result, dresult, Temp_k)