import numpy as np
from math import exp, log
# OpenMM utilities
import mdtraj as md
import simtk.unit as unit
import pymbar
from pymbar import timeseries
from scipy import interpolate
from scipy.optimize import minimize, minimize_scalar, Bounds, LinearConstraint
from matplotlib import pyplot as plt
kB = (unit.MOLAR_GAS_CONSTANT_R).in_units_of(unit.kilojoule / (unit.kelvin * unit.mole))
[docs]def bin_samples(sample_kn, n_bins):
"""
"""
max_value, min_value = None, None
for index_1 in range(len(sample_kn)):
for index_2 in range(len(sample_kn[index_1])):
sample = sample_kn[index_1][index_2]
if max_value == None:
max_value = sample
if min_value == None:
min_value = sample
if sample > max_value:
max_value = sample
if sample < min_value:
min_value = sample
bin_size = (max_value - min_value) / (n_bins + 1)
bins = np.array(
[[min_value + i * bin_size, min_value + (i + 1) * bin_size] for i in range(n_bins)]
)
bin_counts = np.zeros((len(sample_kn), len(bins)))
for index_1 in range(len(sample_kn)):
for index_2 in range(len(sample_kn[index])):
sample = sample_kn[index_1][index_2]
return (bins, bin_counts)
[docs]def get_entropy_differences(mbar):
"""
Given an `MBAR <https://pymbar.readthedocs.io/en/latest/mbar.html>`_ class object, this function computes the entropy differences for the states defined within.
:param mbar: An MBAR() class object (from the 'pymbar' package)
:type mbar: `MBAR <https://pymbar.readthedocs.io/en/latest/mbar.html>`_ class object
:returns:
- Delta_s ( np.array( n_mbar_states x n_thermo_states ) - Entropy differences for the thermodynamic states in 'mbar'
- dDelta_s ( np.array( n_mbar_states x n_thermo_states ) - Uncertainty in the entropy differences for the thermodynamic states in 'mbar'
"""
results = mbar.computeEntropyAndEnthalpy()
results = {
"Delta_f": results[0],
"dDelta_f": results[1],
"Delta_u": results[2],
"dDelta_u": results[3],
"Delta_s": results[4],
"dDelta_s": results[5],
}
Delta_s = results["Delta_s"]
dDelta_s = results["dDelta_s"]
return (Delta_s, dDelta_s)
[docs]def get_enthalpy_differences(mbar):
"""
Given an `MBAR <https://pymbar.readthedocs.io/en/latest/mbar.html>`_ class object, this function computes the enthalpy differences for the states defined within.
:param mbar: An MBAR() class object (from the 'pymbar' package)
:type mbar: `MBAR <https://pymbar.readthedocs.io/en/latest/mbar.html>`_ class object
:returns:
- Delta_u ( np.array( n_mbar_states x n_thermo_states ) - Enthalpy differences for the thermodynamic states in 'mbar'
- dDelta_u ( np.array( n_mbar_states x n_thermo_states ) - Uncertainty in the enthalpy differences for the thermodynamic states in 'mbar'
"""
results = mbar.computeEntropyAndEnthalpy()
results = {
"Delta_f": results[0],
"dDelta_f": results[1],
"Delta_u": results[2],
"dDelta_u": results[3],
"Delta_s": results[4],
"dDelta_s": results[5],
}
Delta_u = results["Delta_u"]
dDelta_u = results["dDelta_u"]
return (Delta_u, dDelta_u)
[docs]def get_free_energy_differences(mbar):
"""
Given an `MBAR <https://pymbar.readthedocs.io/en/latest/mbar.html>`_ class object, this function computes the free energy differences for the states defined within.
:param mbar: An MBAR() class object (from the 'pymbar' package)
:type mbar: `MBAR <https://pymbar.readthedocs.io/en/latest/mbar.html>`_ class object
:returns:
- df_ij ( np.array( n_mbar_states x n_thermo_states ) - Free energy differences for the thermodynamic states in 'mbar'
- ddf_ij ( np.array( n_mbar_states x n_thermo_states ) - Uncertainty in the free energy differences for the thermodynamic states in 'mbar'
"""
results = mbar.getFreeEnergyDifferences()
results = {"Delta_f": results[0], "dDelta_f": results[1]}
df_ij = results["Delta_f"]
ddf_ij = results["dDelta_f"]
return (df_ij, ddf_ij)
[docs]def get_temperature_list(min_temp, max_temp, num_replicas):
"""
Given the parameters to define a temperature range as input, this function uses logarithmic spacing to generate a list of intermediate temperatures.
:param min_temp: The minimum temperature in the temperature list.
:type min_temp: `Quantity() <http://docs.openmm.org/development/api-python/generated/simtk.unit.quantity.Quantity.html>`_
:param max_temp: The maximum temperature in the temperature list.
:type max_temp: `Quantity() <http://docs.openmm.org/development/api-python/generated/simtk.unit.quantity.Quantity.html>`_
:param num_replicas: The number of temperatures in the list.
:type num_replicas: int
:returns:
- temperature_list ( 1D numpy array ( float * simtk.unit.temperature ) ) - List of temperatures
"""
T_unit = min_temp.unit
temperature_list = np.logspace(
np.log10(min_temp.value_in_unit(T_unit)),
np.log10(max_temp.value_in_unit(T_unit)),
num=num_replicas
)
# Reassign units:
temperature_list *= T_unit
return temperature_list
[docs]def get_opt_temperature_list(temperature_list_init, C_v, number_intermediate_states=0, plotfile=None, verbose=True):
"""
Given an initial temperature list, and heat capacity curve that resulted from a replica exchange simulation
using those temperatures, computes a revised temperature list satisfying the constant entropy increase (CEI) method
:param temperature_list_init: List of temperatures for initial replica exchange run
:type temperature_list_init: 1D numpy array ( float * simtk.unit.temperature )
:param C_v: List of heat capacities evaluated at each temperature in temperature_list_init
:type C_v: 1D numpy array [ Quantity ]
:param number_intermediate_states: number of unsampled states between each pair of sampled states (default=0)
:type number_intermediate_states: int
:param plotfile: path to filename for plotting spline fit to C_v/T vs. T (default=None)
:type plotfile: str
:param verbose: option to print final output of scipy optimization routines
:type verbose: bool
:returns:
- T_opt_list ( 1D numpy array ( float * simtk.unit.temperature ) ) - New optimally spaced temperature list
- deltaS_list ( 1D numpy array ( float * simtk.unit ) ) - Actual entropy increases for adjacent temperatures in T_opt_list
"""
# Process initial temperature list
# Check for intermediate states from pymbar
# (more intermediate states will give better estimate of new temperatures)
T_init_sampled = temperature_list_init[::(number_intermediate_states+1)]
T_init_sampled_val = np.zeros((len(T_init_sampled)))
Tunit = T_init_sampled[0].unit
for i in range(len(T_init_sampled)):
T_init_sampled_val[i] = T_init_sampled[i].value_in_unit(Tunit)
T_init_sampled = T_init_sampled_val
# First and last temps are fixed bounds:
T0 = T_init_sampled[0]
TN = T_init_sampled[-1]
# Fit C_v/T vs. T data to spline
xdata = temperature_list_init
ydata = C_v / temperature_list_init
Cv_unit = C_v[0].unit
# Strip units off quantities:
if type(xdata[0]) == unit.quantity.Quantity:
xdata_val = np.zeros((len(xdata)))
xunit = xdata[0].unit
for i in range(len(xdata)):
xdata_val[i] = xdata[i].value_in_unit(xunit)
xdata = xdata_val
if type(ydata[0]) == unit.quantity.Quantity:
ydata_val = np.zeros((len(ydata)))
yunit = ydata[0].unit
for i in range(len(ydata)):
ydata_val[i] = ydata[i].value_in_unit(yunit)
ydata = ydata_val
# Fit cubic spline to data, no smoothing
spline_tck = interpolate.splrep(xdata, ydata, s=0)
if plotfile is not None:
# Plot the spline fit:
figure = plt.figure()
line1 = plt.plot(xdata,ydata,'ok',fillstyle='none',label='simulation_data')
xspline = np.linspace(xdata[0],xdata[-1],1000)
yspline = interpolate.splev(xspline, spline_tck, der=0)
line2 = plt.plot(xspline,yspline,'-b',label='spline fit')
plt.xlabel('Temperature (K)')
plt.ylabel('C_v / T (kJ/mol/K^2)')
plt.legend()
plt.savefig(f'{plotfile}')
plt.close()
# Fix the first and last temps, with intermediate temps varied in a minimization,
# With constraint that the temperature spacings must sum to the original temp range.
# The objective function is the standard deviation of the entropy differences between all adjacent temps.
def entropy_stdev(T_deltas):
# Compute standard deviation of entropy differences
T_opt_list = np.zeros(len(T_deltas)+1)
deltaS_list = np.zeros(len(T_deltas))
T_opt_list[0] = T0
for i in range(len(T_deltas)-1):
T_opt_list[i+1] = T_opt_list[i]+T_deltas[i]
T_opt_list[-1] = TN
for i in range(len(T_deltas)):
deltaS_list[i] = interpolate.splint(T_opt_list[i],T_opt_list[i+1], spline_tck)
return np.std(deltaS_list)
# For initial guess, use the original spacing:
T_delta0 = T_init_sampled[1::]-T_init_sampled[0:-1]
# Set up linear equality constraint:
constraint = LinearConstraint(
np.ones(len(T_init_sampled)-1),
lb=(T_init_sampled[-1]-T_init_sampled[0]),
ub=(T_init_sampled[-1]-T_init_sampled[0]),
)
bounds = Bounds(np.ones(len(T_delta0))*1E-3,np.ones(len(T_delta0))*500)
opt_results = minimize(
entropy_stdev,
T_delta0,
bounds=bounds,
constraints=constraint,
method='SLSQP',
options={
'ftol':1E-12,
'maxiter':1E6}
)
if not opt_results.success:
print('Error: CEI optimization did not converge')
print(f'Constant entropy increase optimization results:\n{opt_results}')
exit()
if verbose:
print(f'Constant entropy increase optimization results:\n{opt_results}')
# Retreive final temperature list and entropy diff list:
T_deltas_opt = opt_results.x
T_opt_list = np.zeros(len(T_deltas_opt)+1)
deltaS_list = np.zeros(len(T_deltas_opt))
T_opt_list[0] = T0
for i in range(len(T_deltas_opt)):
T_opt_list[i+1] = T_opt_list[i]+T_deltas_opt[i]
for i in range(len(T_deltas_opt)):
deltaS_list[i] = interpolate.splint(T_opt_list[i],T_opt_list[i+1], spline_tck)
return T_opt_list*unit.kelvin, deltaS_list*Cv_unit
[docs]def get_mbar_expectation(E_kln, temperature_list, NumIntermediates, output=None, mbar=None):
"""
Given a properly-formatted matrix of energies with associated temperatures this function reweights with MBAR (if 'mbar'=None), and can also compute the expectation value for any property of interest.
.. warning:: This function accepts an input matrix thtat has either 'E_kln' or 'E_kn' format, but always provides an 'E_kn'-formatted matrix in return.
:param E_kln: A matrix of energies or samples for a property that we would like to use to make predictions with MBAR.
:type E_kln: List( List( float * simtk.unit.energy for simulation_steps ) for num_replicas ) OR List( List( List( float * simtk.unit.energy for simulation_steps ) for num_replicas ) for num_replicas )
:param temperature_list: List of temperatures for the simulation data.
:type temperature_list: List( float * simtk.unit.temperature )
:param NumIntermediates: The number of states to insert between existing states in 'T_from_file'
:type NumIntermediates: int
:param output: The 'output' option to use when calling MBAR, default = 'differences'
:type output: str
:param mbar: An MBAR() class object (from the 'pymbar' package), default = None
:type mbar: `MBAR <https://pymbar.readthedocs.io/en/latest/mbar.html>`_ class object
:returns:
- mbar ( `MBAR <https://pymbar.readthedocs.io/en/latest/mbar.html>`_ ) - An MBAR() class object (from the 'pymbar' package)
- E_kn ( List( List( float * simtk.unit.energy for num_samples ) for num_replicas ) ) - A matrix of energies or samples for a property that we would like to use to make predictions with MBAR.
- result ( List( List( float for num_samples ) for num_replicas ) - The MBAR expectation value for the energies and/or other samples that were provided.
- dresult ( List( List( float for num_samples ) for num_replicas ) - The MBAR expectation value for the energies and/or other samples that were provided.
- Temp_k ( List( float * simtk.unit.temperature ) ) - A new list of temperatures that includes the inserted intermediates.
"""
if mbar == None:
NumTemps = len(temperature_list) # Last TEMP # + 1 (start counting at 1)
T_from_file = np.array([temperature._value for temperature in temperature_list])
E_from_file = E_kln
originalK = len(T_from_file)
N_k = np.zeros(originalK, np.int32)
g = np.zeros(originalK, np.float64)
for k in range(originalK): # subsample the energies
g[k] = pymbar.timeseries.statisticalInefficiency(E_from_file[k][k])
indices = np.array(
pymbar.timeseries.subsampleCorrelatedData(E_from_file[k][k], g=g[k])
) # indices of uncorrelated samples
N_k[k] = len(indices)
E_from_file[k, k, 0 : N_k[k]] = E_from_file[k, k, indices]
if NumIntermediates > 0:
Temp_k = get_intermediate_temperatures(temperature_list, NumIntermediates)
else:
Temp_k = np.array([temperature._value for temperature in temperature_list])
# Update number of states
K = len(Temp_k)
# Loop, inserting E's into blank matrix (leaving blanks only where new Ts are inserted)
Nall_k = np.zeros(
[K], np.int32
) # Number of samples (n) for each state (k) = number of iterations/energies
try:
E_kn = np.zeros([K, len(E_from_file[0][0])], np.float64)
for k in range(originalK - 1):
E_kn[k + k * NumIntermediates, 0 : N_k[k]] = E_from_file[k, k, 0 : N_k[k]]
Nall_k[k + k * NumIntermediates] = N_k[k]
E_kn[-1][0 : N_k[-1]] = E_from_file[-1][-1][0 : N_k[-1]]
Nall_k[-1] = N_k[-1]
except:
E_kn = np.zeros([K, len(E_from_file[0])], np.float64)
for k in range(originalK):
E_kn[k + k * NumIntermediates, 0 : N_k[k]] = E_from_file[k, 0 : N_k[k]]
Nall_k[k + k * NumIntermediates] = N_k[k]
beta_k = 1 / (kB._value * Temp_k)
allE_expect = np.zeros([K], np.float64)
allE2_expect = np.zeros([K], np.float64)
dE_expect = np.zeros([K], np.float64)
u_kn = np.zeros(
[K, sum(Nall_k)], np.float64
) # u_kln is reduced pot. ener. of segment n of temp k evaluated at temp l
# index = 0
for k in range(K):
index = 0
for l in range(K):
u_kn[k, index : index + Nall_k[l]] = beta_k[k] * E_kn[l, 0 : Nall_k[l]]
index = index + Nall_k[l]
# ------------------------------------------------------------------------
# Initialize MBAR
# ------------------------------------------------------------------------
print("Initializing MBAR:")
print("--K = number of Temperatures with data = %d" % (originalK))
print("--L = number of total Temperatures = %d" % (K))
print("--N = number of Energies per Temperature = %d" % (np.max(Nall_k)))
mbar = pymbar.MBAR(u_kn, Nall_k, verbose=False, relative_tolerance=1e-12, initial_f_k=None)
E_kn = u_kn # not a copy, we are going to write over it, but we don't need it any more.
for k in range(K):
E_kn[k, :] *= beta_k[k] ** (
-1
) # get the 'unreduced' potential -- we can't take differences of reduced potentials because the beta is different; math is much more confusing with derivatives of the reduced potentials.
else:
E_kn = E_kln
Temp_k = temperature_list
if output != None:
results = mbar.computeExpectations(E_kn, output="differences", state_dependent=True)
results = {"mu": results[0], "sigma": results[1]}
result = results["mu"]
dresult = results["sigma"]
else:
results = mbar.computeExpectations(E_kn, state_dependent=True)
results = {"mu": results[0], "sigma": results[1]}
result = results["mu"]
dresult = results["sigma"]
return (mbar, E_kn, result, dresult, Temp_k)