Source code for cg_openmm.parameters.reweight

import numpy as np
from math import exp, log

# OpenMM utilities
import mdtraj as md
import simtk.unit as unit
import pymbar
from pymbar import timeseries
from scipy import interpolate
from scipy.optimize import minimize, minimize_scalar, Bounds, LinearConstraint
from matplotlib import pyplot as plt

kB = (unit.MOLAR_GAS_CONSTANT_R).in_units_of(unit.kilojoule / (unit.kelvin * unit.mole))

[docs]def bin_samples(sample_kn, n_bins): """ """ max_value, min_value = None, None for index_1 in range(len(sample_kn)): for index_2 in range(len(sample_kn[index_1])): sample = sample_kn[index_1][index_2] if max_value == None: max_value = sample if min_value == None: min_value = sample if sample > max_value: max_value = sample if sample < min_value: min_value = sample bin_size = (max_value - min_value) / (n_bins + 1) bins = np.array( [[min_value + i * bin_size, min_value + (i + 1) * bin_size] for i in range(n_bins)] ) bin_counts = np.zeros((len(sample_kn), len(bins))) for index_1 in range(len(sample_kn)): for index_2 in range(len(sample_kn[index])): sample = sample_kn[index_1][index_2] return (bins, bin_counts)
[docs]def get_decorrelated_samples(replica_positions, replica_energies, temperature_list): """ Given a set of replica exchange trajectories, energies, and associated temperatures, this function returns decorrelated samples, as obtained from pymbar with timeseries.subsampleCorrelatedData. :param replica_positions: Positions array for the replica exchange data for which we will write PDB files :type replica_positions: `Quantity() <http://docs.openmm.org/development/api-python/generated/simtk.unit.quantity.Quantity.html>`_ ( np.array( [n_replicas,cgmodel.num_beads,3] ), simtk.unit ) :param replica_energies: List of dimension num_replicas X simulation_steps, which gives the energies for all replicas at all simulation steps :type replica_energies: List( List( float * simtk.unit.energy for simulation_steps ) for num_replicas ) :param temperature_list: List of temperatures for the simulation data. :type temperature_list: List( float * simtk.unit.temperature ) :returns: - configurations ( List( `Quantity() <http://docs.openmm.org/development/api-python/generated/simtk.unit.quantity.Quantity.html>`_ (n_decorrelated_samples,cgmodel.num_beads,3), simtk.unit ) ) - A list of decorrelated samples - energies ( List( `Quantity() <http://docs.openmm.org/development/api-python/generated/simtk.unit.quantity.Quantity.html>`_ ) ) - The energies for the decorrelated samples (configurations) """ all_poses = [] all_energies = [] for replica_index in range(len(replica_positions)): energies = replica_energies[replica_index][replica_index] [t0, g, Neff_max] = timeseries.detectEquilibration(energies) energies_equil = energies[t0:] poses_equil = replica_positions[replica_index][t0:] indices = timeseries.subsampleCorrelatedData(energies_equil) for index in indices: all_energies.append(energies_equil[index]) all_poses.append(poses_equil[index]) all_energies = np.array([float(energy) for energy in all_energies]) return (all_poses, all_energies)
[docs]def get_entropy_differences(mbar): """ Given an `MBAR <https://pymbar.readthedocs.io/en/latest/mbar.html>`_ class object, this function computes the entropy differences for the states defined within. :param mbar: An MBAR() class object (from the 'pymbar' package) :type mbar: `MBAR <https://pymbar.readthedocs.io/en/latest/mbar.html>`_ class object :returns: - Delta_s ( np.array( n_mbar_states x n_thermo_states ) - Entropy differences for the thermodynamic states in 'mbar' - dDelta_s ( np.array( n_mbar_states x n_thermo_states ) - Uncertainty in the entropy differences for the thermodynamic states in 'mbar' """ results = mbar.computeEntropyAndEnthalpy() results = { "Delta_f": results[0], "dDelta_f": results[1], "Delta_u": results[2], "dDelta_u": results[3], "Delta_s": results[4], "dDelta_s": results[5], } Delta_s = results["Delta_s"] dDelta_s = results["dDelta_s"] return (Delta_s, dDelta_s)
[docs]def get_enthalpy_differences(mbar): """ Given an `MBAR <https://pymbar.readthedocs.io/en/latest/mbar.html>`_ class object, this function computes the enthalpy differences for the states defined within. :param mbar: An MBAR() class object (from the 'pymbar' package) :type mbar: `MBAR <https://pymbar.readthedocs.io/en/latest/mbar.html>`_ class object :returns: - Delta_u ( np.array( n_mbar_states x n_thermo_states ) - Enthalpy differences for the thermodynamic states in 'mbar' - dDelta_u ( np.array( n_mbar_states x n_thermo_states ) - Uncertainty in the enthalpy differences for the thermodynamic states in 'mbar' """ results = mbar.computeEntropyAndEnthalpy() results = { "Delta_f": results[0], "dDelta_f": results[1], "Delta_u": results[2], "dDelta_u": results[3], "Delta_s": results[4], "dDelta_s": results[5], } Delta_u = results["Delta_u"] dDelta_u = results["dDelta_u"] return (Delta_u, dDelta_u)
[docs]def get_free_energy_differences(mbar): """ Given an `MBAR <https://pymbar.readthedocs.io/en/latest/mbar.html>`_ class object, this function computes the free energy differences for the states defined within. :param mbar: An MBAR() class object (from the 'pymbar' package) :type mbar: `MBAR <https://pymbar.readthedocs.io/en/latest/mbar.html>`_ class object :returns: - df_ij ( np.array( n_mbar_states x n_thermo_states ) - Free energy differences for the thermodynamic states in 'mbar' - ddf_ij ( np.array( n_mbar_states x n_thermo_states ) - Uncertainty in the free energy differences for the thermodynamic states in 'mbar' """ results = mbar.getFreeEnergyDifferences() results = {"Delta_f": results[0], "dDelta_f": results[1]} df_ij = results["Delta_f"] ddf_ij = results["dDelta_f"] return (df_ij, ddf_ij)
[docs]def get_temperature_list(min_temp, max_temp, num_replicas): """ Given the parameters to define a temperature range as input, this function uses logarithmic spacing to generate a list of intermediate temperatures. :param min_temp: The minimum temperature in the temperature list. :type min_temp: `Quantity() <http://docs.openmm.org/development/api-python/generated/simtk.unit.quantity.Quantity.html>`_ :param max_temp: The maximum temperature in the temperature list. :type max_temp: `Quantity() <http://docs.openmm.org/development/api-python/generated/simtk.unit.quantity.Quantity.html>`_ :param num_replicas: The number of temperatures in the list. :type num_replicas: int :returns: - temperature_list ( 1D numpy array ( float * simtk.unit.temperature ) ) - List of temperatures """ T_unit = min_temp.unit temperature_list = np.logspace( np.log10(min_temp.value_in_unit(T_unit)), np.log10(max_temp.value_in_unit(T_unit)), num=num_replicas ) # Reassign units: temperature_list *= T_unit return temperature_list
[docs]def get_opt_temperature_list(temperature_list_init, C_v, number_intermediate_states=0, plotfile=None, verbose=True): """ Given an initial temperature list, and heat capacity curve that resulted from a replica exchange simulation using those temperatures, computes a revised temperature list satisfying the constant entropy increase (CEI) method :param temperature_list_init: List of temperatures for initial replica exchange run :type temperature_list_init: 1D numpy array ( float * simtk.unit.temperature ) :param C_v: List of heat capacities evaluated at each temperature in temperature_list_init :type C_v: 1D numpy array [ Quantity ] :param number_intermediate_states: number of unsampled states between each pair of sampled states (default=0) :type number_intermediate_states: int :param plotfile: path to filename for plotting spline fit to C_v/T vs. T (default=None) :type plotfile: str :param verbose: option to print final output of scipy optimization routines :type verbose: bool :returns: - T_opt_list ( 1D numpy array ( float * simtk.unit.temperature ) ) - New optimally spaced temperature list - deltaS_list ( 1D numpy array ( float * simtk.unit ) ) - Actual entropy increases for adjacent temperatures in T_opt_list """ # Process initial temperature list # Check for intermediate states from pymbar # (more intermediate states will give better estimate of new temperatures) T_init_sampled = temperature_list_init[::(number_intermediate_states+1)] T_init_sampled_val = np.zeros((len(T_init_sampled))) Tunit = T_init_sampled[0].unit for i in range(len(T_init_sampled)): T_init_sampled_val[i] = T_init_sampled[i].value_in_unit(Tunit) T_init_sampled = T_init_sampled_val # First and last temps are fixed bounds: T0 = T_init_sampled[0] TN = T_init_sampled[-1] # Fit C_v/T vs. T data to spline xdata = temperature_list_init ydata = C_v / temperature_list_init Cv_unit = C_v[0].unit # Strip units off quantities: if type(xdata[0]) == unit.quantity.Quantity: xdata_val = np.zeros((len(xdata))) xunit = xdata[0].unit for i in range(len(xdata)): xdata_val[i] = xdata[i].value_in_unit(xunit) xdata = xdata_val if type(ydata[0]) == unit.quantity.Quantity: ydata_val = np.zeros((len(ydata))) yunit = ydata[0].unit for i in range(len(ydata)): ydata_val[i] = ydata[i].value_in_unit(yunit) ydata = ydata_val # Fit cubic spline to data, no smoothing spline_tck = interpolate.splrep(xdata, ydata, s=0) if plotfile is not None: # Plot the spline fit: figure = plt.figure() line1 = plt.plot(xdata,ydata,'ok',fillstyle='none',label='simulation_data') xspline = np.linspace(xdata[0],xdata[-1],1000) yspline = interpolate.splev(xspline, spline_tck, der=0) line2 = plt.plot(xspline,yspline,'-b',label='spline fit') plt.xlabel('Temperature (K)') plt.ylabel('C_v / T (kJ/mol/K^2)') plt.legend() plt.savefig(f'{plotfile}') plt.close() # Fix the first and last temps, with intermediate temps varied in a minimization, # With constraint that the temperature spacings must sum to the original temp range. # The objective function is the standard deviation of the entropy differences between all adjacent temps. def entropy_stdev(T_deltas): # Compute standard deviation of entropy differences T_opt_list = np.zeros(len(T_deltas)+1) deltaS_list = np.zeros(len(T_deltas)) T_opt_list[0] = T0 for i in range(len(T_deltas)-1): T_opt_list[i+1] = T_opt_list[i]+T_deltas[i] T_opt_list[-1] = TN for i in range(len(T_deltas)): deltaS_list[i] = interpolate.splint(T_opt_list[i],T_opt_list[i+1], spline_tck) return np.std(deltaS_list) # For initial guess, use the original spacing: T_delta0 = T_init_sampled[1::]-T_init_sampled[0:-1] # Set up linear equality constraint: constraint = LinearConstraint( np.ones(len(T_init_sampled)-1), lb=(T_init_sampled[-1]-T_init_sampled[0]), ub=(T_init_sampled[-1]-T_init_sampled[0]), ) bounds = Bounds(np.ones(len(T_delta0))*1E-3,np.ones(len(T_delta0))*500) opt_results = minimize( entropy_stdev, T_delta0, bounds=bounds, constraints=constraint, method='SLSQP', options={ 'ftol':1E-12, 'maxiter':1E6} ) if not opt_results.success: print('Error: CEI optimization did not converge') print(f'Constant entropy increase optimization results:\n{opt_results}') exit() if verbose: print(f'Constant entropy increase optimization results:\n{opt_results}') # Retreive final temperature list and entropy diff list: T_deltas_opt = opt_results.x T_opt_list = np.zeros(len(T_deltas_opt)+1) deltaS_list = np.zeros(len(T_deltas_opt)) T_opt_list[0] = T0 for i in range(len(T_deltas_opt)): T_opt_list[i+1] = T_opt_list[i]+T_deltas_opt[i] for i in range(len(T_deltas_opt)): deltaS_list[i] = interpolate.splint(T_opt_list[i],T_opt_list[i+1], spline_tck) return T_opt_list*unit.kelvin, deltaS_list*Cv_unit
[docs]def get_intermediate_temperatures(T_from_file, NumIntermediates): """ Given a list of temperatures and a number of intermediate states as input, this function calculates the values for temperatures intermediate between those in this list, as the mean between values in the list. :param T_from_file: List of temperatures :type T_from_file: List( float * simtk.unit.temperature ) :param NumIntermediates: The number of states to insert between existing states in 'T_from_file' :type NumIntermediates: int :returns: - Temp_k ( List( float * simtk.unit.temperature ) ) - A new list of temperatures that includes the inserted intermediates. """ deltas = [] for i in range(1, len(T_from_file)): deltas.append((T_from_file[i]._value - T_from_file[i - 1]._value) / (NumIntermediates + 1)) deltas.append((T_from_file[i]._value - T_from_file[i - 1]._value) / (NumIntermediates + 1)) originalK = len(T_from_file) Temp_k = [] val_k = [] current_T = min([T_from_file[i]._value for i in range(len(T_from_file))]) for delta in deltas: current_T = current_T + delta Temp_k.append(current_T) if len(Temp_k) != ( len(T_from_file) + NumIntermediates * (len(T_from_file) - NumIntermediates - 1) ): print("Error: new temperatures are not being assigned correctly.") print( "There were " + str(len(T_from_file)) + " temperatures before inserting intermediates," ) print(str(NumIntermediates) + " intermediate strucutures were requested,") print( "and there were " + str(len(Temp_k)) + " temperatures after inserting intermediates." ) exit() Temp_k = np.array([temp for temp in Temp_k]) return Temp_k
[docs]def get_mbar_expectation(E_kln, temperature_list, NumIntermediates, output=None, mbar=None): """ Given a properly-formatted matrix of energies with associated temperatures this function reweights with MBAR (if 'mbar'=None), and can also compute the expectation value for any property of interest. .. warning:: This function accepts an input matrix thtat has either 'E_kln' or 'E_kn' format, but always provides an 'E_kn'-formatted matrix in return. :param E_kln: A matrix of energies or samples for a property that we would like to use to make predictions with MBAR. :type E_kln: List( List( float * simtk.unit.energy for simulation_steps ) for num_replicas ) OR List( List( List( float * simtk.unit.energy for simulation_steps ) for num_replicas ) for num_replicas ) :param temperature_list: List of temperatures for the simulation data. :type temperature_list: List( float * simtk.unit.temperature ) :param NumIntermediates: The number of states to insert between existing states in 'T_from_file' :type NumIntermediates: int :param output: The 'output' option to use when calling MBAR, default = 'differences' :type output: str :param mbar: An MBAR() class object (from the 'pymbar' package), default = None :type mbar: `MBAR <https://pymbar.readthedocs.io/en/latest/mbar.html>`_ class object :returns: - mbar ( `MBAR <https://pymbar.readthedocs.io/en/latest/mbar.html>`_ ) - An MBAR() class object (from the 'pymbar' package) - E_kn ( List( List( float * simtk.unit.energy for num_samples ) for num_replicas ) ) - A matrix of energies or samples for a property that we would like to use to make predictions with MBAR. - result ( List( List( float for num_samples ) for num_replicas ) - The MBAR expectation value for the energies and/or other samples that were provided. - dresult ( List( List( float for num_samples ) for num_replicas ) - The MBAR expectation value for the energies and/or other samples that were provided. - Temp_k ( List( float * simtk.unit.temperature ) ) - A new list of temperatures that includes the inserted intermediates. """ if mbar == None: NumTemps = len(temperature_list) # Last TEMP # + 1 (start counting at 1) T_from_file = np.array([temperature._value for temperature in temperature_list]) E_from_file = E_kln originalK = len(T_from_file) N_k = np.zeros(originalK, np.int32) g = np.zeros(originalK, np.float64) for k in range(originalK): # subsample the energies g[k] = pymbar.timeseries.statisticalInefficiency(E_from_file[k][k]) indices = np.array( pymbar.timeseries.subsampleCorrelatedData(E_from_file[k][k], g=g[k]) ) # indices of uncorrelated samples N_k[k] = len(indices) E_from_file[k, k, 0 : N_k[k]] = E_from_file[k, k, indices] if NumIntermediates > 0: Temp_k = get_intermediate_temperatures(temperature_list, NumIntermediates) else: Temp_k = np.array([temperature._value for temperature in temperature_list]) # Update number of states K = len(Temp_k) # Loop, inserting E's into blank matrix (leaving blanks only where new Ts are inserted) Nall_k = np.zeros( [K], np.int32 ) # Number of samples (n) for each state (k) = number of iterations/energies try: E_kn = np.zeros([K, len(E_from_file[0][0])], np.float64) for k in range(originalK - 1): E_kn[k + k * NumIntermediates, 0 : N_k[k]] = E_from_file[k, k, 0 : N_k[k]] Nall_k[k + k * NumIntermediates] = N_k[k] E_kn[-1][0 : N_k[-1]] = E_from_file[-1][-1][0 : N_k[-1]] Nall_k[-1] = N_k[-1] except: E_kn = np.zeros([K, len(E_from_file[0])], np.float64) for k in range(originalK): E_kn[k + k * NumIntermediates, 0 : N_k[k]] = E_from_file[k, 0 : N_k[k]] Nall_k[k + k * NumIntermediates] = N_k[k] beta_k = 1 / (kB._value * Temp_k) allE_expect = np.zeros([K], np.float64) allE2_expect = np.zeros([K], np.float64) dE_expect = np.zeros([K], np.float64) u_kn = np.zeros( [K, sum(Nall_k)], np.float64 ) # u_kln is reduced pot. ener. of segment n of temp k evaluated at temp l # index = 0 for k in range(K): index = 0 for l in range(K): u_kn[k, index : index + Nall_k[l]] = beta_k[k] * E_kn[l, 0 : Nall_k[l]] index = index + Nall_k[l] # ------------------------------------------------------------------------ # Initialize MBAR # ------------------------------------------------------------------------ print("Initializing MBAR:") print("--K = number of Temperatures with data = %d" % (originalK)) print("--L = number of total Temperatures = %d" % (K)) print("--N = number of Energies per Temperature = %d" % (np.max(Nall_k))) mbar = pymbar.MBAR(u_kn, Nall_k, verbose=False, relative_tolerance=1e-12, initial_f_k=None) E_kn = u_kn # not a copy, we are going to write over it, but we don't need it any more. for k in range(K): E_kn[k, :] *= beta_k[k] ** ( -1 ) # get the 'unreduced' potential -- we can't take differences of reduced potentials because the beta is different; math is much more confusing with derivatives of the reduced potentials. else: E_kn = E_kln Temp_k = temperature_list if output != None: results = mbar.computeExpectations(E_kn, output="differences", state_dependent=True) results = {"mu": results[0], "sigma": results[1]} result = results["mu"] dresult = results["sigma"] else: results = mbar.computeExpectations(E_kn, state_dependent=True) results = {"mu": results[0], "sigma": results[1]} result = results["mu"] dresult = results["sigma"] return (mbar, E_kn, result, dresult, Temp_k)