Source code for cg_openmm.parameters.free_energy

import os

import matplotlib.pyplot as plt
import numpy as np
import pymbar
from cg_openmm.utilities.iotools import write_pdbfile_without_topology
from cg_openmm.utilities.random_builder import *
from openmm import unit
from openmmtools.multistate import MultiStateReporter, ReplicaExchangeAnalyzer
from scipy import interpolate
from sklearn.utils import resample

kB = unit.MOLAR_GAS_CONSTANT_R # Boltzmann constant
kB = kB.in_units_of(unit.kilojoule/unit.kelvin/unit.mole)


[docs]def classify_Q_states(Q, Q_folded): """ This function determines the conformational state of each element of the native contacts array, given a threshold for being 'folded/unfolded'. In the outputted array, 1 is 'folded' and 0 is 'unfolded'. If Q_folded is a list, of multiple cutoffs, additional classification is performed. :param Q: native contact fraction array of size [n_frames x n_states] :type Q: 2D numpy array ( float ) :param Q_folded: threshold for a native contact fraction corresponding to a folded state (Q[i,j] is folded if Q[i,j] >= Q_folded) :type Q_folded: float :returns: - array_folded_states ( 2D numpy array ( float ) ) - conformational state matrix of shape [n_frames x n_replicas] """ if type(Q_folded) == list: # Multiple states specified # Sort to ascending order: Q_folded = sorted(Q_folded) array_folded_states = np.zeros_like(Q) for q in Q_folded: array_folded_states += np.multiply((Q>=q),1) # 0 is unfolded, 1 is folded state with smallest Q cutoff, 2 is folded state with # next largest Q cutoff, and so on. else: # Assume a binary folded/unfolded system array_folded_states = np.multiply((Q>=Q_folded),1) return array_folded_states
[docs]def expectations_free_energy(Q, Q_folded, temperature_list, frame_begin=0, sample_spacing=1, output_data="output/output.nc", bootstrap_energies=None, num_intermediate_states=0, array_folded_states=None): """ This function calculates the free energy difference (with uncertainty) between all conformational states as a function of temperature. :param Q: native contact fraction array of size [n_frames x n_states] :type Q: 2D numpy array ( float ) :param Q_folded: threshold for a native contact fraction corresponding to a folded state (Q[i,j] is folded if Q[i,j] >= Q_folded) :type Q_folded: float or list ( float ) :param temperature_list: List of temperatures for the simulation data (necessary because bootstrap version doesn't read in the file) :type temperature_list: List( float * simtk.unit.temperature ) :param frame_begin: index of first frame defining the range of samples to use as a production period (default=0) :type frame_begin: int :param sample_spacing: spacing of uncorrelated data points, for example determined from pymbar timeseries subsampleCorrelatedData :type sample_spacing: int :param output_data: Path to the simulation .nc file. :type output_data: str :param num_intermediate_states: Number of unsampled thermodynamic states between sampled states to include in the calculation :type num_intermediate_states: int :param bootstrap_energies: a custom replica_energies array to be used for bootstrapping calculations. Used instead of the energies in the .nc file. :type bootstrap_energies: 2d numpy array (float) :param array_folded_states: a precomputed array classifying the different conformational states :type array_folded_states: 2d numpy array (int) :returns: - full_T_list - A 1D numpy array listing of all temperatures, including sampled and intermediate unsampled - deltaF_values - A dictionary of the form {"statei_statej": 1D numpy array}, containing free energy change for each T in full_T_list, for each conformational state transition. - deltaF uncertainty - A dictionary containing 1D numpy arrays of uncertainties corresponding to deltaF_values """ if bootstrap_energies is not None: # Use a subsampled replica_energy matrix instead of reading from file replica_energies = bootstrap_energies else: # extract reduced energies and the state indices from the .nc reporter = MultiStateReporter(output_data, open_mode="r") analyzer = ReplicaExchangeAnalyzer(reporter) ( replica_energies_all, unsampled_state_energies, neighborhoods, replica_state_indices, ) = analyzer.read_energies() # Close the data file: reporter.close() # Select production frames to analyze replica_energies = replica_energies_all[:,:,frame_begin::sample_spacing] # Check the size of the Q array: if np.shape(replica_energies)[2] != np.shape(Q)[0]: # Mismatch in number of frames. if np.shape(replica_energies_all[:,:,frame_begin::sample_spacing])[2] == np.shape(Q[::sample_spacing,:])[0]: # Correct starting frame, need to apply sampling stride: Q = Q[::sample_spacing,:] elif np.shape(replica_energies_all)[2] == np.shape(Q)[0]: # This is the full Q, slice production frames: Q = Q[production_start::sample_spacing,:] else: print(f'Error: Q array of shape {Q.shape} incompatible with energies array of shape {replica_energies.shape}') exit() # Classify Q into folded/unfolded states if array_folded_states is None: array_folded_states = classify_Q_states(Q,Q_folded) else: # Use a precomputed array_folded_states instead of standard classification scheme. # Q and Q_folded inputs are ignored. #***The array_folded_states should be sliced with sample_spacing if not a bootstrap calc: pass # Number of configurational states: n_conf_states = len(np.unique(array_folded_states)) # convert the energies from replica/evaluated state/sample form to evaluated state/sample form replica_energies = pymbar.utils.kln_to_kn(replica_energies) n_samples = len(replica_energies[0,:]) # Reshape array_folded_states to row vector for pymbar # We need to order the data by replica, rather than by frame array_folded_states = np.reshape(array_folded_states,(np.size(array_folded_states)),order='F') # determine the numerical values of beta at each state in units consisten with the temperature Tunit = temperature_list[0].unit temps = np.array([temp.value_in_unit(Tunit) for temp in temperature_list]) # should this just be array to begin with beta_k = 1 / (kB.value_in_unit(unit.kilojoule_per_mole/Tunit) * temps) # calculate the number of states we need expectations at. We want it at all of the original # temperatures, each intermediate temperature, and then temperatures +/- from the original # to take finite derivatives. # create an array for the temperature and energy for each state, including the # finite different state. n_sampled_T = len(temps) n_unsampled_states = (n_sampled_T + (n_sampled_T-1)*num_intermediate_states) unsampled_state_energies = np.zeros([n_unsampled_states,n_samples]) full_T_list = np.zeros(n_unsampled_states) # delta is the spacing between temperatures. delta = np.zeros(n_sampled_T-1) # fill in a list of temperatures at all original temperatures and all intermediate states. full_T_list[0] = temps[0] t = 0 for i in range(n_sampled_T-1): delta[i] = (temps[i+1] - temps[i])/(num_intermediate_states+1) for j in range(num_intermediate_states+1): full_T_list[t] = temps[i] + delta[i]*j t += 1 full_T_list[t] = temps[-1] n_T_vals = t+1 # calculate betas of all of these temperatures beta_full_k = 1 / (kB.value_in_unit(unit.kilojoule_per_mole/Tunit) * full_T_list) ti = 0 N_k = np.zeros(n_unsampled_states) for k in range(n_unsampled_states): # Calculate the reduced energies at all temperatures, sampled and unsample. unsampled_state_energies[k, :] = replica_energies[0,:]*(beta_full_k[k]/beta_k[0]) if ti < len(temps): # store in N_k which states do and don't have samples. if full_T_list[k] == temps[ti]: ti += 1 N_k[k] = n_samples//len(temps) # these are the states that have samples # call MBAR to find weights at all states, sampled and unsampled solver_protocol = {"method":"L-BFGS-B"} mbarT = pymbar.MBAR( unsampled_state_energies,N_k,verbose=False,relative_tolerance=1e-12, maximum_iterations=10000,solver_protocol=(solver_protocol,), ) # Calculate N expectations that a structure is in configurational state n # We need the probabilities of being in each state - first construct vectors of 0 # (not in current state) and 1 (in current state) bool_i = np.zeros((n_conf_states,array_folded_states.shape[0])) for i in range(n_conf_states): i_vector = np.full_like(array_folded_states,i) # Convert True/False to integer 1/0 for each energy data point: bool_i[i] = np.multiply((i_vector==array_folded_states),1) # Calculate the expectation of F at each unsampled states # Loop over each thermodynamic state: results = {} for i in range(len(full_T_list)): U_n = unsampled_state_energies[i,:] # compute expectations of being in conformational state n # Store results in a dictionary results[str(i)] = mbarT.computeMultipleExpectations( bool_i,U_n,compute_covariance=True) deltaF_values = {} deltaF_uncertainty = {} n_trans = 0 # store the number of unique transitions F_unit = (-kB*full_T_list[0]*Tunit).unit # units of free energy # Initialize the results dictionaries for s1 in range(n_conf_states): for s2 in range(s1+1,n_conf_states): n_trans += 1 deltaF_values[f"state{s1}_state{s2}"] = np.zeros(len(full_T_list)) deltaF_uncertainty[f"state{s1}_state{s2}"] = np.zeros(len(full_T_list)) # Compute free energies from probability ratios: for i in range(len(full_T_list)): for s1 in range(n_conf_states): for s2 in range(s1+1,n_conf_states): # Free energy change for s2 --> s1 at temp i deltaF_values[f"state{s1}_state{s2}"][i] = ( -kB*full_T_list[i]*Tunit*( np.log(results[str(i)][0][s1])- np.log(results[str(i)][0][s2]))).value_in_unit(F_unit) # Get covariance matrix: theta_i = results[str(i)][2] deltaF_uncertainty[f"state{s1}_state{s2}"][i] = ( kB*full_T_list[i]*unit.kelvin*np.sqrt( theta_i[s1,s1] + theta_i[s2,s2] - (theta_i[s2,s1]+theta_i[s1,s2]))).value_in_unit(F_unit) # Add the units back on: for s1 in range(n_conf_states): for s2 in range(s1+1,n_conf_states): deltaF_values[f"state{s1}_state{s2}"] *= F_unit deltaF_uncertainty[f"state{s1}_state{s2}"] *= F_unit full_T_list *= Tunit return full_T_list, deltaF_values, deltaF_uncertainty
[docs]def bootstrap_free_energy_folding(Q, Q_folded, array_folded_states=None, output_data="output/output.nc", frame_begin=0, sample_spacing=1, n_trial_boot=200, num_intermediate_states=0, conf_percent='sigma', plotfile_dir="output"): """ Function for computing uncertainty of free energy, entropy, and enthalpy using bootstrapping with varying starting frames. :param Q: native contact fraction array of size [n_frames x n_states] (with equilibration region already trimmed) :type Q: 2D numpy array ( float ) :param Q_folded: threshold for a native contact fraction corresponding to a folded state (Q[i,j] is folded if Q[i,j] >= Q_folded) :type Q_folded: float or list ( float ) :param array_folded_states: a precomputed array classifying the different conformational states :type array_folded_states: 2d numpy array (int) :param output_data: Path to the simulation .nc file. :type output_data: str :param frame_begin: index of first frame defining the range of samples to use as a production period (default=0) :type frame_begin: int :param sample_spacing: spacing of uncorrelated data points, for example determined from pymbar timeseries subsampleCorrelatedData :type sample_spacing: int :param n_trial_boot: number of trials to run for generating bootstrapping uncertainties (default=200) :type n_trial_boot: int :param num_intermediate_states: Number of unsampled thermodynamic states between sampled states to include in the calculation :type num_intermediate_states: int :returns: - full_T_list - A 1D numpy array listing of all temperatures, including sampled and intermediate unsampled - deltaF_values - A dictionary of the form {"statei_statej": 1D numpy array}, containing free energy change for each T in full_T_list, for each conformational state transition. - deltaF uncertainty - A dictionary containing tuple of 1D numpy arrays of lower/upper of uncertainties corresponding to deltaF_values - deltaS_values - A dictionary of the form {"statei_statej": 1D numpy array}, containing entropy change for each T in full_T_list, for each conformational state transition. - deltaS uncertainty - A dictionary containing tuple of 1D numpy arrays of lower/upper uncertainties corresponding to deltaS_values - deltaU_values - A dictionary of the form {"statei_statej": 1D numpy array}, containing enthalpy change for each T in full_T_list, for each conformational state transition. - deltaU uncertainty - A dictionary containing tuple of 1D numpy arrays of lower/upper of uncertainties corresponding to deltaU_values """ # extract reduced energies and the state indices from the .nc reporter = MultiStateReporter(output_data, open_mode="r") analyzer = ReplicaExchangeAnalyzer(reporter) ( replica_energies_all, unsampled_state_energies, neighborhoods, replica_state_indices, ) = analyzer.read_energies() # Get temperature_list from .nc file: states = reporter.read_thermodynamic_states()[0] temperature_list = [] for s in states: temperature_list.append(s.temperature) # Close the data file: reporter.close() # Select production frames to analyze replica_energies_prod = replica_energies_all[:,:,frame_begin::] # For shifting reference frame bootstrap, we need the entire Q and energy arrays starting from frame_start if array_folded_states is None: if np.shape(replica_energies_prod)[2] != np.shape(Q)[0]: print(f'Error: Q array of shape {Q.shape} incompatible with energies array of shape {replica_energies_prod.shape}') exit() else: if np.shape(replica_energies_prod)[2] != np.shape(array_folded_states)[0]: print(f'Error: Q array of shape {Q.shape} incompatible with energies array of shape {replica_energies_prod.shape}') exit() if array_folded_states is None: # Use the raw contact fractions as input to the free energy function Q_all = Q else: # Use the precomputed conformational state array as input to the free energy function array_folded_states_all = array_folded_states # Overall results: deltaF_values = {} deltaF_uncertainty = {} deltaS_values = {} deltaS_uncertainty = {} deltaU_values = {} deltaU_uncertainty = {} # Uncertainty for each sampling trial: deltaF_values_boot = {} deltaF_uncertainty_boot = {} deltaS_values_boot = {} deltaS_uncertainty_boot = {} deltaU_values_boot = {} deltaU_uncertainty_boot = {} # Get units: F_unit = (kB*unit.kelvin).unit # units of free energy T_unit = temperature_list[0].unit S_unit = F_unit/T_unit U_unit = F_unit for i_boot in range(n_trial_boot): # Here we can potentially change the reference frame for each bootstrap trial. # This requires the array slicing to be done here, not above. ref_shift = np.random.randint(sample_spacing) # Replica energies and Q already have equilibration period removed: replica_energies = replica_energies_prod[:,:,ref_shift::sample_spacing] if array_folded_states is None: Q = Q_all[ref_shift::sample_spacing,:] n_states = len(Q[0,:]) else: array_folded_states = array_folded_states_all[ref_shift::sample_spacing,:] n_states = len(array_folded_states[0,:]) # Get all possible sample indices sample_indices_all = np.arange(0,len(replica_energies[0,0,:])) # n_samples should match the size of the sliced replica energy dataset sample_indices = resample(sample_indices_all, replace=True, n_samples=len(sample_indices_all)) replica_energies_resample = np.zeros_like(replica_energies) # replica_energies is [n_states x n_states x n_frame] # Q is [nframes x n_states] if array_folded_states is None: Q_resample = np.zeros((len(sample_indices),n_states)) else: array_folded_states_resample = np.zeros((len(sample_indices),n_states)) # Select the sampled frames from Q and replica_energies: j = 0 if array_folded_states is None: for i in sample_indices: replica_energies_resample[:,:,j] = replica_energies[:,:,i] Q_resample[j,:] = Q[i,:] j += 1 else: for i in sample_indices: replica_energies_resample[:,:,j] = replica_energies[:,:,i] array_folded_states_resample[j,:] = array_folded_states[i,:] j += 1 # Run free energy expectation calculation: if array_folded_states is None: full_T_list, deltaF_values_boot[i_boot], deltaF_uncertainty_boot[i_boot] = expectations_free_energy( Q_resample, Q_folded, temperature_list, bootstrap_energies=replica_energies_resample, num_intermediate_states=num_intermediate_states, ) else: full_T_list, deltaF_values_boot[i_boot], deltaF_uncertainty_boot[i_boot] = expectations_free_energy( None, None, temperature_list, bootstrap_energies=replica_energies_resample, num_intermediate_states=num_intermediate_states, array_folded_states=array_folded_states_resample, ) # Get entropy/enthalpy for fitting current free energy data: # The inner dictionary keys will be transition names deltaS_values_boot[i_boot] = {} deltaU_values_boot[i_boot] = {} deltaS_values_boot[i_boot], deltaU_values_boot[i_boot] = get_entropy_enthalpy( deltaF_values_boot[i_boot], full_T_list) arr_deltaF_values_boot = {} arr_deltaS_values_boot = {} arr_deltaU_values_boot = {} # Loop over all conformational transitions: for key, value in deltaF_values_boot[0].items(): arr_deltaF_values_boot[key] = np.zeros((n_trial_boot, len(full_T_list))) arr_deltaS_values_boot[key] = np.zeros((n_trial_boot, len(full_T_list))) arr_deltaU_values_boot[key] = np.zeros((n_trial_boot, len(full_T_list))) # Compute mean values: # Free energy: for i_boot in range(n_trial_boot): for key, value in deltaF_values_boot[i_boot].items(): arr_deltaF_values_boot[key][i_boot,:] = value.value_in_unit(F_unit) deltaF_values = {} for key, value in arr_deltaF_values_boot.items(): deltaF_values[key] = np.mean(value,axis=0)*F_unit # Entropy: for i_boot in range(n_trial_boot): for key, value in deltaS_values_boot[i_boot].items(): arr_deltaS_values_boot[key][i_boot,:] = deltaS_values_boot[i_boot][key].value_in_unit(S_unit) deltaS_values = {} for key, value in arr_deltaS_values_boot.items(): deltaS_values[key] = np.mean(value,axis=0)*S_unit # Enthalpy: for i_boot in range(n_trial_boot): for key, value in deltaU_values_boot[i_boot].items(): arr_deltaU_values_boot[key][i_boot,:] = deltaU_values_boot[i_boot][key].value_in_unit(U_unit) deltaU_values = {} for key, value in arr_deltaU_values_boot.items(): deltaU_values[key] = np.mean(value,axis=0)*U_unit # Compute confidence intervals: deltaF_uncertainty = {} deltaS_uncertainty = {} deltaU_uncertainty = {} if conf_percent == 'sigma': # Use analytical standard deviation instead of percentile method: # Free energy: for key, value in arr_deltaF_values_boot.items(): F_std = np.std(value,axis=0)*F_unit deltaF_uncertainty[key] = (-F_std,F_std) # Entropy: for key, value in arr_deltaS_values_boot.items(): S_std = np.std(value,axis=0)*S_unit deltaS_uncertainty[key] =(-S_std,S_std) # Enthalpy: for key, value in arr_deltaU_values_boot.items(): U_std = np.std(value,axis=0)*U_unit deltaU_uncertainty[key] = (-U_std,U_std) else: # Compute specified confidence interval: p_lo = (100-conf_percent)/2 p_hi = 100-p_lo # Free energy: for key, value in arr_deltaF_values_boot.items(): F_diff = value-np.mean(value,axis=0) F_conf_lo = np.percentile(F_diff,p_lo,axis=0,interpolation='linear')*F_unit F_conf_hi = np.percentile(F_diff,p_hi,axis=0,interpolation='linear')*F_unit deltaF_uncertainty[key] = (F_conf_lo, F_conf_hi) # Entropy: for key, value in arr_deltaS_values_boot.items(): S_diff = value-np.mean(value,axis=0) S_conf_lo = np.percentile(S_diff,p_lo,axis=0,interpolation='linear')*S_unit S_conf_hi = np.percentile(S_diff,p_hi,axis=0,interpolation='linear')*S_unit deltaS_uncertainty[key] = (S_conf_lo, S_conf_hi) # Enthalpy: for key, value in arr_deltaU_values_boot.items(): U_diff = value-np.mean(value,axis=0) U_conf_lo = np.percentile(U_diff,p_lo,axis=0,interpolation='linear')*U_unit U_conf_hi = np.percentile(U_diff,p_hi,axis=0,interpolation='linear')*U_unit deltaU_uncertainty[key] = (U_conf_lo, U_conf_hi) # Plot results: # Free energy: plot_free_energy_results( full_T_list, deltaF_values, deltaF_uncertainty, plotfile=f"{plotfile_dir}/free_energy_boot.pdf") # Entropy and enthalpy: plot_entropy_enthalpy( full_T_list, deltaS_values, deltaU_values, deltaS_uncertainty=deltaS_uncertainty, deltaU_uncertainty=deltaU_uncertainty, plotfile_entropy=f"{plotfile_dir}/entropy_boot.pdf", plotfile_enthalpy=f"{plotfile_dir}/enthalpy_boot.pdf") return full_T_list, deltaF_values, deltaF_uncertainty, deltaS_values, deltaS_uncertainty, deltaU_values, deltaU_uncertainty
[docs]def get_entropy_enthalpy(deltaF, temperature_list): """ Compute enthalpy change and entropy change upon folding, given free energy of folding for a series of temperatures. :param deltaF: A dictionary containing free energy change for each T in full_T_list, for each conformational state transition. :type deltaF: dict{"statei_statej":1D numpy array} :param temperature_list: List of temperatures for the simulation data. :type temperature_list: List( float * simtk.unit.temperature ) :param plotfile_entropy: path to filename for entropy plot (no plot created if None) :type plotfile_entropy: str :param plotfile_enthalpy: path to filename for enthalpy plot (no plot created if None) :type plotfile_enthalpy: str :returns: - deltaS - dict{"statei_statej":1D numpy array} of entropy of folding values for each temperature in temperature_list - deltaU - dict{"statei_statej":1D numpy array} of enthalpy of folding values for each temperature in temperature_list """ ddeltaF = {} d2deltaF = {} spline_tck = {} deltaS = {} deltaU = {} T_unit = temperature_list[0].unit # Loop over all conformational transitions: for key,value in deltaF.items(): ddeltaF[key], d2deltaF[key], spline_tck[key] = \ get_free_energy_derivative(value, temperature_list) F_unit = value[0].unit S_unit = F_unit/T_unit U_unit = F_unit # Spline fitting function strips off units - add back: deltaS[key] = -ddeltaF[key] * F_unit / T_unit deltaU[key] = value + temperature_list*deltaS[key] return deltaS, deltaU
[docs]def plot_entropy_enthalpy( full_T_list, deltaS_values, deltaU_values, deltaS_uncertainty=None, deltaU_uncertainty=None, plotfile_entropy='entropy.pdf', plotfile_enthalpy='enthalpy.pdf'): """ Plot entropy and enthalpy difference data for each conformational state transition as a function of temperature. :param full_T_list: Array listing of all temperatures, including sampled and intermediate unsampled :type full_T_list: 1D numpy array :param deltaS_values: A dictionary containing entropy change for each T in full_T_list, for each conformational state transition. :type deltaS_values: dict{"statei_statej":1D numpy array} :param deltaU_values: A dictionary containing enthalpy change for each T in full_T_list, for each conformational state transition. :type deltaU_values: dict{"statei_statej":1D numpy array} :param deltaS_uncertainty: A dictionary containing uncertainties corresponding to deltaS_values (optional) :type deltaS_uncertainty: dict{"statei_statej": (1D numpy array, 1D numpy array)} :param deltaH_uncertainty: A dictionary containing uncertainties corresponding to deltaU_values (optional) :type deltaH_uncertainty: dict{"statei_statej": (1D numpy array, 1D numpy array)} :param plotfile_entropy: name of entropy plot file, including pdf extension :type plotfile_entropy: str :param plotfile_enthalpy: name of enthalpy plot file, including pdf extension :type plotfile_enthalpy: str """ T_unit = full_T_list[0].unit S_unit = list(deltaS_values.items())[0][1].unit U_unit = list(deltaU_values.items())[0][1].unit xlabel = f'Temperature {T_unit.get_symbol()}' # Plot entropy change as a function of T: ylabel = f'Entropy change {S_unit.get_symbol()}' legend_str = [] if deltaS_uncertainty is not None: for key,value in deltaS_values.items(): if type(deltaS_uncertainty[f"{key}"]) == tuple: # Use separate upper and lower errorbars deltaS_uncertainty_value = np.zeros((2,len(full_T_list))) deltaS_uncertainty_value[0,:] = -deltaS_uncertainty[f"{key}"][0].value_in_unit(S_unit) # Lower error deltaS_uncertainty_value[1,:] = deltaS_uncertainty[f"{key}"][1].value_in_unit(S_unit) # Upper error else: # Use single symmetric errorbar deltaS_uncertainty_value = deltaS_uncertainty[f"{key}"].value_in_unit(S_unit) plt.errorbar( full_T_list.value_in_unit(T_unit), deltaS_values[f"{key}"].value_in_unit(S_unit), deltaS_uncertainty_value, linewidth=1, markersize=6, fmt='o-', fillstyle='none', capsize=4, ) legend_str.append(key) else: for key,value in deltaS_values.items(): plt.plot( full_T_list.value_in_unit(T_unit), deltaS_values[f"{key}"].value_in_unit(S_unit), 'o-', linewidth=1, markersize=6, fillstyle='none', ) legend_str.append(key) plt.xlabel(xlabel) plt.ylabel(ylabel) pyplot.legend(legend_str) plt.savefig(f"{plotfile_entropy}") plt.close() # Plot enthalpy change as a function of T: ylabel = f'Enthalpy change {U_unit.get_symbol()}' legend_str = [] if deltaU_uncertainty is not None: for key,value in deltaU_values.items(): if type(deltaU_uncertainty[f"{key}"]) == tuple: # Use separate upper and lower errorbars deltaU_uncertainty_value = np.zeros((2,len(full_T_list))) deltaU_uncertainty_value[0,:] = -deltaU_uncertainty[f"{key}"][0].value_in_unit(U_unit) # Lower error deltaU_uncertainty_value[1,:] = deltaU_uncertainty[f"{key}"][1].value_in_unit(U_unit) # Upper error else: # Use single symmetric errorbar deltaU_uncertainty_value = deltaU_uncertainty[f"{key}"].value_in_unit(U_unit) plt.errorbar( full_T_list.value_in_unit(T_unit), deltaU_values[f"{key}"].value_in_unit(U_unit), deltaU_uncertainty_value, linewidth=1, markersize=6, fmt='o-', fillstyle='none', capsize=4, ) legend_str.append(key) else: for key,value in deltaU_values.items(): plt.plot( full_T_list.value_in_unit(T_unit), deltaU_values[f"{key}"].value_in_unit(U_unit), 'o-', linewidth=1, markersize=6, fillstyle='none', ) legend_str.append(key) plt.xlabel(xlabel) plt.ylabel(ylabel) pyplot.legend(legend_str) plt.savefig(f"{plotfile_enthalpy}") plt.close() return
[docs]def get_free_energy_derivative(deltaF, temperature_list, plotfile=None): """ Fit a heat capacity vs T dataset to cubic spline, and compute derivatives :param deltaF: free energy of folding data series :type deltaF: Quantity or numpy 1D array :param temperature_list: List of temperatures used in replica exchange simulations :type temperature: Quantity or numpy 1D array :param plotfile: path to filename to output plot (default=None) :type plotfile: str :returns: - dF_out ( 1D numpy array (float) ) - 1st derivative of free energy, from a cubic spline evaluated at each point in deltaF) - d2F_out ( 1D numpy array (float) ) - 2nd derivative of free energy, from a cubic spline evaluated at each point in deltaF) - spline_tck ( scipy spline object (tuple) ) - knot points (t), coefficients (c), and order of the spline (k) fit to deltaF data """ xdata = temperature_list ydata = deltaF # Strip units off quantities: if type(xdata[0]) == unit.quantity.Quantity: xdata_val = np.zeros((len(xdata))) xunit = xdata[0].unit for i in range(len(xdata)): xdata_val[i] = xdata[i].value_in_unit(xunit) xdata = xdata_val if type(ydata[0]) == unit.quantity.Quantity: ydata_val = np.zeros((len(ydata))) yunit = ydata[0].unit for i in range(len(ydata)): ydata_val[i] = ydata[i].value_in_unit(yunit) ydata = ydata_val # Fit cubic spline to data, no smoothing spline_tck = interpolate.splrep(xdata, ydata, s=0) xfine = np.linspace(xdata[0],xdata[-1],1000) yfine = interpolate.splev(xfine, spline_tck, der=0) dF = interpolate.splev(xfine, spline_tck, der=1) d2F = interpolate.splev(xfine, spline_tck, der=2) dF_out = interpolate.splev(xdata, spline_tck, der=1) d2F_out = interpolate.splev(xdata, spline_tck, der=2) if plotfile != None: figure, axs = plt.subplots(nrows=3,ncols=1,sharex=True) axs[0].plot(xdata,ydata,'ok', markersize=4, fillstyle='none', label='simulation data', ) axs[0].plot(xfine,yfine,'-b', label='cubic spline', ) axs[0].legend() axs[0].set_ylabel(r'$\Delta F (J/mol)$') axs[1].plot(xfine,dF,'-r', label=r'$\frac{d\Delta F}{dT}$', ) axs[1].legend() axs[1].set_ylabel(r'$\frac{d\Delta F}{dT}$') axs[2].plot(xfine,d2F,'-g', label=r'$\frac{d^{2}\Delta F}{dT^{2}}$', ) axs[2].legend() axs[2].set_ylabel(r'$\frac{d^{2}\Delta F}{dT^{2}}$') axs[2].set_xlabel(r'$T (K)$') plt.tight_layout() plt.savefig(plotfile) plt.close() return dF_out, d2F_out, spline_tck
[docs]def plot_free_energy_results(full_T_list, deltaF_values, deltaF_uncertainty,plotfile="free_energy_plot.pdf"): """ Plot free energy difference data for each conformational state transition as a function of temperature. :param full_T_list: Array listing of all temperatures, including sampled and intermediate unsampled :type full_T_list: 1D numpy array :param deltaF_values: A dictionary containing free energy change for each T in full_T_list, for each conformational state transition. :type deltaF_values: dict{"statei_statej":1D numpy array} :param deltaF_uncertainty: A dictionary containing uncertainties corresponding to deltaF_values :type deltaF_uncertainty: dict{"statei_statej": (1D numpy array, 1D numpy array)} or dict{"statei_statej": (1D numpy array)} :param plotfile: name of file, including pdf extension :type plotfile: str """ T_unit = full_T_list[0].unit F_unit = list(deltaF_values.items())[0][1].unit xlabel = f'Temperature {T_unit.get_symbol()}' ylabel = f'Free energy change {F_unit.get_symbol()}' legend_str = [] for key,value in deltaF_values.items(): if type(deltaF_uncertainty[f"{key}"]) == tuple: # Use separate upper and lower errorbars deltaF_uncertainty_value = np.zeros((2,len(full_T_list))) deltaF_uncertainty_value[0,:] = -deltaF_uncertainty[f"{key}"][0].value_in_unit(F_unit) # Lower error deltaF_uncertainty_value[1,:] = deltaF_uncertainty[f"{key}"][1].value_in_unit(F_unit) # Upper error else: # Use single symmetric errorbar deltaF_uncertainty_value = deltaF_uncertainty[f"{key}"].value_in_unit(F_unit) plt.errorbar( full_T_list.value_in_unit(T_unit), deltaF_values[f"{key}"].value_in_unit(F_unit), deltaF_uncertainty_value, linewidth=1, markersize=6, fmt='o-', fillstyle='none', capsize=4, ) legend_str.append(key) plt.xlabel(xlabel) plt.ylabel(ylabel) pyplot.legend(legend_str) plt.savefig(f"{plotfile}") plt.close() return