Source code for cg_openmm.parameters.free_energy

import os

import matplotlib.pyplot as plt
import numpy as np
import pymbar
from cg_openmm.utilities.iotools import write_pdbfile_without_topology
from cg_openmm.utilities.random_builder import *
from openmm import unit
from openmmtools.multistate import MultiStateReporter, ReplicaExchangeAnalyzer
from scipy import interpolate
from sklearn.utils import resample

kB = unit.MOLAR_GAS_CONSTANT_R # Boltzmann constant
kB = kB.in_units_of(unit.kilojoule/unit.kelvin/unit.mole)


[docs]def classify_Q_states(Q, Q_folded):
    """
    This function determines the conformational state of each element of the native contacts array, given a threshold
    for being 'folded/unfolded'. In the outputted array, 1 is 'folded' and 0 is 'unfolded'. If Q_folded is a list,
    of multiple cutoffs, additional classification is performed.
    
    :param Q: native contact fraction array of size [n_frames x n_states]
    :type Q: 2D numpy array ( float )
    
    :param Q_folded: threshold for a native contact fraction corresponding to a folded state (Q[i,j] is folded if Q[i,j] >= Q_folded)
    :type Q_folded: float
    
    :returns:
      - array_folded_states ( 2D numpy array ( float ) ) - conformational state matrix of shape [n_frames x n_replicas]
    """
    
    if type(Q_folded) == list:
        # Multiple states specified
        # Sort to ascending order:
        Q_folded = sorted(Q_folded)
        
        array_folded_states = np.zeros_like(Q)
        
        for q in Q_folded:
            array_folded_states += np.multiply((Q>=q),1)
            # 0 is unfolded, 1 is folded state with smallest Q cutoff, 2 is folded state with
            # next largest Q cutoff, and so on.
            
    else:
        # Assume a binary folded/unfolded system
        array_folded_states = np.multiply((Q>=Q_folded),1)
    
    return array_folded_states


[docs]def expectations_free_energy(Q, Q_folded, temperature_list, frame_begin=0, sample_spacing=1, output_data="output/output.nc",
    bootstrap_energies=None, num_intermediate_states=0, array_folded_states=None):
    """
    This function calculates the free energy difference (with uncertainty) between all conformational states as a function of temperature.

    :param Q: native contact fraction array of size [n_frames x n_states]
    :type Q: 2D numpy array ( float )
    
    :param Q_folded: threshold for a native contact fraction corresponding to a folded state (Q[i,j] is folded if Q[i,j] >= Q_folded)
    :type Q_folded: float or list ( float )

    :param temperature_list: List of temperatures for the simulation data (necessary because bootstrap version doesn't read in the file)
    :type temperature_list: List( float * simtk.unit.temperature )
    
    :param frame_begin: index of first frame defining the range of samples to use as a production period (default=0)
    :type frame_begin: int    
    
    :param sample_spacing: spacing of uncorrelated data points, for example determined from pymbar timeseries subsampleCorrelatedData
    :type sample_spacing: int     
    
    :param output_data: Path to the simulation .nc file.
    :type output_data: str
    
    :param num_intermediate_states: Number of unsampled thermodynamic states between sampled states to include in the calculation
    :type num_intermediate_states: int
    
    :param bootstrap_energies: a custom replica_energies array to be used for bootstrapping calculations. Used instead of the energies in the .nc file.
    :type bootstrap_energies: 2d numpy array (float)
    
    :param array_folded_states: a precomputed array classifying the different conformational states
    :type array_folded_states: 2d numpy array (int)
    
    :returns:
      - full_T_list - A 1D numpy array listing of all temperatures, including sampled and intermediate unsampled
      - deltaF_values - A dictionary of the form {"statei_statej": 1D numpy array}, containing free energy change for each T in
                        full_T_list, for each conformational state transition.
      - deltaF uncertainty - A dictionary containing 1D numpy arrays of uncertainties corresponding to deltaF_values
    """
    
    if bootstrap_energies is not None:
        # Use a subsampled replica_energy matrix instead of reading from file
        replica_energies = bootstrap_energies
        
    else:    
        # extract reduced energies and the state indices from the .nc
        reporter = MultiStateReporter(output_data, open_mode="r")
        analyzer = ReplicaExchangeAnalyzer(reporter)
        (
            replica_energies_all,
            unsampled_state_energies,
            neighborhoods,
            replica_state_indices,
        ) = analyzer.read_energies()
        
        # Close the data file:
        reporter.close()   
        
        # Select production frames to analyze
        replica_energies = replica_energies_all[:,:,frame_begin::sample_spacing]
        
        # Check the size of the Q array:
        if np.shape(replica_energies)[2] != np.shape(Q)[0]:
            # Mismatch in number of frames.
            if np.shape(replica_energies_all[:,:,frame_begin::sample_spacing])[2] == np.shape(Q[::sample_spacing,:])[0]:
                # Correct starting frame, need to apply sampling stride:
                Q = Q[::sample_spacing,:]
            elif np.shape(replica_energies_all)[2] == np.shape(Q)[0]:
                # This is the full Q, slice production frames:
                Q = Q[production_start::sample_spacing,:]
            else:
                print(f'Error: Q array of shape {Q.shape} incompatible with energies array of shape {replica_energies.shape}')
                exit()    
            
    # Classify Q into folded/unfolded states
    if array_folded_states is None:
        array_folded_states = classify_Q_states(Q,Q_folded)
    else:
        # Use a precomputed array_folded_states instead of standard classification scheme.
        # Q and Q_folded inputs are ignored.
        #***The array_folded_states should be sliced with sample_spacing if not a bootstrap calc:
        pass
    
    # Number of configurational states:
    n_conf_states = len(np.unique(array_folded_states))
        
    # convert the energies from replica/evaluated state/sample form to evaluated state/sample form
    replica_energies = pymbar.utils.kln_to_kn(replica_energies)  
    n_samples = len(replica_energies[0,:])
        
    # Reshape array_folded_states to row vector for pymbar
    # We need to order the data by replica, rather than by frame
    array_folded_states = np.reshape(array_folded_states,(np.size(array_folded_states)),order='F')
        
    # determine the numerical values of beta at each state in units consisten with the temperature
    Tunit = temperature_list[0].unit
    temps = np.array([temp.value_in_unit(Tunit)  for temp in temperature_list])  # should this just be array to begin with
    beta_k = 1 / (kB.value_in_unit(unit.kilojoule_per_mole/Tunit) * temps)

    # calculate the number of states we need expectations at.  We want it at all of the original
    # temperatures, each intermediate temperature, and then temperatures +/- from the original
    # to take finite derivatives.

    # create  an array for the temperature and energy for each state, including the
    # finite different state.
    n_sampled_T = len(temps)
    n_unsampled_states = (n_sampled_T + (n_sampled_T-1)*num_intermediate_states)
    unsampled_state_energies = np.zeros([n_unsampled_states,n_samples])
    full_T_list = np.zeros(n_unsampled_states)

    # delta is the spacing between temperatures.
    delta = np.zeros(n_sampled_T-1)

    # fill in a list of temperatures at all original temperatures and all intermediate states.
    full_T_list[0] = temps[0]
    t = 0
    for i in range(n_sampled_T-1):
        delta[i] = (temps[i+1] - temps[i])/(num_intermediate_states+1)
        for j in range(num_intermediate_states+1):
            full_T_list[t] = temps[i] + delta[i]*j
            t += 1
    full_T_list[t] = temps[-1]
    n_T_vals = t+1

    # calculate betas of all of these temperatures
    beta_full_k = 1 / (kB.value_in_unit(unit.kilojoule_per_mole/Tunit) * full_T_list)

    ti = 0
    N_k = np.zeros(n_unsampled_states)
    for k in range(n_unsampled_states):
        # Calculate the reduced energies at all temperatures, sampled and unsample.
        unsampled_state_energies[k, :] = replica_energies[0,:]*(beta_full_k[k]/beta_k[0])
        if ti < len(temps):
            # store in N_k which states do and don't have samples.
            if full_T_list[k] == temps[ti]:
                ti += 1
                N_k[k] = n_samples//len(temps)  # these are the states that have samples

    # call MBAR to find weights at all states, sampled and unsampled
    solver_protocol = {"method":"L-BFGS-B"}
    
    mbarT = pymbar.MBAR(
        unsampled_state_energies,N_k,verbose=False,relative_tolerance=1e-12,
        maximum_iterations=10000,solver_protocol=(solver_protocol,),
        )
        
    # Calculate N expectations that a structure is in configurational state n
    # We need the probabilities of being in each state - first construct vectors of 0
    # (not in current state) and 1 (in current state)

    bool_i = np.zeros((n_conf_states,array_folded_states.shape[0]))

    for i in range(n_conf_states):
        i_vector = np.full_like(array_folded_states,i)
        # Convert True/False to integer 1/0 for each energy data point:
        bool_i[i] = np.multiply((i_vector==array_folded_states),1)

    # Calculate the expectation of F at each unsampled states

    # Loop over each thermodynamic state:
    results = {}
    for i in range(len(full_T_list)):
        U_n = unsampled_state_energies[i,:]

        # compute expectations of being in conformational state n
        # Store results in a dictionary
        results[str(i)] = mbarT.computeMultipleExpectations(
            bool_i,U_n,compute_covariance=True)

    deltaF_values = {}
    deltaF_uncertainty = {}
    n_trans = 0 # store the number of unique transitions
    F_unit = (-kB*full_T_list[0]*Tunit).unit # units of free energy

    # Initialize the results dictionaries
    for s1 in range(n_conf_states):
        for s2 in range(s1+1,n_conf_states):
            n_trans += 1
            deltaF_values[f"state{s1}_state{s2}"] = np.zeros(len(full_T_list))
            deltaF_uncertainty[f"state{s1}_state{s2}"] = np.zeros(len(full_T_list))
                
    # Compute free energies from probability ratios:
    for i in range(len(full_T_list)):
        for s1 in range(n_conf_states):
            for s2 in range(s1+1,n_conf_states):
                # Free energy change for s2 --> s1 at temp i
                deltaF_values[f"state{s1}_state{s2}"][i] = (
                    -kB*full_T_list[i]*Tunit*(
                    np.log(results[str(i)][0][s1])-
                    np.log(results[str(i)][0][s2]))).value_in_unit(F_unit)
                    
                # Get covariance matrix:
                theta_i = results[str(i)][2]
                deltaF_uncertainty[f"state{s1}_state{s2}"][i] = (
                    kB*full_T_list[i]*unit.kelvin*np.sqrt(
                    theta_i[s1,s1] + theta_i[s2,s2] - (theta_i[s2,s1]+theta_i[s1,s2]))).value_in_unit(F_unit)

    # Add the units back on:
    for s1 in range(n_conf_states):
        for s2 in range(s1+1,n_conf_states):    
            deltaF_values[f"state{s1}_state{s2}"] *= F_unit
            deltaF_uncertainty[f"state{s1}_state{s2}"] *= F_unit
    full_T_list *= Tunit
                    
    return full_T_list, deltaF_values, deltaF_uncertainty
    

[docs]def bootstrap_free_energy_folding(Q, Q_folded, array_folded_states=None, output_data="output/output.nc", frame_begin=0,
    sample_spacing=1, n_trial_boot=200, num_intermediate_states=0, conf_percent='sigma', plotfile_dir="output"):
    """
    Function for computing uncertainty of free energy, entropy, and enthalpy using bootstrapping with varying starting frames.

    :param Q: native contact fraction array of size [n_frames x n_states] (with equilibration region already trimmed)
    :type Q: 2D numpy array ( float )
    
    :param Q_folded: threshold for a native contact fraction corresponding to a folded state (Q[i,j] is folded if Q[i,j] >= Q_folded)
    :type Q_folded: float or list ( float )
    
    :param array_folded_states: a precomputed array classifying the different conformational states
    :type array_folded_states: 2d numpy array (int)
    
    :param output_data: Path to the simulation .nc file.
    :type output_data: str    
    
    :param frame_begin: index of first frame defining the range of samples to use as a production period (default=0)
    :type frame_begin: int    
    
    :param sample_spacing: spacing of uncorrelated data points, for example determined from pymbar timeseries subsampleCorrelatedData
    :type sample_spacing: int     
    
    :param n_trial_boot: number of trials to run for generating bootstrapping uncertainties (default=200)
    :type n_trial_boot: int
    
    :param num_intermediate_states: Number of unsampled thermodynamic states between sampled states to include in the calculation
    :type num_intermediate_states: int
    
    :returns:
      - full_T_list - A 1D numpy array listing of all temperatures, including sampled and intermediate unsampled
      - deltaF_values - A dictionary of the form {"statei_statej": 1D numpy array}, containing free energy change for each T in
                        full_T_list, for each conformational state transition.
      - deltaF uncertainty - A dictionary containing tuple of 1D numpy arrays of lower/upper of uncertainties corresponding to deltaF_values  
      - deltaS_values - A dictionary of the form {"statei_statej": 1D numpy array}, containing entropy change for each T in
                        full_T_list, for each conformational state transition.
      - deltaS uncertainty - A dictionary containing tuple of 1D numpy arrays of lower/upper uncertainties corresponding to deltaS_values 
      - deltaU_values - A dictionary of the form {"statei_statej": 1D numpy array}, containing enthalpy change for each T in
                        full_T_list, for each conformational state transition.
      - deltaU uncertainty - A dictionary containing tuple of 1D numpy arrays of lower/upper of uncertainties corresponding to deltaU_values
    """
    
    # extract reduced energies and the state indices from the .nc
    reporter = MultiStateReporter(output_data, open_mode="r")
    analyzer = ReplicaExchangeAnalyzer(reporter)
    (
        replica_energies_all,
        unsampled_state_energies,
        neighborhoods,
        replica_state_indices,
    ) = analyzer.read_energies()    
    
    # Get temperature_list from .nc file:
    states = reporter.read_thermodynamic_states()[0]
    
    temperature_list = []
    for s in states:
        temperature_list.append(s.temperature)    
    
    # Close the data file:
    reporter.close()      
    
    # Select production frames to analyze
    replica_energies_prod = replica_energies_all[:,:,frame_begin::]
    
    # For shifting reference frame bootstrap, we need the entire Q and energy arrays starting from frame_start

    if array_folded_states is None:
        if np.shape(replica_energies_prod)[2] != np.shape(Q)[0]:
            print(f'Error: Q array of shape {Q.shape} incompatible with energies array of shape {replica_energies_prod.shape}')
            exit()
    else:
        if np.shape(replica_energies_prod)[2] != np.shape(array_folded_states)[0]:
            print(f'Error: Q array of shape {Q.shape} incompatible with energies array of shape {replica_energies_prod.shape}')
            exit()
        
    if array_folded_states is None:
        # Use the raw contact fractions as input to the free energy function
        Q_all = Q
    else:
        # Use the precomputed conformational state array as input to the free energy function
        array_folded_states_all = array_folded_states

    # Overall results:
    deltaF_values = {}
    deltaF_uncertainty = {}
    deltaS_values = {}
    deltaS_uncertainty = {}
    deltaU_values = {}
    deltaU_uncertainty = {}
    
    # Uncertainty for each sampling trial:
    deltaF_values_boot = {}
    deltaF_uncertainty_boot = {}
    
    deltaS_values_boot = {}
    deltaS_uncertainty_boot = {}
    
    deltaU_values_boot = {}
    deltaU_uncertainty_boot = {}
    
    # Get units:
    F_unit = (kB*unit.kelvin).unit # units of free energy
    T_unit = temperature_list[0].unit
    S_unit = F_unit/T_unit
    U_unit = F_unit
    
    for i_boot in range(n_trial_boot):
        # Here we can potentially change the reference frame for each bootstrap trial.
        # This requires the array slicing to be done here, not above.
        ref_shift = np.random.randint(sample_spacing)
        
        # Replica energies and Q already have equilibration period removed:
        replica_energies = replica_energies_prod[:,:,ref_shift::sample_spacing]
        
        if array_folded_states is None:
            Q = Q_all[ref_shift::sample_spacing,:]
            n_states = len(Q[0,:])
        else:
            array_folded_states = array_folded_states_all[ref_shift::sample_spacing,:]
            n_states = len(array_folded_states[0,:])
        
        # Get all possible sample indices
        sample_indices_all = np.arange(0,len(replica_energies[0,0,:]))
        # n_samples should match the size of the sliced replica energy dataset
        sample_indices = resample(sample_indices_all, replace=True, n_samples=len(sample_indices_all))
        
        replica_energies_resample = np.zeros_like(replica_energies)
        # replica_energies is [n_states x n_states x n_frame]
        # Q is [nframes x n_states]
        
        if array_folded_states is None:
            Q_resample = np.zeros((len(sample_indices),n_states))
        else:
            array_folded_states_resample = np.zeros((len(sample_indices),n_states))
        
        # Select the sampled frames from Q and replica_energies:
        j = 0
        
        if array_folded_states is None:
            for i in sample_indices:
                replica_energies_resample[:,:,j] = replica_energies[:,:,i]
                Q_resample[j,:] = Q[i,:]
                j += 1
                
        else:
            for i in sample_indices:
                replica_energies_resample[:,:,j] = replica_energies[:,:,i]
                array_folded_states_resample[j,:] = array_folded_states[i,:]
                j += 1
            
        # Run free energy expectation calculation:
        if array_folded_states is None:
            full_T_list, deltaF_values_boot[i_boot], deltaF_uncertainty_boot[i_boot] = expectations_free_energy(
                Q_resample,
                Q_folded,
                temperature_list,
                bootstrap_energies=replica_energies_resample,
                num_intermediate_states=num_intermediate_states,
            )
        else:
            full_T_list, deltaF_values_boot[i_boot], deltaF_uncertainty_boot[i_boot] = expectations_free_energy(
                None,
                None,
                temperature_list,
                bootstrap_energies=replica_energies_resample,
                num_intermediate_states=num_intermediate_states,
                array_folded_states=array_folded_states_resample,
            )
        
        # Get entropy/enthalpy for fitting current free energy data:
        # The inner dictionary keys will be transition names
        deltaS_values_boot[i_boot] = {}
        deltaU_values_boot[i_boot] = {}
        
        deltaS_values_boot[i_boot], deltaU_values_boot[i_boot] = get_entropy_enthalpy(
            deltaF_values_boot[i_boot], full_T_list)
    
    arr_deltaF_values_boot = {}
    arr_deltaS_values_boot = {}
    arr_deltaU_values_boot = {}
    
    # Loop over all conformational transitions:
    for key, value in deltaF_values_boot[0].items():
        arr_deltaF_values_boot[key] = np.zeros((n_trial_boot, len(full_T_list)))
        arr_deltaS_values_boot[key] = np.zeros((n_trial_boot, len(full_T_list)))
        arr_deltaU_values_boot[key] = np.zeros((n_trial_boot, len(full_T_list)))
        
    # Compute mean values:
    # Free energy:
    for i_boot in range(n_trial_boot):
        for key, value in deltaF_values_boot[i_boot].items():
            arr_deltaF_values_boot[key][i_boot,:] = value.value_in_unit(F_unit)
            
    deltaF_values = {}
    
    for key, value in arr_deltaF_values_boot.items():
        deltaF_values[key] = np.mean(value,axis=0)*F_unit
            
    # Entropy:        
    for i_boot in range(n_trial_boot):
        for key, value in deltaS_values_boot[i_boot].items():    
            arr_deltaS_values_boot[key][i_boot,:] = deltaS_values_boot[i_boot][key].value_in_unit(S_unit)
            
    deltaS_values = {}
    
    for key, value in arr_deltaS_values_boot.items():
        deltaS_values[key] = np.mean(value,axis=0)*S_unit         
            
    # Enthalpy:        
    for i_boot in range(n_trial_boot):
        for key, value in deltaU_values_boot[i_boot].items():
            arr_deltaU_values_boot[key][i_boot,:] = deltaU_values_boot[i_boot][key].value_in_unit(U_unit)
            
    deltaU_values = {}
    
    for key, value in arr_deltaU_values_boot.items():
        deltaU_values[key] = np.mean(value,axis=0)*U_unit
           
    # Compute confidence intervals:
    deltaF_uncertainty = {} 
    deltaS_uncertainty = {} 
    deltaU_uncertainty = {}
    
    if conf_percent == 'sigma':
        # Use analytical standard deviation instead of percentile method:
        # Free energy:
        for key, value in arr_deltaF_values_boot.items():
            F_std = np.std(value,axis=0)*F_unit    
            deltaF_uncertainty[key] = (-F_std,F_std)
            
        # Entropy:    
        for key, value in arr_deltaS_values_boot.items():
            S_std = np.std(value,axis=0)*S_unit    
            deltaS_uncertainty[key] =(-S_std,S_std)
            
        # Enthalpy:    
        for key, value in arr_deltaU_values_boot.items():
            U_std = np.std(value,axis=0)*U_unit
            deltaU_uncertainty[key] = (-U_std,U_std)
    
    else:
        # Compute specified confidence interval:
        p_lo = (100-conf_percent)/2
        p_hi = 100-p_lo
    
        # Free energy:
        for key, value in arr_deltaF_values_boot.items():
            F_diff = value-np.mean(value,axis=0)
            F_conf_lo = np.percentile(F_diff,p_lo,axis=0,interpolation='linear')*F_unit
            F_conf_hi = np.percentile(F_diff,p_hi,axis=0,interpolation='linear')*F_unit
            deltaF_uncertainty[key] = (F_conf_lo, F_conf_hi)
            
        # Entropy:
        for key, value in arr_deltaS_values_boot.items():
            S_diff = value-np.mean(value,axis=0)
            S_conf_lo = np.percentile(S_diff,p_lo,axis=0,interpolation='linear')*S_unit
            S_conf_hi = np.percentile(S_diff,p_hi,axis=0,interpolation='linear')*S_unit
            deltaS_uncertainty[key] = (S_conf_lo, S_conf_hi)
            
        # Enthalpy:
        for key, value in arr_deltaU_values_boot.items():
            U_diff = value-np.mean(value,axis=0)
            U_conf_lo = np.percentile(U_diff,p_lo,axis=0,interpolation='linear')*U_unit
            U_conf_hi = np.percentile(U_diff,p_hi,axis=0,interpolation='linear')*U_unit
            deltaU_uncertainty[key] = (U_conf_lo, U_conf_hi)
    
    # Plot results:
    
    # Free energy:
    plot_free_energy_results(
        full_T_list, deltaF_values, deltaF_uncertainty, plotfile=f"{plotfile_dir}/free_energy_boot.pdf")
        
    # Entropy and enthalpy:
    plot_entropy_enthalpy(
        full_T_list, deltaS_values, deltaU_values,
        deltaS_uncertainty=deltaS_uncertainty, deltaU_uncertainty=deltaU_uncertainty,
        plotfile_entropy=f"{plotfile_dir}/entropy_boot.pdf", plotfile_enthalpy=f"{plotfile_dir}/enthalpy_boot.pdf")
                    
    return full_T_list, deltaF_values, deltaF_uncertainty, deltaS_values, deltaS_uncertainty, deltaU_values, deltaU_uncertainty
    
    
[docs]def get_entropy_enthalpy(deltaF, temperature_list):
    """
    Compute enthalpy change and entropy change upon folding, given free energy of folding for a series of temperatures.
    
    :param deltaF: A dictionary containing free energy change for each T in full_T_list, for each conformational state transition.
    :type deltaF: dict{"statei_statej":1D numpy array}
    
    :param temperature_list: List of temperatures for the simulation data.
    :type temperature_list: List( float * simtk.unit.temperature )
    
    :param plotfile_entropy: path to filename for entropy plot (no plot created if None)
    :type plotfile_entropy: str
    
    :param plotfile_enthalpy: path to filename for enthalpy plot (no plot created if None)
    :type plotfile_enthalpy: str
    
    :returns:
      - deltaS - dict{"statei_statej":1D numpy array} of entropy of folding values for each temperature in temperature_list
      - deltaU - dict{"statei_statej":1D numpy array} of enthalpy of folding values for each temperature in temperature_list
      
    """
    
    ddeltaF = {}
    d2deltaF = {}
    spline_tck = {}
    deltaS = {}
    deltaU = {}

    T_unit = temperature_list[0].unit    
    
    # Loop over all conformational transitions:
    for key,value in deltaF.items():
        ddeltaF[key], d2deltaF[key], spline_tck[key] = \
            get_free_energy_derivative(value, temperature_list)
                
        F_unit = value[0].unit
        S_unit = F_unit/T_unit
        U_unit = F_unit
        
        # Spline fitting function strips off units - add back:
        deltaS[key] = -ddeltaF[key] * F_unit / T_unit
        
        deltaU[key] = value + temperature_list*deltaS[key]    
        
    return deltaS, deltaU
  
  
[docs]def plot_entropy_enthalpy(
    full_T_list, deltaS_values, deltaU_values, deltaS_uncertainty=None, deltaU_uncertainty=None,
    plotfile_entropy='entropy.pdf', plotfile_enthalpy='enthalpy.pdf'):
    """
    Plot entropy and enthalpy difference data for each conformational state transition as a function of temperature.

    :param full_T_list: Array listing of all temperatures, including sampled and intermediate unsampled
    :type full_T_list: 1D numpy array
    
    :param deltaS_values: A dictionary containing entropy change for each T in full_T_list, for each conformational state transition.
    :type deltaS_values: dict{"statei_statej":1D numpy array}
    
    :param deltaU_values: A dictionary containing enthalpy change for each T in full_T_list, for each conformational state transition.
    :type deltaU_values: dict{"statei_statej":1D numpy array}
    
    :param deltaS_uncertainty: A dictionary containing uncertainties corresponding to deltaS_values (optional)
    :type deltaS_uncertainty: dict{"statei_statej": (1D numpy array, 1D numpy array)}
    
    :param deltaH_uncertainty: A dictionary containing uncertainties corresponding to deltaU_values (optional)
    :type deltaH_uncertainty: dict{"statei_statej": (1D numpy array, 1D numpy array)}
    
    :param plotfile_entropy: name of entropy plot file, including pdf extension
    :type plotfile_entropy: str
    
    :param plotfile_enthalpy: name of enthalpy plot file, including pdf extension
    :type plotfile_enthalpy: str
    """

    T_unit = full_T_list[0].unit
    S_unit = list(deltaS_values.items())[0][1].unit
    U_unit = list(deltaU_values.items())[0][1].unit
    
    xlabel = f'Temperature {T_unit.get_symbol()}'
    
    # Plot entropy change as a function of T:
    ylabel = f'Entropy change {S_unit.get_symbol()}'
    legend_str = []

    if deltaS_uncertainty is not None:
        for key,value in deltaS_values.items():
            if type(deltaS_uncertainty[f"{key}"]) == tuple:
                # Use separate upper and lower errorbars
                deltaS_uncertainty_value = np.zeros((2,len(full_T_list)))
                deltaS_uncertainty_value[0,:] = -deltaS_uncertainty[f"{key}"][0].value_in_unit(S_unit) # Lower error
                deltaS_uncertainty_value[1,:] = deltaS_uncertainty[f"{key}"][1].value_in_unit(S_unit) # Upper error
            else:
                # Use single symmetric errorbar
                deltaS_uncertainty_value = deltaS_uncertainty[f"{key}"].value_in_unit(S_unit)
            
            plt.errorbar(
                full_T_list.value_in_unit(T_unit),
                deltaS_values[f"{key}"].value_in_unit(S_unit),
                deltaS_uncertainty_value,
                linewidth=1,
                markersize=6,
                fmt='o-',
                fillstyle='none',
                capsize=4,
            )
            legend_str.append(key)
    else:
        for key,value in deltaS_values.items():
            plt.plot(
                full_T_list.value_in_unit(T_unit),
                deltaS_values[f"{key}"].value_in_unit(S_unit),
                'o-',
                linewidth=1,
                markersize=6,
                fillstyle='none',
            )
            legend_str.append(key)
        
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    pyplot.legend(legend_str)
    plt.savefig(f"{plotfile_entropy}") 
    plt.close()
    
    # Plot enthalpy change as a function of T:
    ylabel = f'Enthalpy change {U_unit.get_symbol()}'
    legend_str = []

    if deltaU_uncertainty is not None:
        for key,value in deltaU_values.items():
            if type(deltaU_uncertainty[f"{key}"]) == tuple:
                # Use separate upper and lower errorbars
                deltaU_uncertainty_value = np.zeros((2,len(full_T_list)))
                deltaU_uncertainty_value[0,:] = -deltaU_uncertainty[f"{key}"][0].value_in_unit(U_unit) # Lower error
                deltaU_uncertainty_value[1,:] = deltaU_uncertainty[f"{key}"][1].value_in_unit(U_unit) # Upper error
            else:
                # Use single symmetric errorbar
                deltaU_uncertainty_value = deltaU_uncertainty[f"{key}"].value_in_unit(U_unit)
            
            plt.errorbar(
                full_T_list.value_in_unit(T_unit),
                deltaU_values[f"{key}"].value_in_unit(U_unit),
                deltaU_uncertainty_value,
                linewidth=1,
                markersize=6,
                fmt='o-',
                fillstyle='none',
                capsize=4,
            )
            legend_str.append(key)
    else:
        for key,value in deltaU_values.items():
            plt.plot(
                full_T_list.value_in_unit(T_unit),
                deltaU_values[f"{key}"].value_in_unit(U_unit),
                'o-',
                linewidth=1,
                markersize=6,
                fillstyle='none',
            )
            legend_str.append(key)
        
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    pyplot.legend(legend_str)
    plt.savefig(f"{plotfile_enthalpy}") 
    plt.close()
    
    return
    
    
[docs]def get_free_energy_derivative(deltaF, temperature_list, plotfile=None):
    """
    Fit a heat capacity vs T dataset to cubic spline, and compute derivatives
    
    :param deltaF: free energy of folding data series
    :type deltaF: Quantity or numpy 1D array
    
    :param temperature_list: List of temperatures used in replica exchange simulations
    :type temperature: Quantity or numpy 1D array
    
    :param plotfile: path to filename to output plot (default=None)
    :type plotfile: str
    
    :returns:
          - dF_out ( 1D numpy array (float) ) - 1st derivative of free energy, from a cubic spline evaluated at each point in deltaF)
          - d2F_out ( 1D numpy array (float) ) - 2nd derivative of free energy, from a cubic spline evaluated at each point in deltaF)
          - spline_tck ( scipy spline object (tuple) ) - knot points (t), coefficients (c), and order of the spline (k) fit to deltaF data
    
    """
    xdata = temperature_list
    ydata = deltaF
    
    # Strip units off quantities:
    if type(xdata[0]) == unit.quantity.Quantity:
        xdata_val = np.zeros((len(xdata)))
        xunit = xdata[0].unit
        for i in range(len(xdata)):
            xdata_val[i] = xdata[i].value_in_unit(xunit)
        xdata = xdata_val
    
    if type(ydata[0]) == unit.quantity.Quantity:
        ydata_val = np.zeros((len(ydata)))
        yunit = ydata[0].unit
        for i in range(len(ydata)):
            ydata_val[i] = ydata[i].value_in_unit(yunit)
        ydata = ydata_val
            
    # Fit cubic spline to data, no smoothing
    spline_tck = interpolate.splrep(xdata, ydata, s=0)
    
    xfine = np.linspace(xdata[0],xdata[-1],1000)
    yfine = interpolate.splev(xfine, spline_tck, der=0)
    dF = interpolate.splev(xfine, spline_tck, der=1)
    d2F = interpolate.splev(xfine, spline_tck, der=2)
    
    dF_out = interpolate.splev(xdata, spline_tck, der=1)
    d2F_out = interpolate.splev(xdata, spline_tck, der=2)
    
    if plotfile != None:
        figure, axs = plt.subplots(nrows=3,ncols=1,sharex=True)
        
        axs[0].plot(xdata,ydata,'ok',
            markersize=4,
            fillstyle='none',
            label='simulation data',
        )
        
        axs[0].plot(xfine,yfine,'-b',
            label='cubic spline',
        )
        
        axs[0].legend()
        axs[0].set_ylabel(r'$\Delta F (J/mol)$')
        
        axs[1].plot(xfine,dF,'-r',
            label=r'$\frac{d\Delta F}{dT}$',
        )
        
        axs[1].legend()
        axs[1].set_ylabel(r'$\frac{d\Delta F}{dT}$')
        
        axs[2].plot(xfine,d2F,'-g',
            label=r'$\frac{d^{2}\Delta F}{dT^{2}}$',
        )
        
        axs[2].legend()
        axs[2].set_ylabel(r'$\frac{d^{2}\Delta F}{dT^{2}}$')
        axs[2].set_xlabel(r'$T (K)$')
        
        plt.tight_layout()
        
        plt.savefig(plotfile)
        plt.close()
    
    return dF_out, d2F_out, spline_tck    
    

[docs]def plot_free_energy_results(full_T_list, deltaF_values, deltaF_uncertainty,plotfile="free_energy_plot.pdf"):   
    """
    Plot free energy difference data for each conformational state transition as a function of temperature.

    :param full_T_list: Array listing of all temperatures, including sampled and intermediate unsampled
    :type full_T_list: 1D numpy array
    
    :param deltaF_values: A dictionary containing free energy change for each T in full_T_list, for each conformational state transition.
    :type deltaF_values: dict{"statei_statej":1D numpy array}
    
    :param deltaF_uncertainty: A dictionary containing uncertainties corresponding to deltaF_values
    :type deltaF_uncertainty: dict{"statei_statej": (1D numpy array, 1D numpy array)} or dict{"statei_statej": (1D numpy array)}
    
    :param plotfile: name of file, including pdf extension
    :type plotfile: str
    
    """

    T_unit = full_T_list[0].unit
    F_unit = list(deltaF_values.items())[0][1].unit
    
    xlabel = f'Temperature {T_unit.get_symbol()}'
    ylabel = f'Free energy change {F_unit.get_symbol()}'
    legend_str = []

    for key,value in deltaF_values.items():
        if type(deltaF_uncertainty[f"{key}"]) == tuple:
            # Use separate upper and lower errorbars
            deltaF_uncertainty_value = np.zeros((2,len(full_T_list)))
            deltaF_uncertainty_value[0,:] = -deltaF_uncertainty[f"{key}"][0].value_in_unit(F_unit) # Lower error
            deltaF_uncertainty_value[1,:] = deltaF_uncertainty[f"{key}"][1].value_in_unit(F_unit) # Upper error
        else:
            # Use single symmetric errorbar
            deltaF_uncertainty_value = deltaF_uncertainty[f"{key}"].value_in_unit(F_unit)
        plt.errorbar(
            full_T_list.value_in_unit(T_unit),
            deltaF_values[f"{key}"].value_in_unit(F_unit),
            deltaF_uncertainty_value,
            linewidth=1,
            markersize=6,
            fmt='o-',
            fillstyle='none',
            capsize=4,
        )
        legend_str.append(key)
        
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    pyplot.legend(legend_str)
    plt.savefig(f"{plotfile}")
    plt.close()

    return