02_peptide_level_analysis
[1]:
from pigeon_feather.data import *
from pigeon_feather.plot import *
from pigeon_feather.hxio import *
from pigeon_feather.spectra import *
import numpy as np
import pandas as pd
import datetime
import os
import pickle
import datetime
Determination of memory status is not supported on this
platform, measuring for memoryleaks will never fail
[2]:
# load the pickle file we saved in the previous notebook
today = datetime.date.today().strftime("%Y%m%d")
today = "20240722"
with open(f"./data/hdxms_data_raw_{today}.pkl", "rb") as f:
hdxms_data_list = pickle.load(f)
# back exchange correction for peptides with experimental full deuteration data based its closest match in the database
tools.backexchange_correction(hdxms_data_list)
Number of peptides with experimental max_d: 358
Number of peptides with no experimental max_d: 12
[3]:
# make folders for results
out_path = "./data/PF_input_20240722"
today_date = datetime.date.today().strftime("%Y%m%d")
# today_date = '20240722'
results_path = f"ecDHFR_results_{today_date}"
if not os.path.exists(results_path):
os.makedirs(results_path)
uptake plots
[4]:
# option 1
from matplotlib.ticker import MultipleLocator, FormatStrFormatter
import matplotlib.ticker as ticker
import matplotlib.lines as mlines
from pigeon_feather.analysis import get_index_offset
from matplotlib.ticker import LogLocator
font = {"family": "Arial", "weight": "normal", "size": 36}
axes = {"titlesize": 36, "titleweight": "bold", "labelsize": 36, "linewidth": 5}
plt.rc("font", **font)
plt.rc("axes", **axes)
plt.rc("lines", lw=5)
colors = ["k", "red", "blue", "purple", "gray", "orange", "yellow", "green", "brown"]
all_peps = [
peptide
for hdxms_data in hdxms_data_list
for state in hdxms_data.states
for peptide in state.peptides
if peptide.note is None and state.state_name != "RAT"
]
all_idfs = [pep for pep in all_peps if pep.unique_num_timepoints > 5]
all_idfs = list(set([peptide.identifier for peptide in all_peps]))[:]
all_idfs.sort(key=lambda x: int(re.search(r"(-?\d+)", x).group()))
def idf_to_pep(idf):
return [pep for pep in all_peps if pep.identifier == idf][0]
# num_subplots_per_figure = 100
num_subplots_per_figure = 250
num_figures = math.ceil(len(all_idfs) / num_subplots_per_figure)
all_idfs_subset = all_idfs[:]
for fig_index in range(num_figures):
# Select the subset of errors for the current figure
selected_idf = all_idfs_subset[
fig_index * num_subplots_per_figure : (fig_index + 1) * num_subplots_per_figure
]
num_col = math.ceil(len(selected_idf) / 5)
fig, axs = plt.subplots(
num_col, 5, figsize=(9 * 5, 8 * num_col)
) # Adjust subplot size as needed
for i, idf in enumerate(selected_idf):
ax = axs[i // 5, i % 5]
pep = idf_to_pep(idf)
ax.axhline(y=pep.max_d, color='lightgray', linestyle='--', linewidth=5)
uptake = UptakePlot(
hdxms_data_list,
idf,
states_subset=["APO", "TRI"],
if_plot_fit=False,
figure=fig,
ax=ax,
)
ax.set_xlim(1e1, 1e5)
ax.xaxis.set_major_locator(LogLocator(base=10.0, numticks=5))
y_max = pep.theo_max_d/hdxms_data_list[0].saturation
ax.set_ylim(-0.5, y_max + 0.5)
#Custom legend
handles, labels = ax.get_legend_handles_labels()
new_labels = [label for label in labels if label in ["APO", "TRI"]]
new_handles = [
handle
for handle, label in zip(handles, labels)
if label in ["APO", "TRI"]
]
ax.legend(new_handles, new_labels, title="state", title_fontsize="small",loc='best')
# Layout adjustment and save
fig.tight_layout()
fig.savefig(f"{results_path}/DHFR_exp_uptake_{fig_index}.pdf")
pipetide comparison
Since the peptide-level comparison can only happen at the same time point between different states, let’s group timepoints within 50 seconds as the same timepoint.
[5]:
all_peptides = [
peptide
for hdxms_data in hdxms_data_list
for state in hdxms_data.states
for peptide in state.peptides
]
all_tps = [
tp
for pep in all_peptides
for tp in pep.timepoints
if tp.deut_time != np.inf and tp.deut_time != 0
]
tps_range = list(set([tp.deut_time for tp in all_tps]))
# Step 1: Sort the values
sorted_values = sorted(tps_range)
# Step 2: Group values with offsets less than 50
groups = []
current_group = []
for value in sorted_values:
if current_group and value - current_group[-1] > 50:
groups.append(current_group)
current_group = []
current_group.append(value)
# Add the last group if not empty
if current_group:
groups.append(current_group)
# Step 3: Calculate average and map values to the average integer
result_mapping = {}
for group in groups:
average = int(round(sum(group) / len(group)))
for value in group:
result_mapping[value] = average
result_mapping
for tp in all_tps:
tp.deut_time = result_mapping[tp.deut_time]
HDXStatePeptideCompares
can be created between two different states for comparison. It is a collection of all possible HDXStatePeptideCompare
, where the same peptides are in both Proteinstate. HDXStatePeptideCompare will find all the common timepoints and get the difference in D uptake.
[6]:
state1 = 'APO'
state2 = 'TRI'
# first get the states
state1_list = [i.get_state(state1) for i in hdxms_data_list]
state2_list = [i.get_state(state2) for i in hdxms_data_list]
pep_compres = HDXStatePeptideCompares(state1_list, state2_list)
pep_compres.add_all_compare()
[7]:
# check the results
for i in pep_compres.peptide_compares[:10]:
print('-----------------')
print(i.compare_info)
print(i.common_timepoints)
print(i.deut_diff)
-----------------
APO-TRI: 12-19 LYFQSISL
[ 0. 46. 374. 572. 2011. 7772. 30812. 43292. inf]
[ 0. -4.73 -1.3 -0.29 5.06 9.5 18.34 17.01 0. ]
-----------------
APO-TRI: 13-19 YFQSISL
[ 0. 46. 374. 572. 2011. 7772. 30812. 43292. inf]
[ 0. -2.57 -2.44 1.23 4.42 11.45 19.75 17.73 0. ]
-----------------
APO-TRI: 14-19 FQSISL
[ 0. 46. 374. 572. 2011. 7772. 30812. 43292. inf]
[ 0. -1.09 -1.89 0.65 3.5 12.59 24.48 21.75 0. ]
-----------------
APO-TRI: 15-19 QSISL
[ 0. 46. 374. 572. 2011. 7772. 30812. 43292. inf]
[ 0. -0.61 -1.31 2.52 5.73 16.05 27.09 24.22 0. ]
-----------------
APO-TRI: 15-20 QSISLI
[ 0. 46. 374. 572. 2011. 7772. 30812. 43292. inf]
[ 0. -1.09 -1.89 0.65 3.5 14.26 24.48 21.75 0. ]
-----------------
APO-TRI: 15-16 QS
[ 0. 46. 374. 572. 2011. 7772. 30812. 43292. inf]
[ 0. -0.76 -11.12 3.54 -1.2 -0.19 -0.25 -2.84 0. ]
-----------------
APO-TRI: 16-19 SISL
[ 0. 46. 374. 572. 2011. 7772. 30812. 43292. inf]
[ 0. 0.51 -0.38 2.61 6.68 18.21 31.39 29.12 0. ]
-----------------
APO-TRI: 22-27 ALAVDR
[ 0. 374. 572. 2011. 7772. 30812. 43292. inf]
[ 0. 28.87 37.37 35.11 11.44 2.48 -0.88 0. ]
-----------------
APO-TRI: 22-31 ALAVDRVIGM
[ 0. 46. 374. 572. 2011. 7772. 30812. 43292. inf]
[ 0. 14.41 30.25 38.46 28.2 8.77 -0.53 -4.59 0. ]
-----------------
APO-TRI: 22-26 ALAVD
[ 0. 46. 374. 572. 2011. 7772. 30812. 43292. inf]
[ 0. 10.6 30.79 36.83 29.21 14.64 4.28 1.39 0. ]
pseudo residue compare
pseudo residue compare is a comparison of the average D uptake between two different Proteinstate
of all the peptides that cover this residue. D here is the normalized values based on the full-D experiments (0-100)
[8]:
res_compares = HDXStateResidueCompares([i for i in range(1, 320)], state1_list, state2_list)
res_compares.add_all_compare()
[9]:
# check the results
# deut_diff is the average deuteration difference of all peptides covering the residue
for i in res_compares.residue_compares[:5]:
print('-----------------')
print(i.compare_info)
print(i.common_timepoints)
print(i.deut_diff)
-----------------
APO-TRI: 12
[ 0. 46. 374. 572. 2011. 7772. 30812. 43292. inf]
[ 0. -4.73 -1.3 -0.29 5.06 9.5 18.34 17.01 0. ]
-----------------
APO-TRI: 13
[ 0. 46. 374. 572. 2011. 7772. 30812. 43292. inf]
[ 0. -3.65 -1.87 0.47 4.74 10.475 19.045 17.37 0. ]
-----------------
APO-TRI: 14
[ 0. 46. 374. 572. 2011. 7772. 30812. 43292. inf]
[ 0. -2.79666667 -1.87666667 0.53 4.32666667 11.18
20.85666667 18.83 0. ]
-----------------
APO-TRI: 15
[ 0. 46. 374. 572. 2011. 7772. 30812. 43292. inf]
[ 0. -1.80833333 -3.325 1.38333333 3.50166667 10.61
18.98166667 16.60333333 0. ]
-----------------
APO-TRI: 16
[ 0. 46. 374. 572. 2011. 7772. 30812. 43292. inf]
[ 0. -1.47714286 -2.90428571 1.55857143 3.95571429 11.69571429
20.75428571 18.39142857 0. ]
visualization on pymol structure
Both comparison objects can be projected to a pdb structure for Visualization
[10]:
# both of the compare objects can be exported as a pymol pse file for visualization
pdb_file = 'data/6XG5_TRI.pdb'
create_compare_pymol_plot(pep_compres, colorbar_max=20, pdb_file=pdb_file, path=f'{results_path}')
create_compare_pymol_plot(res_compares, colorbar_max=20, pdb_file=pdb_file, path=f'{results_path}')
[11]:
# it can be visualized as a heatmap
create_heatmap_compare_tp(pep_compres, 20)
[11]:
[12]:
# make all the plots and save them
from itertools import product
items =[state.state_name for hdxms_data in hdxms_data_list for state in hdxms_data.states]
combinations = product(['APO'], [x for x in items if x != 'APO'])
for state1_name, state2_name in combinations:
state1_list = [i.get_state(state1_name) for i in hdxms_data_list]
state2_list = [i.get_state(state2_name) for i in hdxms_data_list]
compare = HDXStatePeptideCompares(state1_list, state2_list)
compare.add_all_compare()
heatmap_compare_tp = create_heatmap_compare_tp(compare, 20)
heatmap_compare_tp.savefig(f'{results_path}/{state1}-{state2}-heatmap-tp.png')
heatmap_compare = create_heatmap_compare(compare, 20)
heatmap_compare.savefig(f'{results_path}/{state1}-{state2}-heatmap.png')
create_compare_pymol_plot(compare, colorbar_max=20, pdb_file=pdb_file, path=results_path)
res_compares = HDXStateResidueCompares([i for i in range(1, 320)], state1_list, state2_list)
res_compares.add_all_compare()
create_compare_pymol_plot(res_compares, 20, pdb_file=pdb_file, path=results_path)