libpappsomspp
Library for mass spectrometry
Loading...
Searching...
No Matches
peptidemodel.cpp
Go to the documentation of this file.
1/**
2 * \file pappsomspp/processing/specglob/peptidemodel.h
3 * \date 14/11/2023
4 * \author Olivier Langella
5 * \brief SpecGlobTool peptide model
6 *
7 * C++ implementation of the SpecGlob algorithm described in :
8 * 1. Prunier, G. et al. Fast alignment of mass spectra in large proteomics
9 * datasets, capturing dissimilarities arising from multiple complex
10 * modifications of peptides. BMC Bioinformatics 24, 421 (2023).
11 *
12 * HAL Id : hal-04296170 , version 1
13 * Mot de passe : hxo20cl
14 * DOI : 10.1186/s12859-023-05555-y
15 */
16
17
18/*
19 * SpecGlobTool, Spectra to peptide alignment tool
20 * Copyright (C) 2023 Olivier Langella
21 * <olivier.langella@universite-paris-saclay.fr>
22 *
23 * This program is free software: you can redistribute ipetide to spectrum
24 * alignmentt and/or modify it under the terms of the GNU General Public License
25 * as published by the Free Software Foundation, either version 3 of the
26 * License, or (at your option) any later version.
27 *
28 * This program is distributed in the hope that it will be useful,
29 * but WITHOUT ANY WARRANTY; without even the implied warranty of
30 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
31 * GNU General Public License for more details.
32 *
33 * You should have received a copy of the GNU General Public License
34 * along with this program. If not, see <http://www.gnu.org/licenses/>.
35 *
36 */
37
38#include "peptidemodel.h"
39#include "../../peptide/peptiderawfragmentmasses.h"
40#include "../../pappsoexception.h"
41#include "../../utils.h"
42
43
44namespace pappso
45{
46namespace specglob
47{
49{
50 // not valid
51}
52
54 const pappso::Peptide &peptide)
55 : std::vector<AminoAcidModel>()
56{
58 mcsp_peakList = qmass_spectrum.getMassSpectrumCstSPtr();
61 for(auto aa_pep : peptide)
62 {
63 aa_pep.removeInternalCterModification();
64 aa_pep.removeInternalNterModification();
65
66 push_back({aa_pep, SpectralAlignmentType::nonAlign, false, 0.0});
67 }
68
69 // internal:Nter_hydrolytic_cleavage_H
70
71 // internal:Cter_hydrolytic_cleavage_HO
72 //
73}
74
90
110
113{
114 operator=(other);
115
117 return *this;
118}
119
123
124QString
126{
127 QString peptide_str;
128
129 if(m_beginMassDelta != 0.0)
130 peptide_str.append(
131 QString("[%1]").arg(QString::number(m_beginMassDelta, 'f', 2)));
132
133 for(auto aa_model : *this)
134 {
135 qDebug() << " aa_model.amino_acid.toString()="
136 << aa_model.amino_acid.toAbsoluteString()
137 << " aa_model.alignment_type="
138 << Utils::toString(aa_model.alignment_type)
139 << " aa_model.mass_difference=" << aa_model.mass_difference
140 << " aa_model.bracket=" << aa_model.bracket;
141
142 if(aa_model.remove)
143 peptide_str.append("-(");
144 if(aa_model.bracket)
145 {
146 peptide_str.append(
147 QString("[%1]").arg(aa_model.amino_acid.toString()));
148 }
149 else
150 {
151 peptide_str.append(aa_model.amino_acid.toString());
152 }
153
154
155 if(aa_model.mass_difference != 0.0)
156 {
157 peptide_str.append(QString("[%1]").arg(
158 QString::number(aa_model.mass_difference, 'f', 2)));
159 }
160
161 if(aa_model.remove)
162 peptide_str.append(")");
163 }
164
165 peptide_str.replace(")-(", "");
166 peptide_str.append(
167 QString("_[%1]").arg(QString::number(getMassDelta(), 'f', 2)));
168 peptide_str.replace("_[0.00]", "");
169 peptide_str.replace("_[-0.00]", "");
170 return peptide_str;
171}
172
173
174QString
176{
177 QString peptide_str;
178
179 // if(m_beginMassDelta != 0.0)
180 // peptide_str.append(
181 // QString("[%1]").arg(QString::number(m_beginMassDelta, 'f', 2)));
182
183 double mass_diff = m_beginMassDelta;
184 for(auto aa_model : *this)
185 {
186 qDebug() << " aa_model.amino_acid.toString()="
187 << aa_model.amino_acid.toAbsoluteString()
188 << " aa_model.alignment_type="
189 << Utils::toString(aa_model.alignment_type)
190 << " aa_model.mass_difference=" << aa_model.mass_difference
191 << " aa_model.bracket=" << aa_model.bracket;
192
193 if(aa_model.bracket)
194 {
195 }
196
197 if(aa_model.remove)
198 {
199 peptide_str.append(aa_model.amino_acid.getLetter());
200
201 peptide_str.append(
202 "[" +
204 aa_model.amino_acid.getLetter())
205 ->getAccession() +
206 "]");
207 }
208 else
209 {
210 peptide_str.append(aa_model.amino_acid.toProForma());
211
212 mass_diff += aa_model.mass_difference;
213 if(mass_diff != 0.0)
214 {
215 if(mass_diff > 0)
216 {
217 peptide_str.append(
218 QString("[+%1]").arg(QString::number(mass_diff, 'f', 4)));
219 }
220 else
221 {
222 peptide_str.append(
223 QString("[%1]").arg(QString::number(mass_diff, 'f', 4)));
224 }
225 }
226 }
227 mass_diff = 0.0;
228 }
229 return peptide_str;
230}
231double
233{
234 double mass = m_beginMassDelta;
235 if(m_cterModification != nullptr)
236 mass += m_cterModification->getMass();
237 if(m_nterModification != nullptr)
238 mass += m_nterModification->getMass();
239
240 for(auto aa_model : *this)
241 {
242 mass += aa_model.amino_acid.getMass() + aa_model.mass_difference;
243 }
244 return mass;
245}
246
247double
252
253std::size_t
255{
256 std::size_t count = 0;
257 if(m_beginMassDelta != 0.0)
258 count++;
259 for(auto aa_model : *this)
260 {
261 if(aa_model.mass_difference != 0.0)
262 count++;
263 }
264 return count;
265}
266
267void
269{
270 m_beginMassDelta = delta;
271}
272
273void
275{
276 if(m_theoreticalPeakList.size() == 0)
277 {
278 m_intensityExperimentalPeaks = mcsp_peakList.get()->totalIonCurrent();
279 generateTheoreticalPeaks(precision);
280 }
281 auto it_expe_peaks_end = mcsp_peakList.get()->end();
282 auto it_expe_peaks = mcsp_peakList.get()->begin();
283 auto it_theo_peaks = m_theoreticalPeakList.begin();
284 while((it_expe_peaks != mcsp_peakList.get()->end()) &&
285 (it_theo_peaks != m_theoreticalPeakList.end()))
286 {
287 while((it_expe_peaks != mcsp_peakList.get()->end()) &&
288 (it_expe_peaks->x < it_theo_peaks->mz_range.lower()))
289 { // find the first possible ion hit
290 it_expe_peaks++;
291 }
292 if(it_expe_peaks == mcsp_peakList.get()->end())
293 continue;
294
295 if(it_expe_peaks->x < it_theo_peaks->mz_range.upper())
296 {
297 // ok, we've got a hit
298 auto it_theo_peaks_loop = it_theo_peaks;
299 while((it_theo_peaks_loop != m_theoreticalPeakList.end()) &&
300 (it_theo_peaks_loop->mz_range.contains(it_expe_peaks->x)))
301 {
302 if((it_theo_peaks_loop->it_experimental_peak_match ==
303 it_expe_peaks_end) ||
304 (it_theo_peaks_loop->it_experimental_peak_match->y <
305 it_expe_peaks->y))
306 {
307 it_theo_peaks_loop->it_experimental_peak_match =
308 it_expe_peaks;
309 qDebug() << "match: mz=" << it_expe_peaks->x
310 << " intensity=" << it_expe_peaks->y;
311 break;
312 }
313 it_theo_peaks_loop++;
314 }
315 it_expe_peaks++;
316 }
317 else
318 {
319 it_theo_peaks++;
320 }
321 // it_expe_peaks++;
322 }
323
324 // get number of shared peaks
327 std::vector<std::vector<pappso::DataPoint>::const_iterator>
328 matched_experimental_peaks;
329 for(auto &theo_peak : m_theoreticalPeakList)
330 {
331 if(theo_peak.it_experimental_peak_match != it_expe_peaks_end)
332 {
333 matched_experimental_peaks.push_back(
334 theo_peak.it_experimental_peak_match);
335 // m_countSharedPeaks++;
336 }
337 }
338 auto it_end = std::unique(matched_experimental_peaks.begin(),
339 matched_experimental_peaks.end());
341 std::distance(matched_experimental_peaks.begin(), it_end);
342 m_intensitySharedPeaks = std::accumulate(
343 matched_experimental_peaks.begin(),
344 it_end,
345 0.0,
346 [](double k, const std::vector<pappso::DataPoint>::const_iterator &l) {
347 return (k + l->y);
348 });
349}
350
351void
353{
354 m_theoreticalPeakList.clear();
355 double cumul_mass = m_beginMassDelta + m_nterModification->getMass() -
357
358
359 auto it_expe_peaks_end = mcsp_peakList.get()->end();
360 std::size_t i = 0;
361 for(auto aa_model : *this)
362 {
363 // compute B ions
364 cumul_mass += aa_model.amino_acid.getMass() + aa_model.mass_difference;
365
366 qDebug() << cumul_mass;
367 m_theoreticalPeakList.push_back(
368 {pappso::MzRange(cumul_mass, precision), it_expe_peaks_end, i});
369 i++;
370 }
371 // remove the complete ion B (entire peptide, no fragment):
372 m_theoreticalPeakList.pop_back();
373
374 std::size_t bmaxindice = m_theoreticalPeakList.size();
375 double yion_delta =
377 cumul_mass += yion_delta + pappso::MHPLUS;
378 for(std::size_t i = 0; i < bmaxindice; i++)
379 {
380 m_theoreticalPeakList.push_back(
381 {pappso::MzRange(cumul_mass - m_theoreticalPeakList[i].mz_range.getMz(),
382 precision),
383 it_expe_peaks_end,
384 i + 1});
385 qDebug() << m_theoreticalPeakList.back().mz_range.getMz();
386 }
387
388 std::sort(
389 m_theoreticalPeakList.begin(),
392 return a.mz_range.getMz() < b.mz_range.getMz();
393 });
394}
395
396std::vector<double>
398{
399 std::vector<double> mass_list;
400 for(auto &datapoint : m_theoreticalPeakList)
401 {
402 mass_list.push_back(datapoint.mz_range.getMz());
403 }
404 return mass_list;
405}
406
407std::size_t
412
413double
418
419double
424
425void
427{
428 back().mass_difference = getMassDelta();
429}
430
431bool
433{
434 double precision_ref = precision->getNominal();
435 double offset = 0;
436 bool modified = false;
437 std::vector<AminoAcidModel>::iterator it_begin_block = end();
438 if(std::abs(m_beginMassDelta) > precision_ref)
439 {
440 it_begin_block = begin();
441 offset = m_beginMassDelta;
442
443 qDebug() << " new block offset=" << offset;
444 }
445 qDebug() << offset;
446 for(auto it = begin(); it != end(); it++)
447 {
448 qDebug() << " it->mass_difference=" << it->mass_difference;
449 if(it_begin_block == end())
450 {
451
452 if(std::abs(it->mass_difference) > precision_ref)
453 {
454 // new block
455 it_begin_block = it;
456 offset = it->mass_difference;
457 }
458 qDebug() << " new block offset=" << offset;
459 }
460 if(it_begin_block != end())
461 {
462 // existing block
463 qDebug() << " existing block offset=" << offset;
464 if(it == begin())
465 {
466 offset += it->mass_difference;
467 }
468 else
469 {
470 // where is the end of the block ?
471 qDebug() << " where is the end of the block ? offset=" << offset;
472 auto it_end_block = std::find_if(
473 it,
474 end(),
475 [offset, precision_ref](const AminoAcidModel &point) {
476 qDebug() << point.mass_difference << " "
477 << offset + point.mass_difference;
478 if(std::abs(offset + point.mass_difference) <= precision_ref)
479 return true;
480 return false;
481 });
482
483 if(it_end_block != end())
484 {
485 // eliminate mass mass_difference
486 modified = true;
487 if(it_begin_block == begin())
489 it_begin_block->mass_difference = 0.0;
490 it_end_block->mass_difference = 0.0;
491 // start new block search
492 it_begin_block = end();
493 offset = 0;
494 qDebug() << " existing block end block found";
495 }
496 else
497 {
498 qDebug() << " existing block end block not found offset="
499 << offset;
500 // close block
501 it_begin_block = end();
502 offset = 0;
503 }
504 }
505 }
506 }
507 qDebug() << toString();
508 return modified;
509}
510
511
512bool
514{ // System.out.println("Elimination des neg offsets");
515 double precision_ref = precision->getNominal();
516 auto it_reverse = rbegin();
517 bool modified = false;
518
519 while(it_reverse != rend())
520 {
521 if(it_reverse->remove)
522 {
523 // this is a part of already removed block
524 }
525 else
526 {
527 double mass_difference = it_reverse->mass_difference;
528 if(mass_difference < 0)
529 {
530 // try to find an amino acid blocl of that mass
531 double cumul_mass_aa = 0;
532 auto it_block_end = it_reverse;
533 while(it_block_end != rend())
534 {
535 cumul_mass_aa += it_block_end->amino_acid.getMass();
536 it_block_end++;
537 if(std::abs(mass_difference + cumul_mass_aa) < precision_ref)
538 {
539 modified = true;
540 // block found -> remove this range
541 for(auto it_block = it_reverse; it_block != it_block_end;
542 it_block++)
543 {
544 it_block->remove = true;
545 }
546 }
547 else
548 {
549 if((it_block_end != rend()) &&
550 (it_block_end->mass_difference != 0.0))
551 {
552 // interrupted block
553 break;
554 }
555 }
556 }
557 }
558 }
559 it_reverse++;
560 }
561
562 return modified;
563}
564
565bool
567{
568 if(m_theoreticalPeakList.size() == 0)
569 {
570 throw pappso::PappsoException("missing peak matching");
571 }
572 bool modified = false;
573 auto it_expe_peaks_end = mcsp_peakList.get()->end();
574 for(auto &theo_peak : m_theoreticalPeakList)
575 {
576 if(theo_peak.it_experimental_peak_match != it_expe_peaks_end)
577 {
578 if(at(theo_peak.aa_indice).bracket == true)
579 {
580 modified = true;
581 at(theo_peak.aa_indice).bracket = false;
582 }
583 }
584 }
585 return modified;
586}
587
588const std::vector<TheoreticalPeakDataPoint> &
593
594QString
596{
597
598 auto it_expe_peaks_end = mcsp_peakList.get()->end();
599 QString str;
600 for(auto &theo_peak : m_theoreticalPeakList)
601 {
602 double intensity = 0;
603 if(theo_peak.it_experimental_peak_match != it_expe_peaks_end)
604 {
605 intensity = theo_peak.it_experimental_peak_match->y;
606 }
607 str.append(QString("{%1,%2,%3,%4}")
608 .arg(theo_peak.mz_range.getMz())
609 .arg(intensity)
610 .arg(theo_peak.aa_indice)
611 .arg(at(theo_peak.aa_indice).amino_acid.toString()));
612 }
613 return str;
614}
615
616bool
617PeptideModel::checkForMutation(const pappso::Aa &amino_acid_candidate,
618 pappso::PrecisionPtr precision)
619{
620 bool modif = false;
621 double total_mass_to_insert = amino_acid_candidate.getMass();
622 auto insert_aa_modification =
624 amino_acid_candidate.getLetter());
625
626 double mass_difference = m_beginMassDelta;
627 for(auto &aa_model : *this)
628 {
629 mass_difference += aa_model.mass_difference;
630 auto aa_ref = aa_model.amino_acid;
631 if(mass_difference != 0.0)
632 {
633 mass_difference += aa_ref.getTotalModificationMass();
634 AaModificationP aa_ref_remove =
636 aa_model.amino_acid.getLetter());
637 // check for possible mutation
638 double current_diff =
639 (aa_ref_remove->getMass() - mass_difference) + total_mass_to_insert;
640
641
642 qDebug() << "current_diff=" << current_diff;
643 if(std::abs(current_diff) < precision->getNominal())
644 {
645 qDebug() << "modif = true=";
646 aa_model.mass_difference = 0;
647 aa_model.amino_acid.removeAllButInternalModification();
648 aa_model.amino_acid.addAaModification(aa_ref_remove);
649 aa_model.amino_acid.addAaModification(insert_aa_modification);
650 for(auto &mod : amino_acid_candidate.getModificationList())
651 {
652 aa_model.amino_acid.addAaModification(mod);
653 }
654 modif = true;
655 }
656 }
657 mass_difference = 0;
658 }
659 return modif;
660}
661
662bool
663PeptideModel::checkForMutations(const std::vector<AminoAcidChar> &aa_list,
664 pappso::PrecisionPtr precision)
665{
666 bool modif = false;
667 // AaModificationP carbamido = AaModification::getInstance("MOD:00397");
668 // AaModificationP metox = AaModification::getInstance("MOD:00719");
669
670 std::vector<AaModificationP> modificationInsertList;
671 for(const auto aa : aa_list)
672 {
673 modificationInsertList.push_back(
675 }
676 double mass_difference = m_beginMassDelta;
677 for(auto &aa_model : *this)
678 {
679 mass_difference += aa_model.mass_difference;
680 auto aa_ref = aa_model.amino_acid;
681 if(mass_difference != 0.0)
682 {
683 mass_difference += aa_ref.getTotalModificationMass();
684 AaModificationP aa_ref_remove =
686 aa_model.amino_acid.getLetter());
687 AaModificationP aa_ref_insert = nullptr;
688 double better_diff = 10;
689 // check for possible mutation
690 for(const auto aaInsertion : modificationInsertList)
691 {
692 double current_diff =
693 (aa_ref_remove->getMass() - mass_difference) +
694 aaInsertion->getMass();
695 qDebug() << aa_model.amino_acid.getLetter() << " "
696 << aaInsertion->getName() << " "
697 << aaInsertion->getMass() << " " << current_diff;
698 if(std::abs(current_diff) < std::abs(better_diff))
699 {
700 aa_ref_insert = aaInsertion;
701 better_diff = current_diff;
702 }
703 }
704 qDebug() << "better_diff=" << better_diff;
705 if((aa_ref_insert != nullptr) &&
706 (std::abs(better_diff) < precision->getNominal()))
707 {
708
709 qDebug() << "modif = true=";
710 aa_model.mass_difference = 0;
711 aa_model.amino_acid.removeAllButInternalModification();
712 aa_model.amino_acid.addAaModification(aa_ref_remove);
713 aa_model.amino_acid.addAaModification(aa_ref_insert);
714 modif = true;
715 }
716 }
717 mass_difference = 0;
718 }
719 return modif;
720}
721
722bool
724 pappso::PrecisionPtr precision)
725{
726
727 bool modif = false;
728
729 double mass_difference = m_beginMassDelta;
730 for(auto &aa_model : *this)
731 {
732 mass_difference += aa_model.mass_difference;
733 auto aa_ref = aa_model.amino_acid;
734 if(mass_difference != 0.0)
735 {
736 if(std::abs(modification->getMass() - mass_difference) <
737 precision->getNominal())
738 {
739 aa_model.mass_difference = 0;
740 aa_model.amino_acid.addAaModification(modification);
741 modif = true;
742 }
743 }
744 mass_difference = 0;
745 }
746 return modif;
747}
748
749
750bool
752 pappso::PrecisionPtr precision)
753{
754
755 bool modif = false;
756
757 double mass_difference = m_beginMassDelta;
758 for(auto &aa_model : *this)
759 {
760 mass_difference += aa_model.mass_difference;
761 auto aa_ref = aa_model.amino_acid;
762 if(aa_ref.getLetter() == aa_modified.getLetter())
763 {
764 if(mass_difference != 0.0)
765 {
766 double mass_modification = aa_modified.getTotalModificationMass();
767 if(std::abs(mass_modification - mass_difference) <
768 precision->getNominal())
769 {
770 aa_model.mass_difference = 0;
771 for(auto &modification : aa_modified.getModificationList())
772 {
773 aa_model.amino_acid.addAaModification(modification);
774 }
775 modif = true;
776 }
777 }
778 }
779 mass_difference = 0;
780 }
781 return modif;
782}
783
784} // namespace specglob
785} // namespace pappso
virtual const char & getLetter() const
Definition aabase.cpp:433
const QString & getAccession() const
pappso_double getMass() const
static AaModificationP getInstanceRemovalAccessionByAaLetter(const QChar &amino_acid)
get a PSI MOD instance corresponding to the removal of the given amino acid find the modifications th...
static AaModificationP getInstanceInsertionAccessionByAaLetter(const QChar &amino_acid)
get a PSI MOD instance corresponding to the insertion of the given amino acid find the modifications.
double getTotalModificationMass() const
get the sum of mass modifications
Definition aa.cpp:79
const std::vector< AaModificationP > & getModificationList() const
Definition aa.cpp:73
pappso_double getMass() const override
Definition aa.cpp:90
static pappso_double getDeltaMass(PeptideIon ion_type)
AaModificationP getInternalNterModification() const
Definition peptide.cpp:467
AaModificationP getInternalCterModification() const
Definition peptide.cpp:478
virtual pappso_double getNominal() const final
Definition precision.cpp:65
Class representing a fully specified mass spectrum.
MassSpectrumCstSPtr getMassSpectrumCstSPtr() const
Get the MassSpectrumCstSPtr.
double getPrecursorMass(bool *ok_p=nullptr) const
get precursor mass given the charge stats and precursor mz
static QString toString(specglob::SpectralAlignmentType type)
Convenience function to return a string describing the specglob alingment type.
Definition utils.cpp:489
pappso::AaModificationP m_nterModification
double getIntensityExperimentalPeaks() const
std::vector< TheoreticalPeakDataPoint > m_theoreticalPeakList
std::size_t getCountSharedPeaks() const
void matchExperimentalPeaks(pappso::PrecisionPtr precision)
QString getTheoreticalPeakDataPointListToString() const
bool eliminateNegativeOffset(pappso::PrecisionPtr precision)
QString toProForma() const
get the peptide model in ProForma notation https://github.com/HUPO-PSI/ProForma/blob/master/README....
bool checkForAaModificationP(pappso::AaModificationP modification, pappso::PrecisionPtr precision)
try to replace mass differences with the given modification
pappso::AaModificationP m_cterModification
double getMassDelta() const
mass delta between experimental and theoretical mass
PeptideModel & operator=(const PeptideModel &other)
pappso::MassSpectrumCstSPtr mcsp_peakList
const std::vector< TheoreticalPeakDataPoint > & getTheoreticalPeakDataPointList() const
void generateTheoreticalPeaks(pappso::PrecisionPtr precision)
bool eliminateComplementaryDelta(pappso::PrecisionPtr precision)
bool checkForMutations(const std::vector< AminoAcidChar > &aa_list, pappso::PrecisionPtr precision)
try to replace mass differences with corresponding mutations mass delta
bool checkForAaModification(const pappso::Aa &aa_modified, pappso::PrecisionPtr precision)
try to replace mass differences with the given modifications on a specific amino acid
bool checkForMutation(const pappso::Aa &amino_acid_candidate, pappso::PrecisionPtr precision)
try to replace mass differences with corresponding mutation mass delta
std::vector< double > getTheoreticalIonMassList() const
PeptideModel & copyDeep(const PeptideModel &other)
@ nonAlign
the type of alignment to put in origin matrix NON Alignment (0 - NA)
tries to keep as much as possible monoisotopes, removing any possible C13 peaks and changes multichar...
Definition aa.cpp:39
@ y
Cter amino ions.
const pappso_double MHPLUS(1.007276466879)
const pappso_double MPROTIUM(1.007825032241)
SpecGlobTool peptide model.