libpappsomspp
Library for mass spectrometry
Loading...
Searching...
No Matches
pappso::Enzyme Class Reference

#include <enzyme.h>

Public Member Functions

 Enzyme ()
 build the default enzyme (trypsin) with recognition_site = "([KR])([^P])"
 
 Enzyme (const QString &recognition_site)
 build any enzyme given a recognition_site
 
 ~Enzyme ()
 
void eat (std::int8_t sequence_database_id, const ProteinSp &protein_sp, bool is_decoy, EnzymeProductInterface &enzyme_product) const
 digest a protein into enzyme products
 
void setMiscleavage (unsigned int miscleavage)
 sets the maximum number of missed cleavages allowed in the digestion
 
unsigned int getMiscleavage () const
 get the maximum number of missed cleavages allowed in the digestion
 
void setTakeOnlyFirstWildcard (bool take_only_first_wildcard)
 keep only the first peptide variant when the sequence contains X, B or Z wildcards (sets m_takeOnlyFirstWildcard)
 
void setMaxPeptideVariantListSize (std::size_t max_peptide_variant_list_size)
 if there are wildcards in the protein sequence : restrict the number of possible peptide sequences
 
const QRegularExpression & getQRegExpRecognitionSite () const
 

Private Member Functions

void sanityCheck (EnzymeProductInterface &enzyme_product, std::int8_t sequence_database_id, const ProteinSp &protein_sp, bool is_decoy, const PeptideStr &peptide, unsigned int start, bool is_nter, unsigned int missed_cleavage_number, bool semi_enzyme) const
 
void replaceWildcards (std::vector< std::string > *p_peptide_variant_list) const
 

Private Attributes

QRegularExpression m_recognitionSite
 example with a kinase == [K,R]
 
unsigned int m_miscleavage = 0
 
bool m_takeOnlyFirstWildcard = false
 
std::size_t m_maxPeptideVariantListSize = 100
 
std::vector< char > m_wildCardX
 
std::vector< char > m_wildCardB
 
std::vector< char > m_wildCardZ
 

Detailed Description

Definition at line 31 of file enzyme.h.

Constructor & Destructor Documentation

◆ Enzyme() [1/2]

pappso::Enzyme::Enzyme ( )

build the default enzyme (trypsin) with recognition_site = "([KR])([^P])"

Definition at line 32 of file enzyme.cpp.

33{
34 m_recognitionSite.setPattern("([KR])([^P])");
35 m_miscleavage = 0;
36
37
38 char vv1[] = {'A', 'R', 'N', 'D', 'C', 'Q', 'E', 'G', 'H', 'I',
39 'L', 'K', 'M', 'F', 'P', 'S', 'T', 'W', 'Y', 'V'};
40 m_wildCardX.assign(std::begin(vv1), std::end(vv1));
41
42 char vv2[] = {'N', 'D'};
43 m_wildCardB.assign(std::begin(vv2), std::end(vv2));
44
45 char vv3[] = {'Q', 'E'};
46 m_wildCardZ.assign(std::begin(vv3), std::end(vv3));
47}
QRegularExpression m_recognitionSite
example with a kinase == [K,R]
Definition enzyme.h:89
std::vector< char > m_wildCardB
Definition enzyme.h:97
std::vector< char > m_wildCardZ
Definition enzyme.h:98
std::vector< char > m_wildCardX
Definition enzyme.h:96
unsigned int m_miscleavage
Definition enzyme.h:90

References m_miscleavage, m_recognitionSite, m_wildCardB, m_wildCardX, and m_wildCardZ.

◆ Enzyme() [2/2]

pappso::Enzyme::Enzyme ( const QString &  recognition_site)

build any enzyme given a recognition_site

Parameters
recognition_site is a regular expression that must identify 2 motifs: one on the Nter side, one on the Cter side

Definition at line 49 of file enzyme.cpp.

50{
51 m_recognitionSite.setPattern(recognition_site);
52 m_miscleavage = 0;
53
54
55 char vv1[] = {'A', 'R', 'N', 'D', 'C', 'Q', 'E', 'G', 'H', 'I',
56 'L', 'K', 'M', 'F', 'P', 'S', 'T', 'W', 'Y', 'V'};
57 m_wildCardX.assign(std::begin(vv1), std::end(vv1));
58
59 char vv2[] = {'N', 'D'};
60 m_wildCardB.assign(std::begin(vv2), std::end(vv2));
61
62 char vv3[] = {'Q', 'E'};
63 m_wildCardZ.assign(std::begin(vv3), std::end(vv3));
64}

References m_miscleavage, m_recognitionSite, m_wildCardB, m_wildCardX, and m_wildCardZ.

◆ ~Enzyme()

pappso::Enzyme::~Enzyme ( )

Definition at line 66 of file enzyme.cpp.

67{
68}

Member Function Documentation

◆ eat()

void pappso::Enzyme::eat ( std::int8_t  sequence_database_id,
const ProteinSp protein_sp,
bool  is_decoy,
EnzymeProductInterface enzyme_product 
) const

digest a protein into enzyme products

Parameters
sequence_database_id is an integer that references the sequence database (file, stream, url...)
protein_sp is the original protein to be digested
is_decoy tells if the current protein is a decoy (true) or normal (false) protein
enzyme_product is the object that will receive the digestion products

Definition at line 87 of file enzyme.cpp.

91{
92 /*
93 * for aa in self.aa_to_cut:
94 seq = seq.replace(aa, aa + ' ')
95 seq_stack = []
96 for s in seq.strip().split(' '):
97 seq_stack.append(s)
98 if len(seq_stack) > self.misscleavage + 1:
99 seq_stack.pop(0)
100 s2 = ""
101 for s_miss in seq_stack[::-1]:
102 s2 = s_miss + s2
103 yield s2
104 */
105 qDebug() << "Enzyme::eat begin ";
106 const QString sequence = protein_sp.get()->getSequence();
107 qDebug() << sequence;
108 QStringList peptide_list;
109 int pos = 0;
110 int peptide_start = 0;
111 int peptide_size = sequence.size();
112 QRegularExpressionMatch match_recognition_site =
113 m_recognitionSite.match(sequence, pos);
114 while(match_recognition_site.hasMatch())
115 {
116 pos = match_recognition_site.capturedStart(0);
117 peptide_size =
118 pos + match_recognition_site.captured(1).length() - peptide_start;
119 // qDebug() << "pos=" << pos << " peptide_start=" << peptide_start << "
120 // peptide_size=" << peptide_size << " " <<
121 // sequence.mid(peptide_start,peptide_size);
122 if(peptide_size > 0)
123 {
124 peptide_list.append(sequence.mid(peptide_start, peptide_size));
125 }
126 peptide_start += peptide_size;
127 pos = peptide_start; // all peptides MUST be consecutive
128 match_recognition_site = m_recognitionSite.match(sequence, pos);
129 }
130 peptide_size = sequence.size() - peptide_start;
131 if(peptide_size > 0)
132 {
133 peptide_list.append(sequence.mid(peptide_start, peptide_size));
134 }
135
136 unsigned int start = 1;
137 bool is_nter = true;
138 foreach(const QString &peptide, peptide_list)
139 {
140 // enzyme_product.setPeptide(sequence_database_id, protein_sp,is_decoy,
141 // peptide, start,is_nter,0, false);
142 sanityCheck(enzyme_product,
143 sequence_database_id,
144 protein_sp,
145 is_decoy,
146 peptide,
147 start,
148 is_nter,
149 0,
150 false);
151 is_nter = false;
152 start += peptide.size();
153 }
154
155 unsigned int miscleavage_i = 0;
156 while(miscleavage_i < m_miscleavage)
157 {
158 miscleavage_i++;
159 qDebug() << "miscleavage_i=" << miscleavage_i;
160 int chunk_number = miscleavage_i + 1;
161 unsigned int start = 1;
162 bool is_nter = true;
163
164 for(auto i = 0; i < peptide_list.size(); ++i)
165 {
166 qDebug() << "start=" << start;
167 QStringList peptide_mis_list;
168 for(auto j = 0; (j < chunk_number) && ((i + j) < peptide_list.size());
169 j++)
170 {
171 peptide_mis_list << peptide_list.at(i + j);
172 }
173 if(peptide_mis_list.size() == chunk_number)
174 {
175 // enzyme_product.setPeptide(sequence_database_id,
176 // protein_sp,is_decoy, peptide_mis_list.join(""), start,is_nter,
177 // miscleavage_i, false);
178 sanityCheck(enzyme_product,
179 sequence_database_id,
180 protein_sp,
181 is_decoy,
182 peptide_mis_list.join(""),
183 start,
184 is_nter,
185 miscleavage_i,
186 false);
187 }
188 is_nter = false;
189 start += peptide_list.at(i).size();
190 }
191 }
192}
void sanityCheck(EnzymeProductInterface &enzyme_product, std::int8_t sequence_database_id, const ProteinSp &protein_sp, bool is_decoy, const PeptideStr &peptide, unsigned int start, bool is_nter, unsigned int missed_cleavage_number, bool semi_enzyme) const
Definition enzyme.cpp:276

References m_miscleavage, m_recognitionSite, and sanityCheck().

◆ getMiscleavage()

unsigned int pappso::Enzyme::getMiscleavage ( ) const

get the maximum number of missed cleavages allowed in the digestion

Returns
miscleavage maximum number of missed cleavages to allow (default is 0)

Definition at line 76 of file enzyme.cpp.

77{
78 return m_miscleavage;
79}

References m_miscleavage.

◆ getQRegExpRecognitionSite()

const QRegularExpression & pappso::Enzyme::getQRegExpRecognitionSite ( ) const

Definition at line 353 of file enzyme.cpp.

354{
355 return m_recognitionSite;
356}

References m_recognitionSite.

◆ replaceWildcards()

void pappso::Enzyme::replaceWildcards ( std::vector< std::string > *  p_peptide_variant_list) const
private

Definition at line 195 of file enzyme.cpp.

196{
197 std::string new_peptide = p_peptide_variant_list->at(0);
198 qDebug() << "Enzyme::replaceWildcards begin " << new_peptide.c_str();
199 std::vector<std::string> old_peptide_variant_list;
200 old_peptide_variant_list.assign(p_peptide_variant_list->begin(),
201 p_peptide_variant_list->end());
202
203
204 for(char wildcard : {'X', 'B', 'Z'})
205 {
206
207 std::size_t position = new_peptide.find(wildcard);
208 if(position == std::string::npos)
209 {
210 continue;
211 }
212 else
213 {
214 p_peptide_variant_list->clear();
215 /*
216 new_peptide[position] = 'A';
217 p_peptide_variant_list->push_back(new_peptide);
218 break;
219 */
220
221 const std::vector<char> *p_x_replace_wildcard = nullptr;
222 if(wildcard == 'X')
223 {
224 p_x_replace_wildcard = &m_wildCardX;
225 }
226 else if(wildcard == 'B')
227 {
228 p_x_replace_wildcard = &m_wildCardB;
229 }
230 else if(wildcard == 'Z')
231 {
232 p_x_replace_wildcard = &m_wildCardZ;
233 }
234
235 if(p_x_replace_wildcard != nullptr)
236 {
237 for(std::string orig_peptide : old_peptide_variant_list)
238 {
239 for(char replace : *p_x_replace_wildcard)
240 {
241 orig_peptide[position] = replace;
242 p_peptide_variant_list->push_back(orig_peptide);
243 }
244 }
245 }
246 else
247 {
248 throw ExceptionNotPossible(
249 QObject::tr("x_replace_wildcard is empty"));
250 }
251 // new_peptide[position] = 'A';
252 // p_peptide_variant_list->push_back(new_peptide);
253 // p_peptide_variant_list->resize(1);
254 // std::cerr << "Enzyme::replaceWildcards begin
255 // p_peptide_variant_list.size()=" << p_peptide_variant_list->size()
256 // <<
257 // endl;
258 break;
259 }
260 }
261 std::vector<std::string>().swap(
262 old_peptide_variant_list); // clear old_peptide_variant_list reallocating
263
264
265 qDebug() << "Enzyme::replaceWildcards end " << new_peptide.c_str();
266}

References m_wildCardB, m_wildCardX, and m_wildCardZ.

Referenced by sanityCheck().

◆ sanityCheck()

void pappso::Enzyme::sanityCheck ( EnzymeProductInterface enzyme_product,
std::int8_t  sequence_database_id,
const ProteinSp protein_sp,
bool  is_decoy,
const PeptideStr peptide,
unsigned int  start,
bool  is_nter,
unsigned int  missed_cleavage_number,
bool  semi_enzyme 
) const
private

Definition at line 276 of file enzyme.cpp.

285{
286 if(peptide.contains('X') || peptide.contains('B') || peptide.contains('Z'))
287 {
288
289 std::vector<std::string> peptide_variant_list;
290 peptide_variant_list.push_back(peptide.toStdString());
291
292 while((peptide_variant_list.at(0).find('X') != std::string::npos) ||
293 (peptide_variant_list.at(0).find('B') != std::string::npos) ||
294 (peptide_variant_list.at(0).find('Z') != std::string::npos))
295 {
296 replaceWildcards(&peptide_variant_list);
297 if(peptide_variant_list.size() > m_maxPeptideVariantListSize)
298 {
299 peptide_variant_list.resize(m_maxPeptideVariantListSize);
300 peptide_variant_list.shrink_to_fit();
301 }
302 }
303
304 // peptide_variant_list.resize(2);
305 if(m_takeOnlyFirstWildcard)
306 {
307 enzyme_product.setPeptide(sequence_database_id,
308 protein_sp,
309 is_decoy,
310 QString(peptide_variant_list.at(0).c_str()),
311 start,
312 is_nter,
313 missed_cleavage_number,
314 semi_enzyme);
315 }
316 else
317 {
318 std::string peptide_variant = peptide_variant_list.back();
319 while(peptide_variant_list.size() > 0)
320 {
321 enzyme_product.setPeptide(sequence_database_id,
322 protein_sp,
323 is_decoy,
324 QString(peptide_variant.c_str()),
325 start,
326 is_nter,
327 missed_cleavage_number,
328 semi_enzyme);
329 peptide_variant_list.pop_back();
330 if(peptide_variant_list.size() > 0)
331 {
332 peptide_variant = peptide_variant_list.back();
333 }
334 }
335 }
336 std::vector<std::string>().swap(
337 peptide_variant_list); // clear peptide_variant_list reallocating
338 }
339 else
340 {
341 enzyme_product.setPeptide(sequence_database_id,
342 protein_sp,
343 is_decoy,
344 peptide,
345 start,
346 is_nter,
347 missed_cleavage_number,
348 semi_enzyme);
349 }
350}
std::size_t m_maxPeptideVariantListSize
Definition enzyme.h:93
void replaceWildcards(std::vector< std::string > *p_peptide_variant_list) const
Definition enzyme.cpp:195
bool m_takeOnlyFirstWildcard
Definition enzyme.h:91

References m_maxPeptideVariantListSize, m_takeOnlyFirstWildcard, replaceWildcards(), and pappso::EnzymeProductInterface::setPeptide().

Referenced by eat().

◆ setMaxPeptideVariantListSize()

void pappso::Enzyme::setMaxPeptideVariantListSize ( std::size_t  max_peptide_variant_list_size)

if there are wildcards in the protein sequence : restrict the number of possible peptide sequences

Parameters
max_peptide_variant_list_size: maximum number of peptide variants (default is 100)

Definition at line 81 of file enzyme.cpp.

82{
83 m_maxPeptideVariantListSize = max_peptide_variant_list_size;
84}

References m_maxPeptideVariantListSize.

◆ setMiscleavage()

void pappso::Enzyme::setMiscleavage ( unsigned int  miscleavage)

sets the maximum number of missed cleavages allowed in the digestion

Parameters
miscleavage: maximum number of missed cleavages to allow (default is 0)

Definition at line 71 of file enzyme.cpp.

72{
73 m_miscleavage = miscleavage;
74}

References m_miscleavage.

◆ setTakeOnlyFirstWildcard()

void pappso::Enzyme::setTakeOnlyFirstWildcard ( bool  take_only_first_wildcard)

keep only the first peptide variant when the sequence contains X, B or Z wildcards (sets m_takeOnlyFirstWildcard)

Parameters
take_only_first_wildcard: true to take only the first possibility if there are X, B or Z wildcards in the sequence

Definition at line 269 of file enzyme.cpp.

270{
271 m_takeOnlyFirstWildcard = take_only_first_wildcard;
272}

References m_takeOnlyFirstWildcard.

Member Data Documentation

◆ m_maxPeptideVariantListSize

std::size_t pappso::Enzyme::m_maxPeptideVariantListSize = 100
private

Definition at line 93 of file enzyme.h.

Referenced by sanityCheck(), and setMaxPeptideVariantListSize().

◆ m_miscleavage

unsigned int pappso::Enzyme::m_miscleavage = 0
private

Definition at line 90 of file enzyme.h.

Referenced by Enzyme(), Enzyme(), eat(), getMiscleavage(), and setMiscleavage().

◆ m_recognitionSite

QRegularExpression pappso::Enzyme::m_recognitionSite
private

example with a kinase == [K,R]

Definition at line 89 of file enzyme.h.

Referenced by Enzyme(), Enzyme(), eat(), and getQRegExpRecognitionSite().

◆ m_takeOnlyFirstWildcard

bool pappso::Enzyme::m_takeOnlyFirstWildcard = false
private

Definition at line 91 of file enzyme.h.

Referenced by sanityCheck(), and setTakeOnlyFirstWildcard().

◆ m_wildCardB

std::vector<char> pappso::Enzyme::m_wildCardB
private

Definition at line 97 of file enzyme.h.

Referenced by Enzyme(), Enzyme(), and replaceWildcards().

◆ m_wildCardX

std::vector<char> pappso::Enzyme::m_wildCardX
private

Definition at line 96 of file enzyme.h.

Referenced by Enzyme(), Enzyme(), and replaceWildcards().

◆ m_wildCardZ

std::vector<char> pappso::Enzyme::m_wildCardZ
private

Definition at line 98 of file enzyme.h.

Referenced by Enzyme(), Enzyme(), and replaceWildcards().


The documentation for this class was generated from the following files: