libpappsomspp
Library for mass spectrometry
Loading...
Searching...
No Matches
pappso::specpeptidoms::SemiGlobalAlignment Class Reference

#include <semiglobalalignment.h>

Public Member Functions

 SemiGlobalAlignment (const ScoreValues &score_values, const pappso::PrecisionPtr precision_ptr, const AaCode &aaCode)
 ~SemiGlobalAlignment ()
void fastAlign (const SpOMSSpectrum &spectrum, const SpOMSProtein *protein_ptr)
 perform the first alignment search between a protein sequence and a spectrum. The member location heap is filled with the candidates locations.
void preciseAlign (const SpOMSSpectrum &spectrum, const SpOMSProtein *protein_ptr, const std::size_t beginning, const std::size_t length)
 performs the second alignment search between a protein subsequence and a spectrum.
void postProcessingAlign (const SpOMSSpectrum &spectrum, const SpOMSProtein *protein_ptr, std::size_t beginning, std::size_t length, const std::vector< double > &shifts)
 performs the post-processing : generates corrected spectra and align them
LocationSaver getLocationSaver () const
 Returns a copy of m_location_saver.
Scenario getScenario () const
 Returns a copy of m_scenario.
const AlignmentgetBestAlignment () const
 Returns a const ref to m_best_alignment.
const std::vector< KeyCell > & getInterestCells () const
 convenient function for degub purpose
const std::vector< KeyCell > & oneAlignStep (const pappso::specpeptidoms::SpOMSProtein &sequence, const std::size_t row_number, const std::vector< AaPosition > &aa_positions, const SpOMSSpectrum &spectrum, const bool fast_align, const pappso::specpeptidoms::SpOMSProtein *protein_ptr)
 function made for testing the fastAlign process, process one line and return the alignment matrix
void initFastAlign (const SpOMSSpectrum &spectrum)
 function made for testing the fastAlign process, initiate the variables for alignment
void initpreciseAlign (const SpOMSSpectrum &spectrum, std::size_t length2)
 function made for testing the preciseAlign process, initiate the variables for alignment

Static Public Member Functions

static std::vector< double > getPotentialMassErrors (const pappso::AaCode &aa_code, const Alignment &alignment, const QString &protein_seq)
 Returns a list of the potential mass errors corresponding to the provided alignment in the provided protein sequence.
static bool checkSequenceDiversity (const QString &sequence, std::size_t window, std::size_t minimum_aa_diversity)
 check that the sequence has a minimum of amino acid checkSequenceDiversity

Private Member Functions

void saveBestAlignment (const SpOMSProtein &sequence, const SpOMSSpectrum &spectrum, std::size_t offset)
 Stores the best alignment from m_scenario in m_best_alignment.
void correctAlign (const SpOMSProtein &protein_subseq, const SpOMSProtein *protein_ptr, const SpOMSSpectrum &spectrum, std::vector< std::size_t > &peaks_to_remove, std::size_t offset)
 Recursively performs the correction of the alignment.
void updateAlignmentMatrix (const pappso::specpeptidoms::SpOMSProtein &sequence, const std::size_t row_number, const std::vector< AaPosition > &aa_positions, const SpOMSSpectrum &spectrum, const bool fast_align, const pappso::specpeptidoms::SpOMSProtein *protein_ptr)
 updates the scores of the alignment matrix for a given amino acid as well as the location heap/scenario.
bool perfectShiftPossible (const pappso::specpeptidoms::SpOMSProtein &sequence, const SpOMSSpectrum &spectrum, const std::size_t origin_row, const std::size_t current_row, const std::size_t l_peak, const std::size_t r_peak) const
 indicates if a perfect shift is possible between the provided positions
std::size_t perfectShiftPossibleFrom0 (const pappso::specpeptidoms::SpOMSProtein &sequence, const SpOMSSpectrum &spectrum, const std::size_t current_row, const std::size_t r_peak) const
 indicates if a perfect shift is possible from the spectrum beginning to the provided peak. Returns the perfect shift origin if the shift is possible, otherwise returns the current row.
std::size_t perfectShiftPossibleEnd (const pappso::specpeptidoms::SpOMSProtein &sequence, const SpOMSSpectrum &spectrum, std::size_t end_row, std::size_t end_peak) const
 indicates if a perfect shift is possible between the provided positions

Private Attributes

std::vector< KeyCellm_interest_cells
std::vector< std::pair< std::size_t, KeyCell > > m_updated_cells
const ScoreValuesm_scorevalues
const int min_score = 15
pappso::PrecisionPtr m_precision_ptr
const AaCodem_aaCode
LocationSaver m_location_saver
Scenario m_scenario
Alignment m_best_alignment
Alignment m_best_corrected_alignment
Alignment m_best_post_processed_alignment

Detailed Description

Definition at line 91 of file semiglobalalignment.h.

Constructor & Destructor Documentation

◆ SemiGlobalAlignment()

pappso::specpeptidoms::SemiGlobalAlignment::SemiGlobalAlignment ( const ScoreValues & score_values,
const pappso::PrecisionPtr precision_ptr,
const AaCode & aaCode )

Default constructor

Definition at line 80 of file semiglobalalignment.cpp.

84 : m_scorevalues(score_values), m_aaCode(aaCode)
85{
86 m_precision_ptr = precision_ptr;
87
88 KeyCell key_cell_init_first;
89 key_cell_init_first.n_row = 0;
90 key_cell_init_first.score = 0;
91 key_cell_init_first.beginning = 0;
92 key_cell_init_first.tree_id = 0;
93 m_interest_cells.push_back(key_cell_init_first);
94}

References pappso::specpeptidoms::KeyCell::beginning, m_aaCode, m_interest_cells, m_precision_ptr, m_scorevalues, pappso::specpeptidoms::KeyCell::n_row, pappso::specpeptidoms::KeyCell::score, and pappso::specpeptidoms::KeyCell::tree_id.

◆ ~SemiGlobalAlignment()

pappso::specpeptidoms::SemiGlobalAlignment::~SemiGlobalAlignment ( )

Destructor

Definition at line 804 of file semiglobalalignment.cpp.

805{
806}

Member Function Documentation

◆ checkSequenceDiversity()

bool pappso::specpeptidoms::SemiGlobalAlignment::checkSequenceDiversity ( const QString & sequence,
std::size_t window,
std::size_t minimum_aa_diversity )
static

check that the sequence has a minimum of amino acid checkSequenceDiversity

Parameters
sequenceprotein sequence
windowthe size of substring to check
minimum_aa_diversityminimum number of different amino acid in this window

Definition at line 1062 of file semiglobalalignment.cpp.

1065{
1066 qDebug() << "sequence=" << sequence << " window=" << window
1067 << " minimum_aa_diversity=" << minimum_aa_diversity;
1068 if(sequence.size() < (qsizetype)window)
1069 return false;
1070 auto it_begin = sequence.begin();
1071 auto it_end = sequence.begin() + window;
1072 QString window_copy(sequence.mid(0, window));
1073 while(it_end != sequence.end())
1074 {
1075 std::partial_sort_copy(it_begin, it_end, window_copy.begin(), window_copy.end());
1076
1077 qDebug() << window_copy;
1078 std::size_t uniqueCount =
1079 std::unique(window_copy.begin(), window_copy.end()) - window_copy.begin();
1080
1081 qDebug() << uniqueCount;
1082 if(uniqueCount < minimum_aa_diversity)
1083 return false;
1084 it_begin++;
1085 it_end++;
1086 }
1087 return true;
1088}

Referenced by preciseAlign(), pappso::cbor::psm::PsmSpecPeptidOmsScan::sequenceAlignment(), and pappso::cbor::psm::PsmSpecPeptidOmsScan::storeAlignment().

◆ correctAlign()

void pappso::specpeptidoms::SemiGlobalAlignment::correctAlign ( const SpOMSProtein & protein_subseq,
const SpOMSProtein * protein_ptr,
const SpOMSSpectrum & spectrum,
std::vector< std::size_t > & peaks_to_remove,
std::size_t offset )
private

Recursively performs the correction of the alignment.

Parameters
protein_seqProtein reversed sequence to align.
protein_ptrProtein pointer on the sequence to align.
spectrumSpectrum to align.
peaks_to_removePeaks to remove from the spectrum.
offsetSize of the protein sequence minus beginning of the alignment. Used to compute the position of the alignment in the protein sequence.

Definition at line 280 of file semiglobalalignment.cpp.

285{
286 std::vector<AaPosition> aa_positions;
287 CorrectionTree correction_tree;
288 std::vector<std::size_t> final_peaks_to_remove;
289
290 KeyCell key_cell_init;
291 key_cell_init.beginning = 0;
292 key_cell_init.n_row = 0;
293 key_cell_init.score = m_scorevalues.get(ScoreType::init);
294 key_cell_init.tree_id = 0;
295
296 std::fill(m_interest_cells.begin(), m_interest_cells.end(), key_cell_init);
297
298 m_interest_cells.at(0).score = 0;
299
300 m_scenario.resetScenario();
301 qDebug();
302 for(qsizetype row_number = 1; row_number <= (qsizetype)sequence.size(); row_number++)
303 {
304 qDebug() << row_number - 1 << " " << sequence.size();
305 qDebug() << "sequence[row_number - 1].aa" << (char)sequence[row_number - 1].aa;
306 qDebug();
307 aa_positions = spectrum.getAaPositions(sequence[row_number - 1].code, peaks_to_remove);
308 qDebug();
309 updateAlignmentMatrix(sequence, row_number, aa_positions, spectrum, false, protein_ptr);
310 qDebug();
311 }
312
313 qDebug();
314 // Correction : if complementary peaks are used, corrected spectra without one of the two peaks
315 // are generated and aligned. The best alignment is kept.
316 qDebug() << m_scenario.getBestScore();
317 if(m_scenario.getBestScore() >
318 MIN_ALIGNMENT_SCORE) // We only correct alignments with acceptable scores
319 {
320 qDebug();
321 qDebug() << sequence.getSequence();
322 qDebug() << offset;
323 qDebug() << spectrum.getPrecursorCharge();
324 saveBestAlignment(sequence, spectrum, offset);
325 qDebug();
326 for(std::size_t iter : m_best_alignment.peaks)
327 {
328 qDebug() << "iter:" << iter << "comp:" << spectrum.getComplementaryPeak(iter);
329 if(iter == spectrum.getComplementaryPeak(iter))
330 {
331 continue;
332 }
333 else if(iter > spectrum.getComplementaryPeak(iter))
334 {
335 break;
336 }
337 else if(std::find(m_best_alignment.peaks.begin(),
338 m_best_alignment.peaks.end(),
339 spectrum.getComplementaryPeak(iter)) != m_best_alignment.peaks.end())
340 {
341 correction_tree.addPeaks(iter, spectrum.getComplementaryPeak(iter));
342 }
343 }
344 std::vector<std::vector<std::size_t>> corrections = correction_tree.getPeaks();
345 if(corrections.size() > 0)
346 {
347 for(auto new_peaks_to_remove : corrections)
348 {
349 final_peaks_to_remove = std::vector<std::size_t>(new_peaks_to_remove);
350 final_peaks_to_remove.insert(
351 final_peaks_to_remove.end(), peaks_to_remove.begin(), peaks_to_remove.end());
352 correctAlign(sequence, protein_ptr, spectrum, final_peaks_to_remove, offset);
353 }
354 }
355 else if(m_scenario.getBestScore() > m_best_corrected_alignment.score)
356 {
358 }
359 }
360 qDebug();
361}
void updateAlignmentMatrix(const pappso::specpeptidoms::SpOMSProtein &sequence, const std::size_t row_number, const std::vector< AaPosition > &aa_positions, const SpOMSSpectrum &spectrum, const bool fast_align, const pappso::specpeptidoms::SpOMSProtein *protein_ptr)
updates the scores of the alignment matrix for a given amino acid as well as the location heap/scenar...
void correctAlign(const SpOMSProtein &protein_subseq, const SpOMSProtein *protein_ptr, const SpOMSSpectrum &spectrum, std::vector< std::size_t > &peaks_to_remove, std::size_t offset)
Recursively performs the correction of the alignment.
void saveBestAlignment(const SpOMSProtein &sequence, const SpOMSSpectrum &spectrum, std::size_t offset)
Stores the best alignment from m_scenario in m_best_alignment.
const int MIN_ALIGNMENT_SCORE(15)

References pappso::specpeptidoms::CorrectionTree::addPeaks(), pappso::specpeptidoms::KeyCell::beginning, correctAlign(), pappso::specpeptidoms::SpOMSSpectrum::getAaPositions(), pappso::specpeptidoms::SpOMSSpectrum::getComplementaryPeak(), pappso::specpeptidoms::CorrectionTree::getPeaks(), pappso::specpeptidoms::SpOMSSpectrum::getPrecursorCharge(), pappso::specpeptidoms::SpOMSProtein::getSequence(), pappso::specpeptidoms::init, m_best_alignment, m_best_corrected_alignment, m_interest_cells, m_scenario, m_scorevalues, pappso::specpeptidoms::MIN_ALIGNMENT_SCORE(), pappso::specpeptidoms::KeyCell::n_row, saveBestAlignment(), pappso::specpeptidoms::KeyCell::score, pappso::specpeptidoms::KeyCell::tree_id, and updateAlignmentMatrix().

Referenced by correctAlign(), and preciseAlign().

◆ fastAlign()

void pappso::specpeptidoms::SemiGlobalAlignment::fastAlign ( const SpOMSSpectrum & spectrum,
const SpOMSProtein * protein_ptr )

perform the first alignment search between a protein sequence and a spectrum. The member location heap is filled with the candidates locations.

Parameters
spectrumSpectrum to align
protein_ptrProtein pointer on the sequence to align.

Definition at line 103 of file semiglobalalignment.cpp.

105{
106 std::size_t sequence_length = protein_ptr->size();
107
108 initFastAlign(spectrum);
109
110 for(std::size_t row_number = 1; row_number <= sequence_length; row_number++)
111 {
112 updateAlignmentMatrix(*protein_ptr,
113 row_number,
114 spectrum.getAaPositions(protein_ptr->at(row_number - 1).code),
115 spectrum,
116 true,
117 protein_ptr);
118 }
119}
void initFastAlign(const SpOMSSpectrum &spectrum)
function made for testing the fastAlign process, initiate the variables for alignment

References pappso::specpeptidoms::SpOMSSpectrum::getAaPositions(), initFastAlign(), and updateAlignmentMatrix().

Referenced by pappso::cbor::psm::PsmSpecPeptidOmsScan::sequenceAlignment().

◆ getBestAlignment()

const pappso::specpeptidoms::Alignment & pappso::specpeptidoms::SemiGlobalAlignment::getBestAlignment ( ) const

Returns a const ref to m_best_alignment.

Definition at line 1011 of file semiglobalalignment.cpp.

1012{
1013 return m_best_alignment;
1014}

References m_best_alignment.

Referenced by pappso::cbor::psm::PsmSpecPeptidOmsScan::sequenceAlignment().

◆ getInterestCells()

const std::vector< pappso::specpeptidoms::KeyCell > & pappso::specpeptidoms::SemiGlobalAlignment::getInterestCells ( ) const

convenient function for degub purpose

Definition at line 97 of file semiglobalalignment.cpp.

98{
99 return m_interest_cells;
100}

References m_interest_cells.

Referenced by oneAlignStep().

◆ getLocationSaver()

pappso::specpeptidoms::LocationSaver pappso::specpeptidoms::SemiGlobalAlignment::getLocationSaver ( ) const

Returns a copy of m_location_saver.

Definition at line 809 of file semiglobalalignment.cpp.

References m_location_saver.

Referenced by pappso::cbor::psm::PsmSpecPeptidOmsScan::sequenceAlignment().

◆ getPotentialMassErrors()

std::vector< double > pappso::specpeptidoms::SemiGlobalAlignment::getPotentialMassErrors ( const pappso::AaCode & aa_code,
const Alignment & alignment,
const QString & protein_seq )
static

Returns a list of the potential mass errors corresponding to the provided alignment in the provided protein sequence.

Parameters
aa_codethe amino acid code of reference to get aminon acid masses
alignmentAlignment for which to get the potential mass errors.
protein_seqProtein sequence corresponding to the provided alignment.

Definition at line 1017 of file semiglobalalignment.cpp.

1020{
1021 // qDebug() << protein_seq;
1022 if(alignment.end > (std::size_t)protein_seq.size())
1023 {
1024 throw pappso::ExceptionOutOfRange(QString("alignment.end > protein_seq.size() %1 %2")
1025 .arg(alignment.end)
1026 .arg(protein_seq.size()));
1027 }
1028 std::vector<double> potential_mass_errors(alignment.shifts);
1029 double shift = alignment.end_shift;
1030 std::size_t index;
1031 if(alignment.beginning > 0)
1032 { // -1 on unsigned int makes it wrong
1033 index = alignment.beginning - 1;
1034 while(shift > 0 && index > 0)
1035 {
1036 potential_mass_errors.push_back(shift);
1037 // qDebug() << " shift=" << shift << " index=" << index
1038 // << " letter=" << protein_seq.at(index).toLatin1();
1039 shift -= aa_code.getMass(
1040 protein_seq.at(index).toLatin1()); // Aa(protein_seq.at(index).unicode()).getMass();
1041 index--;
1042 }
1043 }
1044
1045 // qDebug() << "second";
1046 shift = alignment.begin_shift;
1047 index = alignment.end + 1;
1048 while(shift > 0 && index < (std::size_t)protein_seq.size())
1049 {
1050 potential_mass_errors.push_back(shift);
1051 qDebug() << " shift=" << shift << " index=" << index
1052 << " letter=" << protein_seq.at(index).toLatin1();
1053 shift -= aa_code.getMass(
1054 protein_seq.at(index).toLatin1()); // Aa(protein_seq.at(index).unicode()).getMass();
1055 index++;
1056 }
1057 // qDebug();
1058 return potential_mass_errors;
1059}
double getMass(uint8_t aa_code) const
get the mass of the amino acid given its integer code the amino acid can bear some modification (if a...
Definition aacode.cpp:224

References pappso::specpeptidoms::Alignment::begin_shift, pappso::specpeptidoms::Alignment::beginning, pappso::specpeptidoms::Alignment::end, pappso::specpeptidoms::Alignment::end_shift, pappso::AaCode::getMass(), pappso::specpeptidoms::shift, and pappso::specpeptidoms::Alignment::shifts.

Referenced by pappso::cbor::psm::PsmSpecPeptidOmsScan::sequenceAlignment().

◆ getScenario()

pappso::specpeptidoms::Scenario pappso::specpeptidoms::SemiGlobalAlignment::getScenario ( ) const

Returns a copy of m_scenario.

Definition at line 815 of file semiglobalalignment.cpp.

816{
817 return m_scenario;
818}

References m_scenario.

◆ initFastAlign()

void pappso::specpeptidoms::SemiGlobalAlignment::initFastAlign ( const SpOMSSpectrum & spectrum)

function made for testing the fastAlign process, initiate the variables for alignment

Definition at line 122 of file semiglobalalignment.cpp.

123{
124 // m_scenario.clear();
125 // TODO don't forget to reset any important variable
126 // m_best_alignment.reset();
127 // m_best_corrected_alignment.reset();
128 // m_best_post_processed_alignment.reset();
129
130 KeyCell key_cell_init;
131 key_cell_init.n_row = 0;
132 key_cell_init.score = m_scorevalues.get(ScoreType::init);
133 key_cell_init.beginning = 0;
134 key_cell_init.tree_id = 0;
135
136 m_interest_cells.resize(spectrum.size());
137 std::fill(m_interest_cells.begin(), m_interest_cells.end(), key_cell_init);
138
139 m_interest_cells.at(0).score = 0;
140
141 // m_location_saver.resetLocationSaver();
142 m_updated_cells.clear();
143}
std::vector< std::pair< std::size_t, KeyCell > > m_updated_cells

References pappso::specpeptidoms::KeyCell::beginning, pappso::specpeptidoms::init, m_interest_cells, m_scorevalues, m_updated_cells, pappso::specpeptidoms::KeyCell::n_row, pappso::specpeptidoms::KeyCell::score, and pappso::specpeptidoms::KeyCell::tree_id.

Referenced by fastAlign().

◆ initpreciseAlign()

void pappso::specpeptidoms::SemiGlobalAlignment::initpreciseAlign ( const SpOMSSpectrum & spectrum,
std::size_t length2 )

function made for testing the preciseAlign process, initiate the variables for alignment

Definition at line 146 of file semiglobalalignment.cpp.

148{
149 m_scenario.reserve(length2 + 1, spectrum.size());
150 m_interest_cells.reserve(spectrum.size());
151 m_interest_cells.at(0).n_row = 0;
152 m_interest_cells.at(0).score = 0;
153 m_interest_cells.at(0).beginning = 0;
154 m_interest_cells.at(0).tree_id = 0;
155 for(std::size_t i = 1; i < m_interest_cells.size(); i++)
156 {
157 m_interest_cells.at(i).n_row = 0;
159 m_interest_cells.at(i).beginning = 0;
160 m_interest_cells.at(i).tree_id = 0;
161 }
162 for(std::size_t iter = m_interest_cells.size(); iter < spectrum.size(); iter++)
163 {
164 m_interest_cells.push_back({0, m_scorevalues.get(ScoreType::init), 0, 0});
165 }
166 m_scenario.resetScenario();
167}

References pappso::specpeptidoms::init, m_interest_cells, m_scenario, and m_scorevalues.

Referenced by preciseAlign().

◆ oneAlignStep()

const std::vector< pappso::specpeptidoms::KeyCell > & pappso::specpeptidoms::SemiGlobalAlignment::oneAlignStep ( const pappso::specpeptidoms::SpOMSProtein & sequence,
const std::size_t row_number,
const std::vector< AaPosition > & aa_positions,
const SpOMSSpectrum & spectrum,
const bool fast_align,
const pappso::specpeptidoms::SpOMSProtein * protein_ptr )

function made for testing the fastAlign process, process one line and return the alignment matrix

Definition at line 1091 of file semiglobalalignment.cpp.

1098{
1099 updateAlignmentMatrix(sequence, row_number, aa_positions, spectrum, fast_align, protein_ptr);
1100 return getInterestCells();
1101}
const std::vector< KeyCell > & getInterestCells() const
convenient function for degub purpose

References getInterestCells(), and updateAlignmentMatrix().

◆ perfectShiftPossible()

bool pappso::specpeptidoms::SemiGlobalAlignment::perfectShiftPossible ( const pappso::specpeptidoms::SpOMSProtein & sequence,
const SpOMSSpectrum & spectrum,
const std::size_t origin_row,
const std::size_t current_row,
const std::size_t l_peak,
const std::size_t r_peak ) const
private

indicates if a perfect shift is possible between the provided positions

Parameters
sequenceReversed sequence of the protein being aligned
spectrumSpectrum being aligned
origin_rowbeginning row of the aa gap to verify (== index of the first missing aa in sequence)
current_rowrow being processed (== index of the current AaPosition in sequence)
l_peakleft peak index of the mz gap to verify
r_peakright peak index of the mz gap to verify

Definition at line 702 of file semiglobalalignment.cpp.

709{
710 try
711 {
712 double missing_mass = 0;
713 auto it_end = sequence.begin() + current_row;
714 for(auto iter = sequence.begin() + origin_row; (iter != it_end) && (iter != sequence.end());
715 iter++)
716 {
717 missing_mass += iter->mass; // Aa(iter->unicode()).getMass();
718 }
719 if(missing_mass > 0)
720 {
721 pappso::MzRange mz_range(missing_mass, m_precision_ptr);
722 return mz_range.contains(spectrum.getMZShift(l_peak, r_peak));
723 }
724 else
725 {
726 return false;
727 }
728 }
729 catch(const pappso::PappsoException &err)
730 {
731 throw pappso::PappsoException(
732 QObject::tr("perfectShiftPossible failed :\n%1").arg(err.qwhat()));
733 }
734 catch(const std::exception &error)
735 {
736 throw pappso::PappsoException(
737 QObject::tr("perfectShiftPossible failed std exception:\n%1").arg(error.what()));
738 }
739}
virtual const QString & qwhat() const

References pappso::MzRange::contains(), pappso::specpeptidoms::SpOMSSpectrum::getMZShift(), m_precision_ptr, and pappso::PappsoException::qwhat().

Referenced by updateAlignmentMatrix().

◆ perfectShiftPossibleEnd()

std::size_t pappso::specpeptidoms::SemiGlobalAlignment::perfectShiftPossibleEnd ( const pappso::specpeptidoms::SpOMSProtein & sequence,
const SpOMSSpectrum & spectrum,
std::size_t end_row,
std::size_t end_peak ) const
private

indicates if a perfect shift is possible between the provided positions

Parameters
sequenceReversed sequence of the protein being aligned
spectrumSpectrum being aligned
end_rowIndex of the last aligned row.
end_peakIndex of the last aligned peak.

Definition at line 769 of file semiglobalalignment.cpp.

774{
775 try
776 {
777 std::size_t perfect_shift_end = end_row + 1;
778 double missing_mass = spectrum.getMissingMass(end_peak);
779 pappso::MzRange mz_range(missing_mass, m_precision_ptr);
780 double aa_mass = 0;
781 while(aa_mass < missing_mass && perfect_shift_end < (std::size_t)sequence.size() &&
782 !mz_range.contains(aa_mass))
783 {
784 aa_mass += sequence.at(perfect_shift_end - 1)
785 .mass; // Aa(sequence.at(perfect_shift_end - 1).unicode()).getMass();
786 perfect_shift_end++;
787 }
788 if(mz_range.contains(aa_mass))
789 {
790 return perfect_shift_end - 1;
791 }
792 else
793 {
794 return end_row;
795 }
796 }
797 catch(const pappso::PappsoException &err)
798 {
799 throw pappso::PappsoException(
800 QObject::tr("perfectShiftPossibleEnd failed :\n%1").arg(err.qwhat()));
801 }
802}

References pappso::MzRange::contains(), pappso::specpeptidoms::SpOMSSpectrum::getMissingMass(), m_precision_ptr, and pappso::PappsoException::qwhat().

Referenced by saveBestAlignment().

◆ perfectShiftPossibleFrom0()

std::size_t pappso::specpeptidoms::SemiGlobalAlignment::perfectShiftPossibleFrom0 ( const pappso::specpeptidoms::SpOMSProtein & sequence,
const SpOMSSpectrum & spectrum,
const std::size_t current_row,
const std::size_t r_peak ) const
private

indicates if a perfect shift is possible from the spectrum beginning to the provided peak. Returns the perfect shift origin if the shift is possible, otherwise returns the current row.

Parameters
sequenceReversed sequence of the protein being aligned
spectrumSpectrum being aligned
current_rowrow being processed (== index of the current AaPosition in sequence)
r_peakright peak index of the mz gap to verify

Definition at line 742 of file semiglobalalignment.cpp.

747{
748 std::size_t perfect_shift_origin = current_row;
749 double missing_mass = spectrum.getMZShift(0, r_peak);
750 pappso::MzRange mz_range(missing_mass, m_precision_ptr);
751 double aa_mass = 0;
752 while(aa_mass < missing_mass && perfect_shift_origin > 0 && !mz_range.contains(aa_mass))
753 {
754 aa_mass += sequence.at(perfect_shift_origin - 1)
755 .mass; // Aa(sequence.at(perfect_shift_origin - 1).unicode()).getMass();
756 perfect_shift_origin--;
757 }
758 if(mz_range.contains(aa_mass))
759 {
760 return perfect_shift_origin;
761 }
762 else
763 {
764 return current_row;
765 }
766}

References pappso::MzRange::contains(), pappso::specpeptidoms::SpOMSSpectrum::getMZShift(), and m_precision_ptr.

Referenced by updateAlignmentMatrix().

◆ postProcessingAlign()

void pappso::specpeptidoms::SemiGlobalAlignment::postProcessingAlign ( const SpOMSSpectrum & spectrum,
const SpOMSProtein * protein_ptr,
std::size_t beginning,
std::size_t length,
const std::vector< double > & shifts )

performs the post-processing : generates corrected spectra and align them

Parameters
spectrumSpectrum to align
protein_ptrProtein pointer on the sequence to align.
beginningIndex of the beginning of the subsequence to align.
lengthLength of the subsequence to align.
shiftsList of potential precursor mass errors to test.

Definition at line 364 of file semiglobalalignment.cpp.

369{
370 std::size_t current_SPC = m_best_alignment.SPC;
371 int current_best_score = m_best_alignment.score;
373 for(double precursor_mass_error : shifts)
374 {
375 SpOMSSpectrum corrected_spectrum(spectrum, precursor_mass_error);
376 preciseAlign(corrected_spectrum, protein_ptr, beginning, length);
378 {
380 }
381 }
382 if(m_best_post_processed_alignment.SPC > current_SPC &&
383 m_best_post_processed_alignment.score >= current_best_score)
384 {
386 }
387}
void preciseAlign(const SpOMSSpectrum &spectrum, const SpOMSProtein *protein_ptr, const std::size_t beginning, const std::size_t length)
performs the second alignment search between a protein subsequence and a spectrum.

References m_best_alignment, m_best_post_processed_alignment, and preciseAlign().

Referenced by pappso::cbor::psm::PsmSpecPeptidOmsScan::sequenceAlignment().

◆ preciseAlign()

void pappso::specpeptidoms::SemiGlobalAlignment::preciseAlign ( const SpOMSSpectrum & spectrum,
const SpOMSProtein * protein_ptr,
const std::size_t beginning,
const std::size_t length )

performs the second alignment search between a protein subsequence and a spectrum.

Parameters
spectrumSpectrum to align
protein_ptrProtein pointer on the sequence to align.
beginningIndex of the beginning of the subsequence to align.
lengthLength of the subsequence to align.

Definition at line 170 of file semiglobalalignment.cpp.

174{
175 try
176 {
177 qDebug();
178 const QString &protein_seq = protein_ptr->getSequence();
179 std::size_t length2;
180 if((qsizetype)(beginning + length) <= protein_seq.size())
181 {
182 length2 = length;
183 }
184 else
185 {
186 length2 = protein_seq.size() - beginning;
187 }
188
189 qDebug();
190 QString sequence_str = protein_seq.sliced(protein_seq.size() - beginning - length2, length2);
191
192 SpOMSProtein sequence("sub_sequence", sequence_str, m_aaCode);
193
194 // std::reverse(sequence.begin(), sequence.end());
195 std::vector<AaPosition> aa_positions;
196 CorrectionTree correction_tree;
197
198 initpreciseAlign(spectrum, length2);
199
200 for(std::size_t row_number = 1; row_number <= length2; row_number++)
201 {
202
203 qDebug() << "row_number - 1=" << row_number - 1 << " sequence.size()=" << sequence.size();
204 // aa = Aa(sequence[row_number - 1].unicode());
205 updateAlignmentMatrix(sequence,
206 row_number,
207 spectrum.getAaPositions(sequence[row_number - 1].code),
208 spectrum,
209 false,
210 protein_ptr);
211 }
212 qDebug();
213 saveBestAlignment(sequence, spectrum, protein_seq.size() - beginning);
214
215 qDebug() << m_scenario.getBestScore() << " " << MIN_ALIGNMENT_SCORE;
216 // Correction : if complementary peaks are used, corrected spectra without one of the two
217 // peaks are generated and aligned. The best alignment is kept.
218 if(m_scenario.getBestScore() >
219 MIN_ALIGNMENT_SCORE) // We only correct alignments with acceptable scores
220 {
221 // we only correct alignment if the sequence has a minimum amino acid diversity
222 if(checkSequenceDiversity(sequence.getSequence(), 5, 2))
223 {
224
225 qDebug();
227 for(std::size_t iter : m_best_alignment.peaks)
228 {
229 if(iter > spectrum.getComplementaryPeak(iter))
230 {
231 break;
232 }
233 else if(std::find(m_best_alignment.peaks.begin(),
234 m_best_alignment.peaks.end(),
235 spectrum.getComplementaryPeak(iter)) !=
236 m_best_alignment.peaks.end())
237 {
238 correction_tree.addPeaks(iter, spectrum.getComplementaryPeak(iter));
239 }
240 }
241 qDebug();
242 std::vector<std::vector<std::size_t>> corrections = correction_tree.getPeaks();
243 if(corrections.size() > 0)
244 {
245 m_best_alignment.score =
246 0; // Reset the best alignment score (we dont want to keep
247 // the original alignment if corrections are needed)
248 qDebug();
249 for(auto peaks_to_remove : corrections)
250 {
251 qDebug();
252 correctAlign(sequence,
253 protein_ptr,
254 spectrum,
255 peaks_to_remove,
256 protein_seq.size() - beginning);
257 qDebug();
258 }
259 qDebug();
261 }
262 }
263 }
264 else
265 {
266 // this sequence has too much redundancy
267 // we have to lower the score
268 m_best_alignment.score = 0;
269 }
270 qDebug();
271 }
272 catch(const pappso::PappsoException &err)
273 {
274 throw pappso::PappsoException(
275 QObject::tr("SemiGlobalAlignment::preciseAlign failed :\n%1").arg(err.qwhat()));
276 }
277}
void initpreciseAlign(const SpOMSSpectrum &spectrum, std::size_t length2)
function made for testing the preciseAlign process, initiate the variables for alignment
static bool checkSequenceDiversity(const QString &sequence, std::size_t window, std::size_t minimum_aa_diversity)
check that the sequence has a minimum of amino acid checkSequenceDiversity

References pappso::specpeptidoms::CorrectionTree::addPeaks(), checkSequenceDiversity(), correctAlign(), pappso::specpeptidoms::SpOMSSpectrum::getAaPositions(), pappso::specpeptidoms::SpOMSSpectrum::getComplementaryPeak(), pappso::specpeptidoms::CorrectionTree::getPeaks(), pappso::specpeptidoms::SpOMSProtein::getSequence(), initpreciseAlign(), m_aaCode, m_best_alignment, m_best_corrected_alignment, m_scenario, pappso::specpeptidoms::MIN_ALIGNMENT_SCORE(), pappso::PappsoException::qwhat(), saveBestAlignment(), and updateAlignmentMatrix().

Referenced by postProcessingAlign(), and pappso::cbor::psm::PsmSpecPeptidOmsScan::sequenceAlignment().

◆ saveBestAlignment()

void pappso::specpeptidoms::SemiGlobalAlignment::saveBestAlignment ( const SpOMSProtein & sequence,
const SpOMSSpectrum & spectrum,
std::size_t offset )
private

Stores the best alignment from m_scenario in m_best_alignment.

Parameters
sequencereversed sequence of the current alignment.
spectrumSpectrum currently being aligned.
offsetSize of the protein sequence minus beginning of the alignment. Used to compute the position of the alignment in the protein sequence.

Definition at line 821 of file semiglobalalignment.cpp.

825{
826 qDebug();
827 m_best_alignment.m_peptideModel.reset();
828 m_best_alignment.peaks.clear();
829 m_best_alignment.shifts.clear();
830 std::size_t previous_row = 0;
831 std::size_t previous_column = 0;
832 std::size_t perfect_shift_end;
833 std::pair<std::vector<ScenarioCell>, int> best_alignment = m_scenario.getBestAlignment();
834 m_best_alignment.score = best_alignment.second;
835 std::vector<SpOMSAa> skipped_aa;
836 double skipped_mass;
837 // Retrieving beginning and end
838 if(best_alignment.first.front().previous_row > offset)
839 {
840 throw pappso::PappsoException(
841 QString("best_alignment.first.front().previous_row > offset %1 %2")
842 .arg(offset)
843 .arg(best_alignment.first.front().previous_row));
844 }
845 if(best_alignment.first.back().previous_row > offset)
846 {
847 throw pappso::PappsoException(
848 QString("best_alignment.first.back().previous_row > offset %1 %2")
849 .arg(offset)
850 .arg(best_alignment.first.back().previous_row));
851 }
852 m_best_alignment.beginning = offset - best_alignment.first.front().previous_row;
853 m_best_alignment.end = offset - best_alignment.first.back().previous_row - 1;
854
855 qDebug();
856 AminoAcidModel aa_model;
857 aa_model.m_massDifference = 0;
858 // Filling temp_interpretation and peaks vectors
859 for(auto cell : best_alignment.first)
860 {
861 switch(cell.alignment_type)
862 {
863 case AlignType::found:
864 aa_model.m_aminoAcid = sequence.at(previous_row - 1).aa;
865 aa_model.m_massDifference = 0;
866 aa_model.m_skipped = false;
867 m_best_alignment.m_peptideModel.push_back(aa_model);
868 if(previous_row > cell.previous_row + 1)
869 {
870 skipped_mass = sequence.at(previous_row - 1)
871 .mass; // Aa(sequence.at(previous_row - 1).unicode()).getMass();
872 skipped_aa =
873 sequence.sliced(cell.previous_row, previous_row - cell.previous_row - 1);
874 aa_model.m_massDifference = 0;
875 aa_model.m_skipped = true;
876 for(auto aa : skipped_aa)
877 {
878 aa_model.m_aminoAcid = aa.aa;
879 m_best_alignment.m_peptideModel.push_back(aa_model);
880 skipped_mass += aa.mass; // Aa(aa.unicode()).getMass();
881 }
882 m_best_alignment.m_peptideModel.back().m_massDifference =
883 spectrum.getMZShift(cell.previous_column, previous_column) - skipped_mass;
884 }
885 m_best_alignment.peaks.push_back(cell.previous_column);
886 break;
888 aa_model.m_aminoAcid = sequence.at(previous_row - 1).aa;
889 aa_model.m_massDifference = 0;
890 aa_model.m_skipped = true;
891 m_best_alignment.m_peptideModel.push_back(aa_model);
892 break;
893 case AlignType::shift:
894
895 aa_model.m_aminoAcid = sequence.at(previous_row - 1).aa;
896 aa_model.m_massDifference = spectrum.getMZShift(cell.previous_column, previous_column) -
897 aa_model.m_aminoAcid.getMass();
898 aa_model.m_skipped = false;
899 m_best_alignment.m_peptideModel.push_back(aa_model);
900 m_best_alignment.peaks.push_back(cell.previous_column);
901 m_best_alignment.shifts.push_back(
902 spectrum.getMZShift(cell.previous_column, previous_column) -
903 sequence.at(previous_row - 1).mass);
904 break;
906 m_best_alignment.peaks.push_back(cell.previous_column);
907 skipped_aa = sequence.sliced(cell.previous_row, previous_row - cell.previous_row);
908 std::reverse(skipped_aa.begin(), skipped_aa.end());
909 aa_model.m_massDifference = 0;
910 aa_model.m_skipped = false;
911 for(auto aa : skipped_aa)
912 {
913 aa_model.m_aminoAcid = aa.aa;
914 m_best_alignment.m_peptideModel.push_back(aa_model);
915 }
916 break;
917 case AlignType::init:
918 previous_row = cell.previous_row;
919 previous_column = cell.previous_column;
920 m_best_alignment.peaks.push_back(cell.previous_column);
921 break;
922 }
923 previous_row = cell.previous_row;
924 previous_column = cell.previous_column;
925 }
926 std::reverse(m_best_alignment.peaks.begin(), m_best_alignment.peaks.end());
927 std::reverse(m_best_alignment.m_peptideModel.begin(), m_best_alignment.m_peptideModel.end());
928
929 qDebug();
930 // Compute begin_shift and end_shift
931 MzRange zero(0, m_precision_ptr);
932 m_best_alignment.begin_shift = spectrum.getMZShift(0, m_best_alignment.peaks.front());
933 m_best_alignment.end_shift = spectrum.getMissingMass(m_best_alignment.peaks.back());
934 if(zero.contains(m_best_alignment.end_shift))
935 {
936 m_best_alignment.end_shift = 0;
937 }
938
939 qDebug();
940 // Computing SPC
941 m_best_alignment.SPC = 0;
942 for(auto peak : m_best_alignment.peaks)
943 {
944 switch(spectrum.at(peak).type)
945 {
947 qDebug() << peak << "native";
948 m_best_alignment.SPC += 1;
949 break;
951 qDebug() << peak << "both";
952 m_best_alignment.SPC += 2;
953 break;
955 qDebug() << peak << "synthetic";
956 break;
958 qDebug() << peak << "symmetric";
959 m_best_alignment.SPC += 1;
960 break;
961 }
962 }
963
964 qDebug();
965 // Final check of the end shift
966 if(m_best_alignment.end_shift > 0)
967 {
968 perfect_shift_end = perfectShiftPossibleEnd(sequence,
969 spectrum,
970 best_alignment.first.front().previous_row,
971 m_best_alignment.peaks.back());
972 if(perfect_shift_end != best_alignment.first.front().previous_row)
973 {
974 skipped_aa =
975 sequence.sliced(best_alignment.first.front().previous_row,
976 perfect_shift_end - best_alignment.first.front().previous_row);
977 aa_model.m_massDifference = 0;
978 aa_model.m_skipped = true;
979 for(auto aa = skipped_aa.begin(); aa != skipped_aa.end(); aa++)
980 {
981 aa_model.m_aminoAcid = aa->aa;
982 m_best_alignment.m_peptideModel.push_back(aa_model);
983 }
984 m_best_alignment.beginning = offset - perfect_shift_end;
985 m_best_alignment.end_shift = 0;
986 }
987 else
988 {
990 }
991 }
992
993 qDebug();
994 // Writing final interpretation
995 if(m_best_alignment.end_shift > 0)
996 {
997 m_best_alignment.m_peptideModel.setNterShift(m_best_alignment.end_shift);
998 }
999
1000 std::reverse(m_best_alignment.m_peptideModel.begin(), m_best_alignment.m_peptideModel.end());
1001 if(m_best_alignment.begin_shift > 0)
1002 {
1003 m_best_alignment.m_peptideModel.setCterShift(m_best_alignment.begin_shift);
1004 }
1005
1006 m_best_alignment.m_peptideModel.setPrecursorMass(spectrum.getPrecursorMass());
1007 qDebug();
1008}
std::size_t perfectShiftPossibleEnd(const pappso::specpeptidoms::SpOMSProtein &sequence, const SpOMSSpectrum &spectrum, std::size_t end_row, std::size_t end_peak) const
indicates if a perfect shift is possible between the provided positions
@ aa
best possible : more than one direct MS2 fragmentation in same MSRUN
Definition types.h:45
@ synthetic
does not correspond to existing peak, for computational purpose
Definition types.h:85
@ both
both, the ion and the complement exists in the original spectrum
Definition types.h:83
@ symmetric
new peak : computed symmetric mass from a corresponding native peak
Definition types.h:81

References pappso::specglob::both, pappso::MzRange::contains(), pappso::specpeptidoms::found, pappso::specpeptidoms::foundShift, pappso::Aa::getMass(), pappso::specpeptidoms::SpOMSSpectrum::getMissingMass(), pappso::specpeptidoms::SpOMSSpectrum::getMZShift(), pappso::specpeptidoms::SpOMSSpectrum::getPrecursorMass(), pappso::specpeptidoms::init, pappso::specpeptidoms::AminoAcidModel::m_aminoAcid, m_best_alignment, pappso::specpeptidoms::AminoAcidModel::m_massDifference, m_precision_ptr, m_scenario, m_scorevalues, pappso::specpeptidoms::AminoAcidModel::m_skipped, pappso::specglob::native, pappso::specpeptidoms::notFound, pappso::specpeptidoms::perfectShift, perfectShiftPossibleEnd(), pappso::specpeptidoms::shift, pappso::specpeptidoms::SpOMSProtein::sliced(), pappso::specglob::symmetric, and pappso::specglob::synthetic.

Referenced by correctAlign(), and preciseAlign().

◆ updateAlignmentMatrix()

void pappso::specpeptidoms::SemiGlobalAlignment::updateAlignmentMatrix ( const pappso::specpeptidoms::SpOMSProtein & sequence,
const std::size_t row_number,
const std::vector< AaPosition > & aa_positions,
const SpOMSSpectrum & spectrum,
const bool fast_align,
const pappso::specpeptidoms::SpOMSProtein * protein_ptr )
private

updates the scores of the alignment matrix for a given amino acid as well as the location heap/scenario.

Parameters
sequenceReversed sequence of the protein being aligned /!\ In case of a subsequence alignment, the provided protein must only contain the sequence to align.
row_numbernumber of the row to update (== index in sequence of the amino acid being aligned)
aa_positionslist of the AaPositions of the current amino acid
spectrumSpectrum being aligned
fast_alignWhether to use the fast version of the algorithm (for 1st alignemnt step)
protein_ptrProtein pointer on the sequence to align. /!\ In case of a subsequence alignment, the provided protein must be the protein of origin (i.e. complete sequence).

Definition at line 390 of file semiglobalalignment.cpp.

397{
398 int where = 0;
399 try
400 {
401 int score_found, score_shift, best_score, alt_score, tree_id;
402 uint32_t condition = 0;
403 std::size_t best_column, shift, beginning, missing_aas, length, perfect_shift_origin;
404 KeyCell *current_cell_ptr, *tested_cell_ptr;
405 AlignType alignment_type, temp_align_type;
406
407 double smallest_aa_mass = m_aaCode.getMass((std::uint8_t)1);
408
409 m_updated_cells.reserve(aa_positions.size());
410 where = 1;
411 // Computation of the threePeaks condition, see spomsspectrum.h for more details.
412 if(fast_align)
413 {
414 condition = 3;
415 if(row_number > 1)
416 {
417 qDebug() << (char)sequence.at(row_number - 2).aa;
418 qDebug() << "condition" << condition << "aa" << (char)sequence.at(row_number - 2).aa
419 << sequence.at(row_number - 2).code;
420 condition += 2 << sequence.at(row_number - 2).code;
421 qDebug();
422 qDebug() << "condition" << condition;
423 }
424 }
425 where = 2;
426 for(std::vector<AaPosition>::const_iterator aa_position = aa_positions.begin();
427 aa_position != aa_positions.end();
428 aa_position++)
429 {
430 qDebug() << "l_peak" << aa_position->l_peak << "r_peak" << aa_position->r_peak << "l_mass"
431 << aa_position->l_mass << "l_support" << aa_position->l_support << "condition"
432 << aa_position->condition;
433
434 where = 3;
435 if(((condition & aa_position->condition) != 0) ||
436 !fast_align) // Verification of the threePeaks condition (only during first alignment).
437 {
438 if(fast_align)
439 {
440 qDebug() << "threePeaks condition verified";
441 }
442
443 current_cell_ptr = &m_interest_cells.at(aa_position->r_peak);
444 if(spectrum.peakType(aa_position->r_peak) ==
445 specglob::ExperimentalSpectrumDataPointType::both) // The score used are different
446 // depending on the right peak
447 // type (simple or double).
448 {
449 qDebug() << "double peak";
450 score_found = m_scorevalues.get(ScoreType::foundDouble);
452 }
453 else
454 {
455 qDebug() << "single peak";
456 score_found = m_scorevalues.get(ScoreType::found);
457 score_shift = m_scorevalues.get(ScoreType::foundShift);
458 }
459
460 // not found case (always computed)
461 best_column = aa_position->r_peak;
462 best_score = current_cell_ptr->score + (row_number - current_cell_ptr->n_row) *
464 beginning = current_cell_ptr->beginning;
465 tree_id = current_cell_ptr->tree_id;
466 alignment_type = AlignType::notFound;
467 qDebug() << "not found" << best_score;
468
469 // found case (Can only happen if the left peak is supported)
470 if(aa_position->l_support)
471 {
472 tested_cell_ptr = &m_interest_cells.at(aa_position->l_peak);
473 if(aa_position->l_peak == 0)
474 {
475 alt_score = tested_cell_ptr->score + score_found;
476 }
477 else
478 {
479 if(tested_cell_ptr->n_row == row_number - 1)
480 {
481 alt_score = tested_cell_ptr->score +
482 (row_number - tested_cell_ptr->n_row - 1) *
484 score_found;
485 }
486 else
487 {
488 alt_score = tested_cell_ptr->score +
489 (row_number - tested_cell_ptr->n_row - 1) *
491 score_shift;
492 }
493 }
494 if(alt_score >= best_score) // If the found case improves the score, the new value
495 // of the key cell is computed.
496 {
497 alignment_type = AlignType::found;
498 best_score = alt_score;
499 best_column = aa_position->l_peak;
500 if(best_column == 0)
501 {
502 if(row_number < ALIGNMENT_SURPLUS)
503 {
504 beginning = 0;
505 }
506 else
507 {
508 beginning = std::max((std::size_t)(row_number - ALIGNMENT_SURPLUS),
509 (std::size_t)0);
510 }
511 if(fast_align)
512 {
513 tree_id = m_location_saver.getNextTree();
514 }
515 }
516 else
517 {
518 beginning = tested_cell_ptr->beginning;
519 tree_id = tested_cell_ptr->tree_id;
520 }
521 qDebug() << "found" << best_score << "from" << best_column << beginning
522 << tree_id;
523 }
524 }
525
526 where = 4;
527 // generic shift case (all shifts are tested)
528 if(aa_position->l_support)
529 {
530 shift = 1;
531 }
532 else
533 {
534 shift = 0;
535 }
536 while(shift < aa_position->l_peak)
537 {
538 tested_cell_ptr = &m_interest_cells.at(aa_position->l_peak - shift);
539 // verification saut parfait
540 if(perfectShiftPossible(sequence,
541 spectrum,
542 tested_cell_ptr->n_row,
543 row_number,
544 aa_position->l_peak - shift,
545 aa_position->r_peak) &&
547 {
548 alt_score = tested_cell_ptr->score +
549 (row_number - tested_cell_ptr->n_row - 1) *
551 score_found;
552 temp_align_type = AlignType::perfectShift;
553 }
554 else
555 {
556 alt_score = tested_cell_ptr->score +
557 (row_number - tested_cell_ptr->n_row - 1) *
559 score_shift;
560 temp_align_type = AlignType::shift;
561 }
562 if(alt_score > best_score)
563 {
564 alignment_type = temp_align_type;
565 best_score = alt_score;
566 best_column = aa_position->l_peak - shift;
567 beginning = tested_cell_ptr->beginning;
568 tree_id = tested_cell_ptr->tree_id;
569 qDebug() << "shift" << best_score << "from" << best_column;
570 }
571 shift++;
572 }
573
574 where = 5;
575 // case shift from column 0 (no penalties if all precedent amino acids are missed)
576 tested_cell_ptr = &m_interest_cells.at(0);
577 // verification saut parfait
578 if(aa_position->r_peak <= TOL_PEAKS_MISSING_FIRST_COLUMN)
579 {
580 perfect_shift_origin =
581 perfectShiftPossibleFrom0(sequence, spectrum, row_number, aa_position->r_peak);
582 }
583 else
584 {
585 perfect_shift_origin = row_number;
586 }
587
588 if(perfect_shift_origin != row_number)
589 {
590 alt_score = tested_cell_ptr->score + score_found;
591 temp_align_type = AlignType::perfectShift;
592 }
593 else
594 {
595 alt_score = tested_cell_ptr->score + score_shift;
596 temp_align_type = AlignType::shift;
597 }
598
599 where = 6;
600 if(alt_score > best_score)
601 {
602 qDebug() << "shift" << alt_score << "from 0";
603 alignment_type = temp_align_type;
604 best_score = alt_score;
605 best_column = 0;
606 missing_aas =
607 std::floor((aa_position->l_mass -
609 smallest_aa_mass);
610 qDebug() << "missing aas" << missing_aas;
611 if(row_number < ALIGNMENT_SURPLUS + missing_aas)
612 {
613 beginning = 0;
614 }
615 else
616 {
617 beginning =
618 std::max((std::size_t)(row_number - missing_aas - ALIGNMENT_SURPLUS),
619 (std::size_t)0);
620 }
621 where = 7;
622 if(fast_align)
623 {
624 tree_id = m_location_saver.getNextTree();
625 }
626 }
627
628 where = 8;
629 if(best_column != aa_position->r_peak)
630 {
631 m_updated_cells.push_back(
632 {aa_position->r_peak, {row_number, best_score, beginning, tree_id}});
633 }
634
635 where = 9;
636 if(best_score > m_location_saver.getMinScore(tree_id) && fast_align)
637 {
638 length =
639 row_number - beginning + 1 +
640 std::ceil(spectrum.getMissingMass(aa_position->r_peak) / smallest_aa_mass) +
642 where = 10;
643 m_location_saver.addLocation(beginning, length, tree_id, best_score, protein_ptr);
644 }
645 else if(!fast_align)
646 {
647
648 where = 11;
649 if(alignment_type == AlignType::perfectShift && best_column == 0)
650 {
651 m_scenario.saveOrigin(row_number,
652 aa_position->r_peak,
653 perfect_shift_origin,
654 0,
655 best_score,
657 }
658 else
659 {
660 m_scenario.saveOrigin(row_number,
661 aa_position->r_peak,
662 m_interest_cells.at(best_column).n_row,
663 best_column,
664 best_score,
665 alignment_type);
666 }
667 }
668 }
669 }
670
671 where = 30;
672 // Update row number in column 0
673 m_updated_cells.push_back({0, {row_number, 0, 0, 0}});
674
675 // Save updated key cells in the matrix
676 while(m_updated_cells.size() > 0)
677 {
678 qDebug() << m_interest_cells.size() << " " << m_updated_cells.back().first
679 << m_updated_cells.back().second.n_row << m_updated_cells.back().second.score
680 << m_updated_cells.back().second.beginning
681 << m_updated_cells.back().second.tree_id;
682 m_interest_cells.at(m_updated_cells.back().first) = m_updated_cells.back().second;
683 m_updated_cells.pop_back();
684 }
685 where++;
686 }
687 catch(const pappso::PappsoException &err)
688 {
689 throw pappso::PappsoException(
690 QObject::tr("updateAlignmentMatrix failed :\n%1").arg(err.qwhat()));
691 }
692 catch(const std::exception &error)
693 {
694 throw pappso::PappsoException(
695 QObject::tr("updateAlignmentMatrix failed std::exception :\n%1 %2")
696 .arg(where)
697 .arg(error.what()));
698 }
699}
bool perfectShiftPossible(const pappso::specpeptidoms::SpOMSProtein &sequence, const SpOMSSpectrum &spectrum, const std::size_t origin_row, const std::size_t current_row, const std::size_t l_peak, const std::size_t r_peak) const
indicates if a perfect shift is possible between the provided positions
std::size_t perfectShiftPossibleFrom0(const pappso::specpeptidoms::SpOMSProtein &sequence, const SpOMSSpectrum &spectrum, const std::size_t current_row, const std::size_t r_peak) const
indicates if a perfect shift is possible from the spectrum beginning to the provided peak....
const uint ALIGNMENT_SURPLUS(5)
const uint TOL_PEAKS_MISSING(4)
const uint TOL_PEAKS_MISSING_FIRST_COLUMN(5)
const pappso_double MHPLUS(1.007276466879)
const pappso_double MPROTIUM(1.007825032241)
const pappso_double MASSOXYGEN(15.99491461956)

References pappso::specpeptidoms::ALIGNMENT_SURPLUS(), pappso::specpeptidoms::KeyCell::beginning, pappso::specglob::both, pappso::specpeptidoms::found, pappso::specpeptidoms::foundDouble, pappso::specpeptidoms::foundShift, pappso::specpeptidoms::foundShiftDouble, pappso::specpeptidoms::SpOMSSpectrum::getMissingMass(), m_aaCode, m_interest_cells, m_location_saver, m_scenario, m_scorevalues, m_updated_cells, pappso::MASSOXYGEN(), pappso::MHPLUS(), pappso::MPROTIUM(), pappso::specpeptidoms::KeyCell::n_row, pappso::specpeptidoms::notFound, pappso::specpeptidoms::SpOMSSpectrum::peakType(), pappso::specpeptidoms::perfectShift, perfectShiftPossible(), perfectShiftPossibleFrom0(), pappso::PappsoException::qwhat(), pappso::specpeptidoms::KeyCell::score, pappso::specpeptidoms::shift, pappso::specpeptidoms::TOL_PEAKS_MISSING(), pappso::specpeptidoms::TOL_PEAKS_MISSING_FIRST_COLUMN(), and pappso::specpeptidoms::KeyCell::tree_id.

Referenced by correctAlign(), fastAlign(), oneAlignStep(), and preciseAlign().

Member Data Documentation

◆ m_aaCode

const AaCode& pappso::specpeptidoms::SemiGlobalAlignment::m_aaCode
private

Definition at line 300 of file semiglobalalignment.h.

Referenced by SemiGlobalAlignment(), preciseAlign(), and updateAlignmentMatrix().

◆ m_best_alignment

Alignment pappso::specpeptidoms::SemiGlobalAlignment::m_best_alignment
private

◆ m_best_corrected_alignment

Alignment pappso::specpeptidoms::SemiGlobalAlignment::m_best_corrected_alignment
private

Definition at line 304 of file semiglobalalignment.h.

Referenced by correctAlign(), and preciseAlign().

◆ m_best_post_processed_alignment

Alignment pappso::specpeptidoms::SemiGlobalAlignment::m_best_post_processed_alignment
private

Definition at line 305 of file semiglobalalignment.h.

Referenced by postProcessingAlign().

◆ m_interest_cells

std::vector<KeyCell> pappso::specpeptidoms::SemiGlobalAlignment::m_interest_cells
private

◆ m_location_saver

LocationSaver pappso::specpeptidoms::SemiGlobalAlignment::m_location_saver
private

Definition at line 301 of file semiglobalalignment.h.

Referenced by getLocationSaver(), and updateAlignmentMatrix().

◆ m_precision_ptr

pappso::PrecisionPtr pappso::specpeptidoms::SemiGlobalAlignment::m_precision_ptr
private

◆ m_scenario

Scenario pappso::specpeptidoms::SemiGlobalAlignment::m_scenario
private

◆ m_scorevalues

const ScoreValues& pappso::specpeptidoms::SemiGlobalAlignment::m_scorevalues
private

◆ m_updated_cells

std::vector<std::pair<std::size_t, KeyCell> > pappso::specpeptidoms::SemiGlobalAlignment::m_updated_cells
private

Definition at line 296 of file semiglobalalignment.h.

Referenced by initFastAlign(), and updateAlignmentMatrix().

◆ min_score

const int pappso::specpeptidoms::SemiGlobalAlignment::min_score = 15
private

Definition at line 298 of file semiglobalalignment.h.


The documentation for this class was generated from the following files: