libpappsomspp
Library for mass spectrometry
Loading...
Searching...
No Matches
sagetsvhandler.cpp
Go to the documentation of this file.
1/**
2 * \file input/sage/sagetsvhandler.cpp
3 * \date 21/08/2024
4 * \author Olivier Langella
5 * \brief read data files from Sage output
6 */
7
8/*******************************************************************************
9 * Copyright (c) 2024 Olivier Langella
10 *<Olivier.Langella@universite-paris-saclay.fr>.
11 *
12 * This file is part of i2MassChroQ.
13 *
14 * i2MassChroQ is free software: you can redistribute it and/or modify
15 * it under the terms of the GNU General Public License as published by
16 * the Free Software Foundation, either version 3 of the License, or
17 * (at your option) any later version.
18 *
19 * i2MassChroQ is distributed in the hope that it will be useful,
20 * but WITHOUT ANY WARRANTY; without even the implied warranty of
21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 * GNU General Public License for more details.
23 *
24 * You should have received a copy of the GNU General Public License
25 * along with i2MassChroQ. If not, see <http://www.gnu.org/licenses/>.
26 *
27 ******************************************************************************/
28
29
30#include "sagetsvhandler.h"
37#include <qregularexpression.h>
38
39
40namespace pappso
41{
42namespace cbor
43{
44namespace psm
45{
46
48 const SageReader &sage_reader,
49 PsmProteinMap &psm_protein_map)
50 : m_sageReader(sage_reader), m_psmProteinMap(psm_protein_map)
51{
52 mp_monitor = p_monitor;
55 m_decoyTag = sage_reader.getDecoyTag();
56}
57
61
62void
64{
65 for(auto it_sample_map : m_sampleMap)
66 {
67 writeSample(it_sample_map.second);
68 }
69}
70
71void
73{
74 if(m_line.charge != 0)
75 recordLine();
77}
78
79void
83
84void
85SageTsvHandler::setCell(const OdsCell &cell)
86{
87 if(m_lineNumber == 0)
88 {
89 // header
90 QString tag = cell.getStringValue();
91 if(tag == "psm_id")
92 {
94 }
95 else if(tag == "peptide")
96 {
98 }
99 else if(tag == "proteins")
100 {
102 }
103 else if(tag == "protein_groups")
104 {
106 }
107 else if(tag == "num_protein_groups")
108 {
110 }
111 else if(tag == "num_proteins")
112 {
114 }
115 else if(tag == "filename")
116 {
118 }
119 else if(tag == "scannr")
120 {
122 }
123 else if(tag == "rank")
124 {
126 }
127 else if(tag == "label")
128 {
130 }
131 else if(tag == "expmass")
132 {
134 }
135
136 else if(tag == "calcmass")
137 {
139 }
140 else if(tag == "charge")
141 {
143 }
144 else if(tag == "peptide_len")
145 {
147 }
148 else if(tag == "missed_cleavages")
149 {
151 }
152 else if(tag == "semi_enzymatic")
153 {
155 }
156 else if(tag == "isotope_error")
157 {
159 }
160 else if(tag == "precursor_ppm")
161 {
163 }
164 else if(tag == "fragment_ppm")
165 {
167 }
168 else if(tag == "hyperscore")
169 {
171 }
172 else if(tag == "delta_next")
173 {
175 }
176 else if(tag == "delta_best")
177 {
179 }
180 else if(tag == "rt")
181 {
182 m_columnTypeList.push_back(Columns::rt);
183 }
184 else if(tag == "aligned_rt")
185 {
187 }
188 else if(tag == "predicted_rt")
189 {
191 }
192 else if(tag == "delta_rt_model")
193 {
195 }
196 else if(tag == "ion_mobility")
197 {
199 }
200 else if(tag == "predicted_mobility")
201 {
203 }
204 else if(tag == "delta_mobility")
205 {
207 }
208 else if(tag == "matched_peaks")
209 {
211 }
212 else if(tag == "longest_b")
213 {
215 }
216 else if(tag == "longest_y")
217 {
219 }
220 else if(tag == "longest_y_pct")
221 {
223 }
224 else if(tag == "matched_intensity_pct")
225 {
227 }
228 else if(tag == "scored_candidates")
229 {
231 }
232 else if(tag == "poisson")
233 {
235 }
236 else if(tag == "sage_discriminant_score")
237 {
239 }
240 else if(tag == "posterior_error")
241 {
243 }
244 else if(tag == "spectrum_q")
245 {
247 }
248 else if(tag == "peptide_q")
249 {
251 }
252 else if(tag == "protein_q")
253 {
255 }
256 else if(tag == "protein_group_q")
257 {
259 }
260 else if(tag == "ms2_intensity")
261 {
263 }
264 else
265 {
266 throw pappso::ExceptionNotPossible(QObject::tr("column \"%1\" not defined").arg(tag));
267 }
268 }
269 else
270 {
271 if(m_columnNumber >= m_columnTypeList.size())
272 {
274 QObject::tr("the value %1 is out of range").arg(cell.getStringValue()));
275 }
277 switch(column_type)
278 {
279 case Columns::psm_id:
280 break;
282 break;
284 break;
286 break;
287 case Columns::peptide:
288 parsePeptide(cell.toString());
289 break;
291 parseProteins(cell.toString());
292 break;
294 if((std::size_t)cell.getDoubleValue() != (std::size_t)m_proteinList.size())
295 {
297 QObject::tr("column \"num_proteins\"!=%1").arg(m_proteinList.size()));
298 }
299 break;
301 parseMsRunFilename(cell.toString());
302 break;
303 case Columns::scannr:
304 parseScanNrColumn(cell.toString());
305 break;
306 case Columns::rank:
307 m_line.rank = cell.getDoubleValue();
308 break;
309 case Columns::label:
310 m_line.label = cell.getDoubleValue();
311 break;
312 case Columns::expmass:
313 m_line.expmass = cell.getDoubleValue();
314 break;
316 m_line.calcmass = cell.getDoubleValue();
317 break;
318 case Columns::charge:
319 m_line.charge = cell.getDoubleValue();
320 break;
322 m_line.peptide_len = cell.getDoubleValue();
323 break;
325 m_line.missed_cleavages = cell.getDoubleValue();
326 break;
328 m_line.semi_enzymatic = cell.getDoubleValue();
329 break;
331 m_line.isotope_error = cell.getDoubleValue();
332 break;
333
335 m_line.precursor_ppm = cell.getDoubleValue();
336 break;
338 m_line.fragment_ppm = cell.getDoubleValue();
339 break;
341 m_line.hyperscore = cell.getDoubleValue();
342 break;
344 m_line.delta_next = cell.getDoubleValue();
345 break;
347 m_line.delta_best = cell.getDoubleValue();
348 break;
349 case Columns::rt:
350 m_line.rt = cell.getDoubleValue() * 60; // to convert retention time in seconds
351 break;
353 m_line.aligned_rt = cell.getDoubleValue();
354 break;
356 m_line.predicted_rt = cell.getDoubleValue();
357 break;
359 m_line.delta_rt_model = cell.getDoubleValue();
360 break;
362 m_line.ion_mobility = cell.getDoubleValue();
363 break;
365 m_line.predicted_mobility = cell.getDoubleValue();
366 break;
368 m_line.delta_mobility = cell.getDoubleValue();
369 break;
371 m_line.matched_peaks = cell.getDoubleValue();
372 break;
374 m_line.longest_b = cell.getDoubleValue();
375 break;
377 m_line.longest_y = cell.getDoubleValue();
378 break;
380 m_line.longest_y_pct = cell.getDoubleValue();
381 break;
383 m_line.matched_intensity_pct = cell.getDoubleValue();
384 break;
386 m_line.scored_candidates = cell.getDoubleValue();
387 break;
388 case Columns::poisson:
389 m_line.poisson = cell.getDoubleValue();
390 break;
392 m_line.sage_discriminant_score = cell.getDoubleValue();
393 break;
395 m_line.posterior_error = cell.getDoubleValue();
396 break;
398 m_line.spectrum_q = cell.getDoubleValue();
399 break;
401 m_line.peptide_q = cell.getDoubleValue();
402 break;
404 m_line.protein_q = cell.getDoubleValue();
405 break;
407 m_line.ms2_intensity = cell.getDoubleValue();
408 break;
409 default:
410 qDebug() << "m_line.calcmass=" << m_line.calcmass;
412 QObject::tr("column type %1 not implemented").arg((std::uint8_t)column_type));
413 break;
414 }
415
416 /*
417
418 2333 TMISDSDYTEFENFTK
419 GRMZM2G018197_P01;GRMZM2G068952_P01;GRMZM5G822976_P01 3
420 20120906_balliau_extract_1_A01_urnb-1.mzML controllerType=0
421 controllerNumber=1 scan=12542 1 1 1926.8225 1926.8193 2 16 0 0
422 0.0 1.6471838 1.9796097 54.06803492297634 28.049970383419556 0.0 38.192993
423 0.76385987 0.7671368 0.0032769442 0.0 0.0 0.0 16 2 14 0.875 32.54396 380
424 -13.375352220427656 1.1570586 -34.13482 0.00016041065 0.00022231363
425 0.00040124074 1271951.1
426 */
427 }
429}
430
431void
433{
434 m_columnNumber = 0;
435 msp_peptide = nullptr;
436 m_proteinList.clear();
437 m_line = Line();
438}
439
440void
444void
445SageTsvHandler::startSheet(const QString &sheet_name [[maybe_unused]])
446{
447 m_columnNumber = 0;
448 m_lineNumber = 0;
449 mp_monitor->setStatus(QObject::tr("reading Sage TSV file"));
450
451 if(mp_monitor->shouldIstop())
452 {
453 throw pappso::ExceptionInterrupted(QObject::tr("Sage TSV data reading process interrupted"));
454 }
455}
456
457void
458SageTsvHandler::parsePeptide(const QString &peptide_str)
459{
460 qDebug();
461 QString peptide_str_verif = peptide_str;
462 // fixed modifications :
464 {
465 qDebug() << modif.strModification;
466 qDebug() << modif.modification->getAccession();
467 peptide_str_verif = peptide_str_verif.replace(
468 modif.strModification, QString("[%1]").arg(modif.modification->getAccession()));
469 }
470 // variable modifications :
472 {
473 qDebug() << modif.strModification;
474 qDebug() << modif.modification->getAccession();
475 peptide_str_verif = peptide_str_verif.replace(
476 modif.strModification, QString("[%1]").arg(modif.modification->getAccession()));
477 }
478
479 qDebug() << peptide_str_verif;
480 // LPMFGC[+57.0216]NDATQVYK
482 qDebug();
483 // variable modifications :
484 /*
485 setVariableModifications(peptide_sp,
486 peptide_line.peptide_string_list.at(6));
487*/
488 qDebug() << msp_peptide.get()->toProForma();
489}
490
491
492void
493SageTsvHandler::parseProteins(const QString &proteins_str)
494{
495 m_proteinList.clear();
496 m_proteinList = proteins_str.split(";");
497 for(QString accession : m_proteinList)
498 {
499
500 PsmProtein psm_protein;
501 psm_protein.protein_sp = std::make_shared<pappso::Protein>(accession, "");
502 psm_protein.isTarget = true;
503 if(accession.startsWith(m_decoyTag))
504 {
505 psm_protein.isTarget = false;
506 }
507
508 m_psmProteinMap.insert(psm_protein);
509 }
510}
511
512bool
513SageTsvHandler::parseScanNrColumn(const QString &spectrum_string_id)
514{
515 m_spectrumNativeId = spectrum_string_id;
516 qDebug() << spectrum_string_id;
517 m_scanNumber = 0;
518 bool is_ok = false;
519 m_scanNumberIsOk = false;
520 // controllerType=0 controllerNumber=1 scan=176056
521 if(mp_currentSample->name.endsWith(".d"))
522 { // assume this is a Bruker's timTOF sample
523 // 200ngHeLaPASEF_2min_compressed.d 2005
524 std::size_t precursor_number = spectrum_string_id.toULongLong(&is_ok);
525 if(is_ok)
526 {
527 // precursor=2006 idxms2=4011
528 m_scanNumberIsOk = is_ok;
529 m_spectrumIndex = precursor_number * 2 + 1;
530 m_spectrumNativeId = QString("precursor=%1 idxms2=%2")
531 .arg(precursor_number + 1)
532 .arg(precursor_number * 2 + 1);
533 }
534 }
535
536 if(!is_ok)
537 {
538 QStringList scan_list = spectrum_string_id.split("scan=");
539 if(scan_list.size() == 2)
540 {
541 // we bet that there is a scan number, easy to parse
542 m_scanNumber = scan_list.at(1).toULongLong(&is_ok);
543 m_scanNumberIsOk = is_ok;
544 if(m_scanNumber > 0)
546 }
547 if(is_ok == false)
548 {
549 ;
550 QStringList scan_list = spectrum_string_id.split(QRegularExpression("[^\\d]"));
551 if(scan_list.size() == 2)
552 {
553 m_scanNumber = scan_list.at(0).toULongLong(&is_ok);
554 m_scanNumberIsOk = is_ok;
555 if(m_scanNumber > 0)
557 }
558 /*
559 if(msp_previousMsrun != msp_msrun)
560 {
561 mp_monitor->setStatus(
562 QObject::tr("Reading mz data file %1").arg(msp_msrun.get()->getFileName()));
563 msp_previousMsrun = msp_msrun;
564 }
565
566 pappso::MsRunReader *msrunreader_p = msp_msrun.get()->getMsRunReaderSPtr().get();
567 if(msrunreader_p->getMsRunId()->getMsDataFormat() == pappso::MsDataFormat::brukerTims)
568 {
569 m_spectrumIndex = spectrum_string_id.toInt() * 2 - 1;
570 }
571 else
572 {
573 m_spectrumIndex =
574 msrunreader_p->spectrumStringIdentifier2SpectrumIndex(spectrum_string_id);
575 }*/
576 }
577 }
578 qDebug() << spectrum_string_id;
579 return is_ok;
580}
581
582void
583SageTsvHandler::parseMsRunFilename(const QString &msrun_filename)
584{
585
586
587 // find the sample :
588 auto it_insert = m_sampleMap.insert({msrun_filename, {}});
589 mp_currentSample = &(it_insert.first->second);
590 if(it_insert.second)
591 {
592 // new sample
593 it_insert.first->second.name = msrun_filename;
594 QCborMap ms_file;
595 QCborMap identification_file;
596
597 it_insert.first->second.cbor_core_sample.insert(QString("name"),
598 QFileInfo(msrun_filename).baseName());
599
600 // identification_file_list
601 QCborArray identification_file_list;
602 identification_file.insert(QString("name"), m_sageReader.getmJsonAbsoluteFilePath());
603 identification_file_list.push_back(identification_file);
604 it_insert.first->second.cbor_core_sample.insert(QString("identification_file_list"),
605 identification_file_list);
606
607
608 ms_file.insert(QString("name"), m_sageReader.getMzmlPath(msrun_filename));
609 it_insert.first->second.cbor_core_sample.insert(QString("peaklist_file"), ms_file);
610 }
611
612 /*
613 msp_msrun = m_sageReader.getSageFileReader().getMsRunSpWithFileName(msrun_filename);
614 qDebug() << msp_msrun.get()->getFileName();
615
616 msp_identificationSageJsonFileSp =
617 m_sageReader.getSageFileReader().getIdentificationSageJsonFileSpWithFileName(msrun_filename);
618
619 mp_identificationGroup =
620 m_sageReader.getSageFileReader().getIdentificationGroupPtrWithFileName(msrun_filename);
621 qDebug() << msp_msrun.get()->getFileName();
622 */
623}
624
625void
627{
628 qDebug();
629
630 for(const QString &accession : m_proteinList)
631 {
632 PsmProtein psm_protein;
633 psm_protein.protein_sp = std::make_shared<pappso::Protein>(accession, "");
634
635 auto it = m_psmProteinMap.insert(psm_protein);
636 it.first->second.cborEval.insert(QString("protein_q"), m_line.protein_q);
637 }
638 /*
639 PeptideEvidence pe(msp_msrun.get(), m_spectrumIndex, true);
640 pe.setCharge(m_line.charge);
641 pe.setChecked(true);
642 pe.setExperimentalMass(m_line.expmass);
643 pe.setPeptideXtpSp(msp_peptide);
644 pe.setIdentificationDataSource(msp_identificationSageJsonFileSp.get());
645 pe.setIdentificationEngine(m_identificationEngine);
646 pe.setRetentionTime(m_line.rt);
647 pe.setParam(PeptideEvidenceParam::tandem_hyperscore, m_line.hyperscore);
648 pe.setParam(PeptideEvidenceParam::sage_sage_discriminant_score,
649 m_line.sage_discriminant_score);
650 pe.setParam(PeptideEvidenceParam::sage_peptide_q, m_line.peptide_q);
651 pe.setParam(PeptideEvidenceParam::sage_posterior_error, m_line.posterior_error);
652 pe.setParam(PeptideEvidenceParam::sage_spectrum_q, m_line.spectrum_q);
653 pe.setParam(PeptideEvidenceParam::sage_predicted_rt, m_line.predicted_rt);
654 pe.setParam(PeptideEvidenceParam::sage_isotope_error, m_line.isotope_error);
655
656
657 PeptideMatch peptide_match;
658 // peptide_match.setStart(mz_peptide_evidence.start);
659 peptide_match.setPeptideEvidenceSp(
660 msp_identificationSageJsonFileSp.get()->getPeptideEvidenceStore().getInstance(&pe));
661 */
662
663 // find the scan in sample
664 auto it_insert = mp_currentSample->scan_map.insert({m_spectrumNativeId, Scan()});
665 Scan *current_cbor_scan_p = &(it_insert.first->second);
666 if(it_insert.second)
667 {
668 // new scan
669 QCborMap &scan_id = it_insert.first->second.cbor_id;
671 {
672 scan_id.insert(QString("index"), (qint64)m_spectrumIndex);
673 if(m_scanNumber > 0)
674 scan_id.insert(QString("scan"), (qint64)m_scanNumber);
675 }
676 scan_id.insert(QString("native_id"), m_spectrumNativeId);
677
678 QCborMap &scan_ms2 = it_insert.first->second.cbor_ms2;
679 scan_ms2.insert(QString("rt"), m_line.rt);
680
681 QCborMap &scan_precursor = it_insert.first->second.cbor_precursor;
682 scan_precursor.insert(QString("z"), m_line.charge);
683 double mh = m_line.expmass + MHPLUS;
684 scan_precursor.insert(QString("mh"), mh);
685 double exp_mz = (m_line.expmass + (MHPLUS * m_line.charge)) / m_line.charge;
686 scan_precursor.insert(QString("mz"), exp_mz);
687 }
688
689 Psm one_psm;
690 one_psm.peptide_sequence_li = msp_peptide.get()->getSequenceLi();
691 one_psm.proforma = msp_peptide.get()->toProForma();
692 one_psm.protein_list = m_proteinList;
693
694
695 one_psm.cbor_eval.insert(QString("aligned_rt"), m_line.aligned_rt);
696 one_psm.cbor_eval.insert(QString("calcmass"), m_line.calcmass);
697 one_psm.cbor_eval.insert(QString("delta_best"), m_line.delta_best);
698 one_psm.cbor_eval.insert(QString("delta_mobility"), m_line.delta_mobility);
699 one_psm.cbor_eval.insert(QString("delta_next"), m_line.delta_next);
700 one_psm.cbor_eval.insert(QString("delta_rt_model"), m_line.delta_rt_model);
701 one_psm.cbor_eval.insert(QString("fragment_ppm"), m_line.fragment_ppm);
702 one_psm.cbor_eval.insert(QString("hyperscore"), m_line.hyperscore);
703 one_psm.cbor_eval.insert(QString("ion_mobility"), m_line.ion_mobility);
704 one_psm.cbor_eval.insert(QString("isotope_error"), m_line.isotope_error);
705 one_psm.cbor_eval.insert(QString("label"), m_line.label);
706 one_psm.cbor_eval.insert(QString("longest_b"), (qint64)m_line.longest_b);
707 one_psm.cbor_eval.insert(QString("longest_y"), (qint64)m_line.longest_y);
708 one_psm.cbor_eval.insert(QString("longest_y_pct"), m_line.longest_y_pct);
709 one_psm.cbor_eval.insert(QString("matched_intensity_pct"), m_line.matched_intensity_pct);
710 one_psm.cbor_eval.insert(QString("matched_peaks"), (qint64)m_line.matched_peaks);
711 one_psm.cbor_eval.insert(QString("missed_cleavages"), m_line.missed_cleavages);
712 one_psm.cbor_eval.insert(QString("ms2_intensity"), m_line.ms2_intensity);
713 one_psm.cbor_eval.insert(QString("peptide_len"), (qint64)m_line.peptide_len);
714 one_psm.cbor_eval.insert(QString("peptide_q"), m_line.peptide_q);
715 one_psm.cbor_eval.insert(QString("poisson"), m_line.poisson);
716 one_psm.cbor_eval.insert(QString("posterior_error"), m_line.posterior_error);
717 one_psm.cbor_eval.insert(QString("precursor_ppm"), m_line.precursor_ppm);
718 one_psm.cbor_eval.insert(QString("predicted_mobility"), m_line.predicted_mobility);
719 one_psm.cbor_eval.insert(QString("predicted_rt"), m_line.predicted_rt);
720 one_psm.cbor_eval.insert(QString("protein_q"), m_line.protein_q);
721 one_psm.cbor_eval.insert(QString("rank"), m_line.rank);
722 one_psm.cbor_eval.insert(QString("sage_discriminant_score"), m_line.sage_discriminant_score);
723 one_psm.cbor_eval.insert(QString("scored_candidates"), (qint64)m_line.scored_candidates);
724 one_psm.cbor_eval.insert(QString("semi_enzymatic"), m_line.semi_enzymatic);
725 one_psm.cbor_eval.insert(QString("spectrum_q"), m_line.spectrum_q);
726
727 current_cbor_scan_p->psm_list.emplace_back(one_psm);
728
729
730 std::size_t progress = m_lineNumber / 10000;
731 if(progress > m_progressIndex)
732 {
733 if(mp_monitor->shouldIstop())
734 {
736 QObject::tr("Sage TSV data reading process interrupted"));
737 }
738 m_progressIndex = progress;
739 mp_monitor->setStatus(QString("%1K ").arg(m_progressIndex * 10));
740 }
741}
742
743void
745{
746 m_sageReader.getCborStreamWriter().startMap();
747 m_sageReader.getCborStreamWriter().append("name");
748 one_sample.cbor_core_sample.value("name").toCbor(m_sageReader.getCborStreamWriter());
749
750 m_sageReader.getCborStreamWriter().append("identification_file_list");
751 one_sample.cbor_core_sample.value("identification_file_list")
752 .toCbor(m_sageReader.getCborStreamWriter());
753
754
755 m_sageReader.getCborStreamWriter().append("peaklist_file");
756 one_sample.cbor_core_sample.value("peaklist_file").toCbor(m_sageReader.getCborStreamWriter());
757 //"scan_list": [
758
759 m_sageReader.getCborStreamWriter().append("scan_list");
760 m_sageReader.getCborStreamWriter().startArray(one_sample.scan_map.size());
761 for(auto &it_scan : one_sample.scan_map)
762 {
763 writeScan(it_scan.second);
764 }
765 m_sageReader.getCborStreamWriter().endArray();
766
767
768 m_sageReader.getCborStreamWriter().endMap();
769}
770
771void
773{
774 m_sageReader.getCborStreamWriter().startMap();
775 m_sageReader.getCborStreamWriter().append("id");
776 QCborValue(one_scan.cbor_id).toCbor(m_sageReader.getCborStreamWriter());
777 m_sageReader.getCborStreamWriter().append("precursor");
778 QCborValue(one_scan.cbor_precursor).toCbor(m_sageReader.getCborStreamWriter());
779 m_sageReader.getCborStreamWriter().append("ms2");
780 QCborValue(one_scan.cbor_ms2).toCbor(m_sageReader.getCborStreamWriter());
781
782 m_sageReader.getCborStreamWriter().append("psm_list");
783 m_sageReader.getCborStreamWriter().startArray(one_scan.psm_list.size());
784 for(auto &it_psm : one_scan.psm_list)
785 {
786 writePsm(it_psm);
787 }
788 m_sageReader.getCborStreamWriter().endArray();
789
790 m_sageReader.getCborStreamWriter().endMap();
791}
792
793void
795{
796 m_sageReader.getCborStreamWriter().startMap();
797 m_sageReader.getCborStreamWriter().append("proforma");
798 m_sageReader.getCborStreamWriter().append(one_psm.proforma);
799 m_sageReader.getCborStreamWriter().append("protein_list");
800
801 QCborArray cbor_protein_list;
802 for(const QString &accession : one_psm.protein_list)
803 {
804 // qWarning() << "accession=" << accession;
805 QCborMap cbor_protein;
806 cbor_protein.insert(QString("accession"), accession);
807
808
809 // start/end positions
810 QString protein_sequence =
811 QString(m_psmProteinMap.getByAccession(accession).protein_sp.get()->getSequence())
812 .replace("L", "I");
813 int position = protein_sequence.indexOf(one_psm.peptide_sequence_li);
814
815 QCborArray positions;
816 while(position >= 0)
817 {
818 positions.push_back(position);
819 position = protein_sequence.indexOf(one_psm.peptide_sequence_li, position + 1);
820 }
821
822 cbor_protein.insert(QString("positions"), positions);
823
824 cbor_protein_list.append(cbor_protein);
825 }
826
827
828 QCborValue(cbor_protein_list).toCbor(m_sageReader.getCborStreamWriter());
829
830 m_sageReader.getCborStreamWriter().append("eval");
831 m_sageReader.getCborStreamWriter().startMap();
832 m_sageReader.getCborStreamWriter().append("sage");
833 QCborValue(one_psm.cbor_eval).toCbor(m_sageReader.getCborStreamWriter());
834 m_sageReader.getCborStreamWriter().endMap();
835
836 m_sageReader.getCborStreamWriter().endMap();
837}
838
839
840} // namespace psm
841} // namespace cbor
842} // namespace pappso
static PeptideSp parseString(const QString &pepstr)
store PsmProtein in a map with accession as key
std::vector< SageModification > getStaticModificationList() const
std::vector< SageModification > getVariableModificationList() const
std::vector< SageReader::SageModification > m_staticModificationList
void parseProteins(const QString &proteins_str)
virtual void endSheet() override
pappso::UiMonitorInterface * mp_monitor
virtual void startSheet(const QString &sheet_name) override
std::vector< SageReader::SageModification > m_variableModificationList
void parsePeptide(const QString &peptide_str)
void writeSample(const Sample &one_sample)
virtual void setCell(const OdsCell &cell) override
void writePsm(const Psm &one_psm)
SageTsvHandler(pappso::UiMonitorInterface *p_monitor, const SageReader &sage_reader, PsmProteinMap &psm_protein_map)
void writeScan(const Scan &one_scan)
std::map< QString, Sample > m_sampleMap
virtual void endDocument() override
virtual void startLine() override
void parseMsRunFilename(const QString &msrun_filename)
bool parseScanNrColumn(const QString &spectrum_string_id)
virtual void endLine() override
std::vector< Columns > m_columnTypeList
tries to keep as much as possible monoisotopes, removing any possible C13 peaks and changes multichar...
Definition aa.cpp:39
const pappso_double MHPLUS(1.007276466879)
std::shared_ptr< Protein > protein_sp