libpappsomspp
Library for mass spectrometry
Loading...
Searching...
No Matches
psmfilereaderbase.cpp
Go to the documentation of this file.
1/**
2 * \file pappsomspp/processing/cbor/psm/psmfilereaderbase.cpp
3 * \date 05/07/2025
4 * \author Olivier Langella
5 * \brief Base class to read CBOR PSM file
6 */
7
8/*******************************************************************************
9 * Copyright (c) 2025 Olivier Langella <Olivier.Langella@universite-paris-saclay.fr>.
10 *
11 * This file is part of PAPPSOms-tools.
12 *
13 * PAPPSOms-tools is free software: you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License as published by
15 * the Free Software Foundation, either version 3 of the License, or
16 * (at your option) any later version.
17 *
18 * PAPPSOms-tools is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU General Public License for more details.
22 *
23 * You should have received a copy of the GNU General Public License
24 * along with PAPPSOms-tools. If not, see <http://www.gnu.org/licenses/>.
25 *
26 ******************************************************************************/
27
28#include "psmfilereaderbase.h"
30#include <QDebug>
32#include <QCborArray>
33#include <qlogging.h>
34#include <qobject.h>
35
36
37namespace pappso
38{
39namespace cbor
40{
41namespace psm
42{
43
47
49{
50 // qWarning() << "~PsmFileReaderBase";
51}
52
53
/**
 * Entry point that parses a CBOR PSM document.
 *
 * NOTE(review): the signature line is missing from this listing; the member
 * index names it readCbor(QFile *cborp, pappso::UiMonitorInterface &monitor)
 * -- confirm against the header.
 *
 * initCborReader(cborp) is called first, then the document is handed to
 * readRoot(monitor) only if the reader reports that the current item is a
 * map. Any other top-level item is ignored without raising an error here.
 */
54void
56{
57
58 qDebug();
59 initCborReader(cborp);
60
61 qDebug();
// only a top-level map is parsed; anything else falls through untouched
62 if(mpa_cborReader->isMap())
63 {
64 readRoot(monitor);
65 }
66 qDebug();
67}
68
/**
 * Second entry point with the same body as the reader entry point above it.
 *
 * NOTE(review): the signature line is missing from this listing; presumably
 * this is an overload of readCbor taking a different kind of cborp source --
 * confirm against the header.
 *
 * initCborReader(cborp) is called first, then readRoot(monitor) runs only if
 * the reader reports that the current item is a map.
 */
69void
71{
72 qDebug();
73 initCborReader(cborp);
74
75 qDebug();
// only a top-level map is parsed; anything else falls through untouched
76 if(mpa_cborReader->isMap())
77 {
78 readRoot(monitor);
79 }
80 qDebug();
81}
82
83
/**
 * Tells whether the current PSM references at least one non-target protein.
 *
 * NOTE(review): the signature line is missing from this listing; the member
 * index describes currentProteinRefListContainsDecoy() const as "tells if the
 * current PSM has a decoy accession", which matches this body -- confirm.
 *
 * Each entry of m_currentPsmProteinRefList is looked up in m_proteinMap by
 * accession; the search stops at the first entry whose isTarget flag is false.
 *
 * @return true if such an entry exists, false otherwise (including when the
 * list is empty)
 */
84bool
86{
87 for(auto &it : m_currentPsmProteinRefList)
88 {
// a protein whose isTarget flag is false counts as a decoy here
89 if(!m_proteinMap.getByAccession(it.accession).isTarget)
90 return true;
91 }
92 return false;
93}
94
/**
 * Tells whether the current PSM references at least one target protein.
 *
 * NOTE(review): the signature line is missing from this listing; the member
 * index describes currentProteinRefListContainsTarget() const as "tells if
 * the current PSM has a target accession", which matches this body -- confirm.
 *
 * Each entry of m_currentPsmProteinRefList is looked up in m_proteinMap by
 * accession; the search stops at the first entry whose isTarget flag is true.
 *
 * @return true if such an entry exists, false otherwise (including when the
 * list is empty)
 */
95bool
97{
98 for(auto &it : m_currentPsmProteinRefList)
99 {
100 if(m_proteinMap.getByAccession(it.accession).isTarget)
101 return true;
102 }
103 return false;
104}
105
106
107void
109{
110 qDebug();
111 mpa_cborReader->enterContainer();
112
114 if(m_expectedString == "informations")
115 {
116 qDebug() << m_expectedString;
117 readInformations(monitor);
119
120 qDebug() << m_expectedString;
121 if(m_expectedString == "log")
122 {
123 qDebug() << m_expectedString;
124 readLog(monitor);
126 }
127
128 logReady(monitor);
129 }
130 else
131 {
132 throw pappso::PappsoException("ERROR: expecting informations element");
133 }
134
135 qDebug() << m_expectedString;
136
137 if(m_expectedString == "parameter_map")
138 {
139 qDebug();
140 readParameterMap(monitor);
141 }
142 else
143 {
144 throw pappso::PappsoException("ERROR: expecting parameter_map element");
145 }
146
147
149 m_targetFastaFiles.clear();
150 m_decoyFastaFiles.clear();
151 if(m_expectedString == "target_fasta_files")
152 {
155 }
156
157 if(m_expectedString == "decoy_fasta_files")
158 {
161 }
162 fastaFilesReady(monitor);
163
164 if(m_expectedString == "protein_map")
165 {
166 readProteinMap(monitor);
168 }
169
170 if(m_expectedString == "sample_list")
171 {
172 sampleListStarted(monitor);
173 mpa_cborReader->enterContainer(); // array
174 while(!mpa_cborReader->lastError() && mpa_cborReader->hasNext())
175 {
176 readSample(monitor);
177 }
178 mpa_cborReader->leaveContainer(); // array
179 sampleListFinished(monitor);
180 }
181 else
182 {
184 QObject::tr("ERROR: expecting sample_list element not %1").arg(m_expectedString));
185 }
186 mpa_cborReader->leaveContainer(); // whole file
187 if(!mpa_cborReader->lastError() && mpa_cborReader->hasNext())
188 {
189 readRoot(monitor);
190 }
191}
192
193void
195{
196 bool is_ok;
197 // m_cborInformations.clear();
198 is_ok = mpa_cborReader->readCborMap(m_cborInformations);
199
200 if(!is_ok)
201 {
202 throw pappso::PappsoException("ERROR: PSM cbor header informations not well formed");
203 }
204 qDebug() << m_cborInformations.keys();
205 if(m_cborInformations.value("type").toString() != "psm")
206 {
207 QStringList all_keys;
208 for(auto it_k : m_cborInformations.keys())
209 {
210 all_keys << it_k.toString();
211 }
213 QObject::tr("ERROR: this file does not contain PSM data but %1 and %2")
214 .arg(m_cborInformations.value("type").toString())
215 .arg(all_keys.join(" ")));
216 }
217 informationsReady(monitor);
218}
219
/**
 * Reads the "log" element of the file header into m_cborLog.
 *
 * NOTE(review): the signature line is missing from this listing; the member
 * index names it readLog(pappso::UiMonitorInterface &monitor) -- confirm.
 *
 * The body does not use the monitor and calls no "ready" hook itself; in this
 * file the caller (the root reader) invokes logReady() afterwards.
 *
 * @throws pappso::PappsoException if readCborArray() reports failure
 */
220void
222{
223 bool is_ok;
224 // m_cborInformations.clear();
// m_cborLog is not cleared before being filled
225 is_ok = mpa_cborReader->readCborArray(m_cborLog);
226
227 if(!is_ok)
228 {
229 throw pappso::PappsoException("ERROR: PSM cbor header log not well formed");
230 }
231}
232
233
/**
 * Reads the "parameter_map" element into m_cborParameterMap and notifies the
 * parameterMapReady() hook.
 *
 * NOTE(review): the signature line is missing from this listing; the member
 * index names it readParameterMap(pappso::UiMonitorInterface &monitor) --
 * confirm.
 *
 * @throws pappso::PappsoException if readCborMap() reports failure; the hook
 * is not called in that case
 */
234void
236{
237 bool is_ok;
// drop the content of any previously read parameter map
238 m_cborParameterMap.clear();
239 is_ok = mpa_cborReader->readCborMap(m_cborParameterMap);
240
241 if(!is_ok)
242 {
243 throw pappso::PappsoException("ERROR: PSM cbor parameter_map not well formed");
244 }
245 parameterMapReady(monitor);
246}
247
248void
254
255
258{
259 PsmProteinRef protein_ref;
260 protein_ref.accession = "";
261 protein_ref.positions.clear();
262 mpa_cborReader->enterContainer();
264 qDebug() << m_expectedString;
265 if(m_expectedString == "accession")
266 {
267 is_ok = mpa_cborReader->decodeString(protein_ref.accession);
268 if(!is_ok)
269 {
270 throw pappso::PappsoException("ERROR: protein accession is not a string");
271 }
272 }
273 else
274 {
275 throw pappso::PappsoException("ERROR: expecting accession element in PSM protein_list");
276 }
277
279 qDebug() << m_expectedString;
280 if(m_expectedString == "positions")
281 {
282 mpa_cborReader->readArray(protein_ref.positions);
283
284 // mpa_cborReader->next();
285 }
286 else
287 {
289 QString("ERROR: expecting positions element in PSM protein_list not %1")
290 .arg(m_expectedString));
291 }
292 mpa_cborReader->leaveContainer();
293
294 qDebug() << "end";
295 return protein_ref;
296}
297
298
301{
302 is_ok = false;
303 PsmFile file;
304 mpa_cborReader->enterContainer();
306 if(m_expectedString == "name")
307 {
308 if(!mpa_cborReader->decodeString(file.name))
309 {
310 throw pappso::PappsoException("file name is not a string");
311 }
312 is_ok = true;
313 }
314 else
315 {
316 throw pappso::PappsoException("ERROR: expecting name element in file");
317 }
318 mpa_cborReader->leaveContainer();
319 return file;
320}
321
322
/**
 * Writes one PsmFile as a CBOR map holding a single "name" entry.
 *
 * NOTE(review): the signature line is missing from this listing; the member
 * index names it writePsmFile(CborStreamWriter &writer, const PsmFile
 * &psm_file) -- confirm.
 *
 * The emitted structure is the one the file reader in this file expects: a
 * map whose key is "name" and whose value is psm_file.name.
 */
323void
325{
326 writer.startMap();
// key, then value
327 writer.append("name");
328 writer.append(psm_file.name);
329 writer.endMap();
330}
331
/**
 * Writes a list of PsmFile entries as a CBOR array, one writePsmFile() call
 * per element, in the order of file_list.
 *
 * NOTE(review): the first signature line is missing from this listing; the
 * member index names it writePsmFileList(CborStreamWriter &writer, const
 * std::vector<PsmFile> &file_list) -- confirm.
 *
 * An empty file_list produces only the startArray()/endArray() pair.
 */
332void
334 const std::vector<PsmFile> &file_list)
335{
336 writer.startArray();
337 for(auto &psm_file : file_list)
338 {
339 writePsmFile(writer, psm_file);
340 }
341 writer.endArray();
342}
343
344
345void
347{
348 //"name": "tandem2017_nopatch_20120906_balliau_extract_1_A01_urnb-1",
349 qDebug();
350 mpa_cborReader->enterContainer();
352
353 qDebug() << m_expectedString;
354 if(m_expectedString == "name")
355 {
356 if(!mpa_cborReader->decodeString(m_currentSampleName))
357 {
358 throw pappso::PappsoException("sample name is not a string");
359 }
360 }
361 else
362 {
363 throw pappso::PappsoException("ERROR reading sample : expecting name element in sample");
364 }
365 //"identification_file_list": [{ "name":
366 //"/home/langella/data1/tandem/tandem2017_nopatch_20120906_balliau_extract_1_A01_urnb-1.xml",
367 //}],
368
370
371 qDebug() << m_expectedString;
373 if(m_expectedString == "identification_file_list")
374 {
375 bool is_ok;
376 mpa_cborReader->enterContainer();
377
378 while(!mpa_cborReader->lastError() && mpa_cborReader->hasNext())
379 {
380 try
381 {
383 }
384 catch(const pappso::PappsoException &error)
385 {
387 QObject::tr("ERROR reading sample/identification_file_list : %1")
388 .arg(error.qwhat()));
389 }
390 }
391 mpa_cborReader->leaveContainer();
392
394 }
395 //"peaklist_file": {"name": "tandem2017_nopatch_20120906_balliau_extract_1_A01_urnb-1.mzml"
396 //},
397
398 if(m_expectedString == "peaklist_file")
399 {
400 bool is_ok;
401 try
402 {
404 }
405 catch(const pappso::PappsoException &error)
406 {
408 QObject::tr("ERROR reading sample/peaklist_file : %1").arg(error.qwhat()));
409 }
410 }
411 else
412 {
413 throw pappso::PappsoException("ERROR: expecting peaklist_file element in sample");
414 }
415 //"scan_list": [
416 sampleStarted(monitor);
418 if(m_expectedString == "scan_list")
419 {
420 if(!mpa_cborReader->isArray())
421 {
422 throw pappso::PappsoException("ERROR in scan_list: expecting an array");
423 }
424 mpa_cborReader->enterContainer(); // array
425
426 while(!mpa_cborReader->lastError() && mpa_cborReader->hasNext())
427 {
428 readScan(monitor);
429 }
430 mpa_cborReader->leaveContainer();
431 }
432 else
433 {
434 throw pappso::PappsoException("ERROR: expecting scan_list element in sample");
435 }
436 mpa_cborReader->leaveContainer();
437
438 sampleFinished(monitor);
439}
440
441void
443{
444 qDebug();
445 m_cborScanId.clear();
446
447 if(!mpa_cborReader->isMap())
448 {
449 throw pappso::PappsoException("ERROR in scan: expecting a map");
450 }
451 mpa_cborReader->enterContainer();
452 //"id": {
453 //"index": 1976
454 //},
455 qDebug() << "scan begin";
456 QString last_expected_string = m_expectedString;
457
459 qDebug() << m_expectedString;
460 if(m_expectedString == "id")
461 {
462 if(!mpa_cborReader->readCborMap(m_cborScanId))
463 {
464 throw pappso::PappsoException(QObject::tr("id element in scan is not a cbor map"));
465 }
466 }
467 else
468 {
470 QObject::tr(
471 "ERROR: expecting id element in scan not %1, sample %2, last expected string %3")
472 .arg(m_expectedString)
474 .arg(last_expected_string));
475 }
476 //"precursor": {
477 //"z": 2,
478 //"mz": 1120.529471
479 //},
480
482 m_cborScanPrecursor.clear();
483 qDebug() << m_expectedString;
484 if(m_expectedString == "precursor")
485 {
486 if(!mpa_cborReader->readCborMap(m_cborScanPrecursor))
487 {
488 throw pappso::PappsoException(QObject::tr("precursor element in scan is not a cbor map"));
489 }
490 }
491 else
492 {
493 throw pappso::PappsoException(QObject::tr("ERROR: expecting precursor element after id in "
494 "scan not %1, sample %2, last expected string %3")
495 .arg(m_expectedString)
497 .arg(last_expected_string));
498 }
499 //"ms2": {PSM CBOR format documentation
500 //"rt": 12648.87,
501 //"mz" :[1,2,3,4],
502 //"intensity" : [1,2,3,4]
503 //},
504
506 qDebug() << m_expectedString;
507 m_cborScanMs2.clear();
508 if(m_expectedString == "ms2")
509 {
510 if(!mpa_cborReader->readCborMap(m_cborScanMs2))
511 {
513 QObject::tr("ms2 element after precursor in scan is not a cbor map %1 %2:\n%3")
515 .arg(m_cborScanId.value("index").toInteger())
516 .arg(mpa_cborReader->lastError().toString()));
517 }
518 }
519 else
520 {
522 QObject::tr(
523 "ERROR: expecting ms2 element in scan not %1, sample %2, last expected string %3")
524 .arg(m_expectedString)
526 .arg(last_expected_string));
527 }
528
529
531 qDebug() << m_expectedString;
532
533
534 if(m_expectedString == "props")
535 {
536 bool is_ok;
537 is_ok = mpa_cborReader->readCborMap(m_cborScanProps);
538 if(!is_ok)
539 {
540 throw pappso::PappsoException("ERROR: props element in scan is not well formed");
541 }
542 if(!getExpectedString())
543 {
545 QObject::tr("ERROR: expecting psm_list element in scan %1").arg(m_currentPsmProforma));
546 }
547 }
548
549 //"psm_list": [
550 scanStarted(monitor);
551 if(m_expectedString == "psm_list")
552 {
553 qDebug() << "psm_list";
554 if(!mpa_cborReader->isArray())
555 {
556 throw pappso::PappsoException("ERROR in psm_list: expecting an array");
557 }
558 mpa_cborReader->enterContainer();
559 while(!mpa_cborReader->lastError() && mpa_cborReader->hasNext())
560 {
561 readPsm(monitor);
562 }
563 mpa_cborReader->leaveContainer();
564 }
565
566 mpa_cborReader->leaveContainer();
567 qDebug() << "scan end";
568 scanFinished(monitor);
569 qDebug();
570}
571
572void
574{
575 qDebug();
576 bool is_ok;
577 if(!mpa_cborReader->isMap())
578 {
579 throw pappso::PappsoException("ERROR in psm: expecting a map");
580 }
581 mpa_cborReader->enterContainer();
583 // "proforma": "AQEEM[+15.99491]AQVAK",
584 if(m_expectedString == "proforma")
585 {
586 if(!mpa_cborReader->decodeString(m_currentPsmProforma))
587 {
588 throw pappso::PappsoException("ERROR: proforma element in psm-scan is not a string");
589 }
590 }
591 else
592 {
593 throw pappso::PappsoException("ERROR: expecting proforma element in psm-scan");
594 }
595 //"protein_list" : [
596 //{
597 //"accession": "GRMZM2G083841_P01",
598 //"positions": [15,236]
599 //}
600 //],
601
604 qDebug() << m_expectedString;
605
606 if(m_expectedString == "protein_list")
607 {
608 mpa_cborReader->enterContainer(); // array
609 while(!mpa_cborReader->lastError() && mpa_cborReader->hasNext())
610 {
612 if(!is_ok)
613 {
614 qDebug();
616 QObject::tr("ERROR: reading protein_list element in psm-scan"));
617 }
618 }
619 // qDebug() << mpa_cborReader->type();
620 mpa_cborReader->leaveContainer(); // array
621 }
622 else
623 {
624 throw pappso::PappsoException("ERROR: expecting protein_list element in psm-scan");
625 }
626 // props: {
627 m_cborScanPsmProps.clear();
628
629 //"eval": {
630 qDebug();
631 m_cborScanPsmEval.clear();
633 qDebug() << m_expectedString;
634
635 if(m_expectedString == "props")
636 {
637 is_ok = mpa_cborReader->readCborMap(m_cborScanPsmProps);
638 if(!is_ok)
639 {
640 throw pappso::PappsoException("ERROR: props element in psm-scan is not well formed");
641 }
642 if(!getExpectedString())
643 {
645 QObject::tr("ERROR: expecting eval element in psm-scan %1").arg(m_currentPsmProforma));
646 }
647 }
648 if(m_expectedString == "eval")
649 {
650 is_ok = mpa_cborReader->readCborMap(m_cborScanPsmEval);
651 if(!is_ok)
652 {
653 throw pappso::PappsoException("ERROR: eval element in psm-scan is not well formed");
654 }
655 }
656 else
657
658 {
660 QObject::tr("ERROR: expecting eval element in psm-scan %1 not %2 in %3 %4 %5")
662 .arg(m_expectedString)
663 .arg(__FILE__)
664 .arg(__FUNCTION__)
665 .arg(__LINE__));
666 }
667
668
669 qDebug() << m_expectedString;
670
671
672 mpa_cborReader->leaveContainer();
673 qDebug();
674 psmReady(monitor);
675}
676
677void
679{
680 // PSM is ready, do what you want :)
681}
682
683void
687
688void
692
693void
695{
696}
697
698
699void
703
704void
708
709void
713
714void
718
719void
723
724void
728
731{
732 pappso::PeptideSp peptide_sp;
733 if(m_currentPsmProforma.isEmpty())
734 {
735 throw pappso::PappsoException(QObject::tr("ERROR: m_currentPsmProforma is empty"));
736 }
737 else
738 {
740 }
741 return peptide_sp;
742}
743
746{
747 if(m_currentPeaklistFile.name.isEmpty())
748 {
749 throw pappso::PappsoException(QObject::tr("ERROR: m_currentPeaklistFile is empty"));
750 }
751 if(m_cborScanId.isEmpty())
752 {
753 throw pappso::PappsoException(QObject::tr("ERROR: m_cborScanId is empty"));
754 }
755 if(m_cborScanPrecursor.isEmpty())
756 {
757 throw pappso::PappsoException(QObject::tr("ERROR: m_cborScanPrecursor is empty"));
758 }
759
760 if(!m_cborScanId.keys().contains("index"))
761 {
762 throw pappso::PappsoException("There is no scan index");
763 }
764
765 if(m_cborScanMs2.isEmpty())
766 {
767 throw pappso::PappsoException(QObject::tr("ERROR: m_cborScanMs2 is empty"));
768 }
769 else
770 {
771 if(!m_cborScanMs2.keys().contains("mz"))
772 {
773 throw pappso::PappsoException("There is no ms2 mz values");
774 }
775 if(!m_cborScanMs2.keys().contains("intensity"))
776 {
777 throw pappso::PappsoException("There is no ms2 intensity values");
778 }
779 }
782 pappso::MsRunIdCstSPtr msrun_id_sp = std::make_shared<const pappso::MsRunId>(msrun_id);
783 pappso::MassSpectrumId ms_id(msrun_id_sp);
784 ms_id.setSpectrumIndex(m_cborScanId.value("index").toInteger());
785
786 // native_id
787 if(m_cborScanId.keys().contains("native_id"))
788 {
789 ms_id.setNativeId(m_cborScanId.value("native_id").toString());
790 }
791
792 std::vector<DataPoint> data_point_vector;
793 std::size_t i = 0;
794 for(auto cbor_mz_value : m_cborScanMs2.value("mz").toArray())
795 {
796 data_point_vector.push_back(
797 {cbor_mz_value.toDouble(), m_cborScanMs2.value("intensity").toArray().at(i).toDouble()});
798 i++;
799 }
800
801
802 MassSpectrum mass_spectrum(data_point_vector);
803 pappso::PrecursorIonData precursor_ion_data;
804
805 pappso::QualifiedMassSpectrum qualified_mass_spectrum(ms_id);
806 qualified_mass_spectrum.setMassSpectrumSPtr(mass_spectrum.makeMassSpectrumSPtr());
807 qualified_mass_spectrum.setMsLevel(2);
808
809 if(m_cborScanPrecursor.keys().contains("z"))
810 {
811 precursor_ion_data.charge = m_cborScanPrecursor.value("z").toInteger();
812 }
813 if(m_cborScanPrecursor.keys().contains("mz"))
814 {
815 precursor_ion_data.mz = m_cborScanPrecursor.value("mz").toDouble();
816 }
817 if(m_cborScanPrecursor.keys().contains("intensity"))
818 {
819 precursor_ion_data.intensity = m_cborScanPrecursor.value("intensity").toDouble();
820 }
821 qualified_mass_spectrum.appendPrecursorIonData(precursor_ion_data);
822 if(m_cborScanMs2.keys().contains("rt"))
823 {
824 qualified_mass_spectrum.setRtInSeconds(m_cborScanMs2.value("rt").toDouble());
825 }
826
827
828 return qualified_mass_spectrum.makeQualifiedMassSpectrumSPtr();
829}
830
831void
835
836void
840
841
842double
843PsmFileReaderBase::getPrecursorMass(double mz_prec, uint charge) const
844{
845 // compute precursor mass given the charge state
846 mz_prec = mz_prec * (double)charge;
847 mz_prec -= (MHPLUS * (double)charge);
848 return mz_prec;
849}
850
851
852} // namespace psm
853} // namespace cbor
854} // namespace pappso
void setNativeId(const QString &native_id)
void setSpectrumIndex(std::size_t index)
Class to represent a mass spectrum.
MassSpectrumSPtr makeMassSpectrumSPtr() const
MS run identity MsRunId identifies an MS run with a unique ID (XmlId) and contains eventually informa...
Definition msrunid.h:54
void setSampleName(const QString &name)
set a sample name for this MsRunId
Definition msrunid.cpp:77
virtual const QString & qwhat() const
static PeptideSp parseString(const QString &pepstr)
Class representing a fully specified mass spectrum.
void appendPrecursorIonData(const PrecursorIonData &precursor_ion_data)
void setMsLevel(uint ms_level)
Set the mass spectrum level.
QualifiedMassSpectrumSPtr makeQualifiedMassSpectrumSPtr() const
void setMassSpectrumSPtr(MassSpectrumSPtr massSpectrum)
Set the MassSpectrumSPtr.
void setRtInSeconds(pappso_double rt)
Set the retention time in seconds.
overrides QCborStreamWriter base class to provide convenient functions
std::vector< PsmProteinRef > m_currentPsmProteinRefList
pappso::QualifiedMassSpectrumSPtr getCurrentQualifiedMassSpectrumSPtr() const
get the qualified Spectrum for the current PSM
pappso::PeptideSp getCurrentPsmPeptideSp() const
bool currentProteinRefListContainsTarget() const
tells if the current PSM has a target accession
virtual void sampleListStarted(pappso::UiMonitorInterface &monitor)
void writePsmFileList(CborStreamWriter &writer, const std::vector< PsmFile > &file_list)
double getPrecursorMass(double mz_prec, uint charge) const
convenient function to compute precursor ion mass
virtual void logReady(pappso::UiMonitorInterface &monitor)
virtual void scanStarted(pappso::UiMonitorInterface &monitor)
virtual void readPsm(pappso::UiMonitorInterface &monitor)
virtual void readLog(pappso::UiMonitorInterface &monitor)
virtual void proteinMapReady(pappso::UiMonitorInterface &monitor)
virtual void sampleStarted(pappso::UiMonitorInterface &monitor)
bool currentProteinRefListContainsDecoy() const
tells if the current PSM has a decoy accession
virtual void readParameterMap(pappso::UiMonitorInterface &monitor)
void readCbor(QFile *cborp, pappso::UiMonitorInterface &monitor)
virtual void readScan(pappso::UiMonitorInterface &monitor)
virtual void readInformations(pappso::UiMonitorInterface &monitor)
virtual void scanFinished(pappso::UiMonitorInterface &monitor)
virtual void sampleListFinished(pappso::UiMonitorInterface &monitor)
virtual void psmReady(pappso::UiMonitorInterface &monitor)
virtual void informationsReady(pappso::UiMonitorInterface &monitor)
void readRoot(pappso::UiMonitorInterface &monitor)
std::vector< PsmFile > m_currentIdentificationFileList
void writePsmFile(CborStreamWriter &writer, const PsmFile &psm_file)
virtual void fastaFilesReady(pappso::UiMonitorInterface &monitor)
virtual void parameterMapReady(pappso::UiMonitorInterface &monitor)
virtual void readProteinMap(pappso::UiMonitorInterface &monitor)
virtual void readSample(pappso::UiMonitorInterface &monitor)
PsmProteinRef readPsmProteinRef(bool &is_ok)
virtual void sampleFinished(pappso::UiMonitorInterface &monitor)
tries to keep as much as possible monoisotopes, removing any possible C13 peaks and changes multichar...
Definition aa.cpp:39
std::shared_ptr< QualifiedMassSpectrum > QualifiedMassSpectrumSPtr
std::shared_ptr< const Peptide > PeptideSp
std::shared_ptr< const MsRunId > MsRunIdCstSPtr
Definition msrunid.h:46
const pappso_double MHPLUS(1.007276466879)
unsigned int uint
Definition types.h:67