libpappsomspp
Library for mass spectrometry
Loading...
Searching...
No Matches
mzidentmlreader.cpp
Go to the documentation of this file.
1/**
2 * \file src/input/mzidentml/mzidentmlreader.cpp
3 * \date 24/11/2022
4 * \author Olivier Langella
5 * \brief new method to read mzIdentML XML files
6 */
7
8
9/*******************************************************************************
10 * Copyright (c) 2022 Olivier Langella
11 *<Olivier.Langella@universite-paris-saclay.fr>.
12 *
13 * This file is part of i2MassChroQ.
14 *
15 * i2MassChroQ is free software: you can redistribute it and/or modify
16 * it under the terms of the GNU General Public License as published by
17 * the Free Software Foundation, either version 3 of the License, or
18 * (at your option) any later version.
19 *
20 * i2MassChroQ is distributed in the hope that it will be useful,
21 * but WITHOUT ANY WARRANTY; without even the implied warranty of
22 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 * GNU General Public License for more details.
24 *
25 * You should have received a copy of the GNU General Public License
26 * along with i2MassChroQ. If not, see <http://www.gnu.org/licenses/>.
27 *
28 ******************************************************************************/
29
30#include "mzidentmlreader.h"
32
34#include <QDebug>
35#include <memory>
36
37
38namespace pappso
39{
40namespace cbor
41{
42namespace psm
43{
44QString
46{
47 return QString("%1 %2 %3 %4").arg(cvRef).arg(accession).arg(name).arg(value);
48}
49
50
51QString
53{
54 return QString("%1 %2").arg(name).arg(value);
55}
56
57
60 const QFileInfo &mzident_file)
61{
62
63 mp_monitor = p_monitor;
64 qDebug() << mzident_file.absoluteFilePath() << "'";
66 m_mzidentFile = mzident_file;
67
68
69 try
70 {
71
72 mp_cborWriter = p_output;
73
74 mp_cborWriter->startMap();
75 mp_cborWriter->writeInformations(
76 "mzidentml_file_reader", Utils::getVersion(), "psm", "mzidentml reader");
77
78
79 if(!readFile(mzident_file.absoluteFilePath()))
80 {
81
82 if(errorString() == "Not an MzIdentML file")
83 {
85 QObject::tr("Error reading %1 not mzIdentML file :\n %2")
86 .arg(mzident_file.absoluteFilePath())
87 .arg(errorString()));
88 }
89 else
90 {
91 throw pappso::PappsoException(QObject::tr("Error reading %1 mzIdentML file :\n %2")
92 .arg(mzident_file.absoluteFilePath())
93 .arg(errorString()));
94 }
95 }
96
97
98 mp_cborWriter->endMap();
99 }
101 {
102 throw err;
103 }
104 catch(pappso::PappsoException &other_err)
105 {
106 throw pappso::PappsoException(QObject::tr("Error reading mzIdentML file %1:\n%2")
107 .arg(mzident_file.absoluteFilePath())
108 .arg(other_err.qwhat()));
109 }
110}
111
115
116
117void
119{
120 qDebug();
121 mp_cborWriter->append("parameter_map");
122 mp_cborWriter->writeCborMap(m_cborParameterMap);
123
124 QStringList fasta_files;
125 for(auto &pair_searchdb : m_mzidSearchDatabaseIdMap)
126 {
127 fasta_files << pair_searchdb.second.file;
128 }
129 mp_cborWriter->append("target_fasta_files");
130 mp_cborWriter->writeArray(fasta_files);
131
132
133 for(std::pair<const QString, MzidDBSequence> &pair_sequence : m_MzidDBSequenceIdMap)
134 {
135 PsmProtein psm_protein;
136
137 psm_protein.isContaminant = false;
138 psm_protein.isTarget = !pair_sequence.second.is_decoy;
139 psm_protein.protein_sp = pair_sequence.second.protein_sp;
140
141 m_proteinMap.insert(psm_protein);
142 }
143
144 mp_cborWriter->append(QString("protein_map"));
145 m_proteinMap.writeMap(*mp_cborWriter);
146
147 mp_cborWriter->append("sample_list");
148 mp_cborWriter->startArray();
149 for(std::pair<const QString, std::vector<SpectrumIdentificationResult>> &pair_spectra :
151 {
152
153 mp_cborWriter->startMap();
154 mp_cborWriter->append("name");
155 mp_cborWriter->append(m_mzidSpectraDataIdMap.at(pair_spectra.first).name);
156
157 mp_cborWriter->append("identification_file_list");
158 mp_cborWriter->startArray();
159 mp_cborWriter->startMap();
160 mp_cborWriter->append("name");
161 mp_cborWriter->append(m_mzidentFile.absoluteFilePath());
162 mp_cborWriter->endMap();
163 mp_cborWriter->endArray();
164 // one_sample.cbor_core_sample.value("identification_file_list")
165 // .toCbor(m_sageReader.getCborStreamWriter());
166
167
168 mp_cborWriter->append("peaklist_file");
169 mp_cborWriter->startMap();
170 mp_cborWriter->append("name");
171 mp_cborWriter->append(m_mzidSpectraDataIdMap.at(pair_spectra.first).file);
172 mp_cborWriter->endMap();
173 //"scan_list": [
174
175 mp_cborWriter->append("scan_list");
176 mp_cborWriter->startArray(pair_spectra.second.size());
177 for(SpectrumIdentificationResult &it_spectrum_ident : pair_spectra.second)
178 {
179 writeSpectrumIdentificationResult(it_spectrum_ident);
180 }
181 mp_cborWriter->endArray();
182
183
184 mp_cborWriter->endMap();
185 }
186
187 mp_cborWriter->endArray();
188 qDebug();
189}
190
191void
193{
194 // mp_monitor->setStatus("reading X!Tandem result file");
195 if(m_qxmlStreamReader.readNextStartElement())
196 {
197
198 qDebug() << m_qxmlStreamReader.name().toString();
199 if(m_qxmlStreamReader.name().toString().toLower() == "mzidentml")
200 {
201 while(m_qxmlStreamReader.readNextStartElement())
202 {
203 // cvList
204 if(m_qxmlStreamReader.name().toString() == "cvList")
205 {
206 m_qxmlStreamReader.skipCurrentElement();
207 }
208 // AnalysisSoftwareList
209 else if(m_qxmlStreamReader.name().toString() == "AnalysisSoftwareList")
210 {
211 while(readAnalysisSoftware())
212 {
213 }
214 } // Provider
215 else if(m_qxmlStreamReader.name().toString() == "Provider")
216 {
217 m_qxmlStreamReader.skipCurrentElement();
218 }
219 // AuditCollection
220 else if(m_qxmlStreamReader.name().toString() == "AuditCollection")
221 {
222 m_qxmlStreamReader.skipCurrentElement();
223 }
224 // SequenceCollection
225 else if(m_qxmlStreamReader.name().toString() == "SequenceCollection")
226 {
228 {
229 }
230 }
231 // AnalysisCollection
232 else if(m_qxmlStreamReader.name().toString() == "AnalysisCollection")
233 {
235 {
236 }
237 }
238 // AnalysisProtocolCollection
239 else if(m_qxmlStreamReader.name().toString() == "AnalysisProtocolCollection")
240 {
242 }
243
244 // DataCollection
245 else if(m_qxmlStreamReader.name().toString() == "DataCollection")
246 {
248 {
249 }
250 }
251 else if(m_qxmlStreamReader.name().toString() == "BibliographicReference")
252 {
253 m_qxmlStreamReader.skipCurrentElement();
254 }
255
256 else
257 {
258 m_qxmlStreamReader.raiseError(
259 QObject::tr("element %1 not implemented").arg(m_qxmlStreamReader.name()));
260 m_qxmlStreamReader.skipCurrentElement();
261 }
262 }
263 }
264 else
265 {
266 m_qxmlStreamReader.raiseError(QObject::tr("Not an MzIdentML input file"));
267 m_qxmlStreamReader.skipCurrentElement();
268 }
269 }
270}
271
272
273void
275{
276 //<AnalysisProtocolCollection>
277
278 qDebug();
279 while(m_qxmlStreamReader.readNextStartElement())
280 {
281 qDebug() << m_qxmlStreamReader.name();
282 if(m_qxmlStreamReader.name().toString() == "SpectrumIdentificationProtocol")
283 {
284 auto it_soft = m_IdentificationEngineMap.find(
285 m_qxmlStreamReader.attributes().value("analysisSoftware_ref").toString());
286 if(it_soft != m_IdentificationEngineMap.end())
287 {
288 IdentificationEngine identification_engine = it_soft->second;
289
290 if(identification_engine == IdentificationEngine::XTandem)
291 {
292 //<SpectrumIdentificationProtocol analysisSoftware_ref="ID_software"
293 // id="SearchProtocol_1">
294 QCborMap identification_engine_parameters;
295
296 while(m_qxmlStreamReader.readNextStartElement())
297 {
298 if(m_qxmlStreamReader.name().toString() == "SearchType")
299 {
300 m_qxmlStreamReader.skipCurrentElement();
301 }
302 else if(m_qxmlStreamReader.name().toString() == "ModificationParams")
303 {
304 m_qxmlStreamReader.skipCurrentElement();
305 }
306
307 else if(m_qxmlStreamReader.name().toString() == "Threshold")
308 {
309 m_qxmlStreamReader.skipCurrentElement();
310 }
311 else if(m_qxmlStreamReader.name().toString() == "ParentTolerance")
312 {
313 m_qxmlStreamReader.skipCurrentElement();
314 }
315 else if(m_qxmlStreamReader.name().toString() == "FragmentTolerance")
316 {
317 m_qxmlStreamReader.skipCurrentElement();
318 }
319 else if(m_qxmlStreamReader.name().toString() == "Enzymes")
320 {
321 m_qxmlStreamReader.skipCurrentElement();
322 }
323 else if(m_qxmlStreamReader.name().toString() == "AdditionalSearchParams")
324 {
325 while(m_qxmlStreamReader.readNextStartElement())
326 {
327 if(m_qxmlStreamReader.name().toString() == "userParam")
328 {
329 //<userParam name="list path, default parameters"
330 // value="/tmp/i2masschroq.ubFSuT/QExactive_analysis_FDR_nosemi.xml"/>
331 identification_engine_parameters.insert(
332 m_qxmlStreamReader.attributes().value("name").toString(),
333 m_qxmlStreamReader.attributes().value("value").toString());
334 m_qxmlStreamReader.skipCurrentElement();
335 }
336 else
337 {
338 m_qxmlStreamReader.skipCurrentElement();
339 }
340 }
341 }
342 else
343 {
344
345 m_qxmlStreamReader.raiseError(QObject::tr("element %1 not implemented")
346 .arg(m_qxmlStreamReader.name()));
347 m_qxmlStreamReader.skipCurrentElement();
348 }
349 }
350
351 m_cborParameterMap.insert(QString("xtandem"),
352 identification_engine_parameters.toCborValue());
353 // qWarning() << "coucou";
354 }
355 else
356 {
357 // other than tandem
358 m_qxmlStreamReader.skipCurrentElement();
359 }
360 }
361 else
362 {
363 m_qxmlStreamReader.skipCurrentElement();
364 }
365 }
366 else
367 {
368 m_qxmlStreamReader.raiseError(
369 QObject::tr("element %1 not implemented").arg(m_qxmlStreamReader.name()));
370 m_qxmlStreamReader.skipCurrentElement();
371 }
372 }
373 qDebug();
374}
375
376bool
378{
379
380
381 /** @brief stores the current analysis software id
382 */
383 QString analysisSoftwareId;
384
385
386 /** @brief tells if the software name has been found and is handled by the
387 * parser
388 */
389 IdentificationEngine analysisSotwareNameFound = IdentificationEngine::unknown;
390 if(m_qxmlStreamReader.readNextStartElement())
391 {
392 if(m_qxmlStreamReader.name().toString() == "AnalysisSoftware")
393 {
394
395 // <AnalysisSoftware version="0.0.9" name="DeepProt" id="as1">
396
397
398 analysisSoftwareId = m_qxmlStreamReader.attributes().value("id").toString();
399 QString software_name = m_qxmlStreamReader.attributes().value("name").toString();
400
401
402 m_analysisSoftwareVersion = m_qxmlStreamReader.attributes().value("version").toString();
403
404 if(software_name == "SpecOMS")
405 {
406 analysisSotwareNameFound = IdentificationEngine::SpecOMS;
407
408 m_qxmlStreamReader.skipCurrentElement();
409 }
410 else if(software_name == "DeepProt")
411 {
412 analysisSotwareNameFound = IdentificationEngine::SpecOMS;
413
414 m_qxmlStreamReader.skipCurrentElement();
415 }
416 else
417 {
418 while(m_qxmlStreamReader.readNextStartElement())
419 {
420 if(m_qxmlStreamReader.name().toString() == "SoftwareName")
421 {
422 while(m_qxmlStreamReader.readNextStartElement())
423 {
424 if(m_qxmlStreamReader.name().toString() == "cvParam")
425 {
426 CvParam cv_param = readCvParam();
427 if(cv_param.accession == "MS:1001476")
428 {
429 analysisSotwareNameFound = IdentificationEngine::XTandem;
430 }
431 else if(cv_param.accession == "MS:1002048")
432 {
433 analysisSotwareNameFound = IdentificationEngine::MSGFplus;
434 }
435 else if(cv_param.accession == "MS:1001946")
436 {
437 analysisSotwareNameFound = IdentificationEngine::PEAKS_Studio;
438 }
439 }
440 }
441 }
442 else
443 {
444 m_qxmlStreamReader.skipCurrentElement();
445 }
446 }
447 }
448
449
450 switch(analysisSotwareNameFound)
451 {
453 break;
455 break;
457 break;
459 break;
460
461 default:
462 m_qxmlStreamReader.raiseError(
463 QObject::tr("identification results from %1 are not supported yet, "
464 "Please contact "
465 "the PAPPSO team.")
466 .arg(software_name));
467 return false;
468 }
469
470
471 auto it = m_IdentificationEngineMap.insert(
472 std::pair<QString, IdentificationEngine>(analysisSoftwareId, analysisSotwareNameFound));
473
474 if(it.second == false)
475 {
476 it.first->second = analysisSotwareNameFound;
477 }
478 }
479 else
480 {
481 m_qxmlStreamReader.raiseError(QObject::tr("Not an MzIdentML input file"));
482 m_qxmlStreamReader.skipCurrentElement();
483 return false;
484 }
485 return true;
486 }
487 return false;
488}
489
490bool
492{
493 if(m_qxmlStreamReader.readNextStartElement())
494 {
495 if(m_qxmlStreamReader.name().toString() == "DBSequence")
496 {
498 }
499 else if(m_qxmlStreamReader.name().toString() == "Peptide")
500 {
501 readPeptide();
502 }
503 // PeptideEvidence
504 else if(m_qxmlStreamReader.name().toString() == "PeptideEvidence")
505 {
507 }
508 else
509 {
510 m_qxmlStreamReader.raiseError(
511 QObject::tr("Error in MzIdentML input file, %1 no DBSequence")
512 .arg(m_qxmlStreamReader.name()));
513 m_qxmlStreamReader.skipCurrentElement();
514 return false;
515 }
516 return true;
517 }
518 return false;
519}
520
521void
523{
524
525 // attributes.value("base_name")
526 // ProteinXtpSp sp_xtp_protein = _current_protein.makeProteinXtpSp();
527 MzidDBSequence dbsequence;
528 dbsequence.is_decoy = false;
529 dbsequence.accession = m_qxmlStreamReader.attributes().value("accession").toString();
530
531 QString accession_description = dbsequence.accession;
532 dbsequence.searchDatabase_ref =
533 m_qxmlStreamReader.attributes().value("searchDatabase_ref").toString();
534
535 QString id = m_qxmlStreamReader.attributes().value("id").toString();
536
537
538 while(m_qxmlStreamReader.readNextStartElement())
539 {
540 if(m_qxmlStreamReader.name().toString() == "cvParam")
541 {
542 CvParam cv_param = readCvParam();
543
544
545 if(cv_param.accession == "MS:1001088")
546 {
547 // protein description
548 // protein.get()->setDescription(cv_param.value);
549 dbsequence.description = cv_param.value;
550 accession_description.append(" ").append(dbsequence.description);
551 }
552 else if(cv_param.accession == "MS:1001195")
553 {
554 // PSI-MS MS:1001195 decoy DB type reverse
555 // protein.get()->setIsDecoy(true);
556 dbsequence.is_decoy = true;
557 }
558 else
559 {
560 dbsequence.cvParamList.push_back(cv_param);
561 }
562 }
563 else if(m_qxmlStreamReader.name().toString().toLower() == "seq")
564 {
565
566
567 dbsequence.sequence = m_qxmlStreamReader.readElementText();
568 }
569 else
570 {
571
572 m_qxmlStreamReader.raiseError(
573 QObject::tr("Error in MzIdentML/DBSequence unexpected %1 tag")
574 .arg(m_qxmlStreamReader.name()));
575 m_qxmlStreamReader.skipCurrentElement();
576 }
577 }
578
579
580 // qWarning() << accession_description;
581 dbsequence.protein_sp = std::make_shared<Protein>(accession_description, dbsequence.sequence);
582
583 m_MzidDBSequenceIdMap.insert({id, dbsequence});
584 // searchDatabase_ref="SearchDB_1"
585
586 // protein.get()->setFastaFileP(fastaFile.get());
587}
588
591{
592 CvParam cv_param;
593 cv_param.cvRef = m_qxmlStreamReader.attributes().value("cvRef").toString();
594 cv_param.accession = m_qxmlStreamReader.attributes().value("accession").toString();
595 cv_param.name = m_qxmlStreamReader.attributes().value("name").toString();
596 cv_param.value = m_qxmlStreamReader.attributes().value("value").toString();
597 cv_param.unitAccession = m_qxmlStreamReader.attributes().value("unitAccession").toString();
598 cv_param.unitName = m_qxmlStreamReader.attributes().value("unitName").toString();
599 cv_param.unitCvRef = m_qxmlStreamReader.attributes().value("unitCvRef").toString();
600 m_qxmlStreamReader.skipCurrentElement();
601 return cv_param;
602}
603
606{
607 UserParam user_param;
608
609 user_param.name = m_qxmlStreamReader.attributes().value("name").toString();
610 user_param.value = m_qxmlStreamReader.attributes().value("value").toString();
611 m_qxmlStreamReader.skipCurrentElement();
612
613 return user_param;
614}
615
616
617void
619{
620 std::shared_ptr<pappso::Peptide> peptide_sp;
621
622 QString xml_id = m_qxmlStreamReader.attributes().value("id").toString();
623
624 // PeptideSequence
625 if(m_qxmlStreamReader.readNextStartElement())
626 {
627 if(m_qxmlStreamReader.name().toString() == "PeptideSequence")
628 {
629 peptide_sp = std::make_shared<Peptide>(m_qxmlStreamReader.readElementText().simplified());
630 }
631 else
632 {
633
634 m_qxmlStreamReader.raiseError(
635 QObject::tr("Error in MzIdentML/Peptide no PeptideSequence"));
636 }
637 }
638
639 while(m_qxmlStreamReader.readNextStartElement())
640 {
641 if(m_qxmlStreamReader.name().toString() == "Modification")
642 {
643 // <Modification monoisotopicMassDelta="-0.02682025649"
644 // location="1">
645 // <cvParam accession="-0.0268203" cvRef="PSI-MOD" name=""/>
646 // </Modification>
647
648 Modification modification;
649 modification.monoisotopicMassDelta =
650 m_qxmlStreamReader.attributes().value("monoisotopicMassDelta").toDouble();
651 modification.location = m_qxmlStreamReader.attributes().value("location").toUInt();
652 bool is_cv_param = false;
653 while(m_qxmlStreamReader.readNextStartElement())
654 {
655 if(m_qxmlStreamReader.name().toString() == "cvParam")
656 {
657 is_cv_param = true;
658 modification.cvParam = readCvParam();
659
660
661 // qDebug() << "startElement_aa ";
662 pappso::AaModificationP modif = nullptr;
663
664
665 qDebug() << modification.monoisotopicMassDelta;
666 if(modification.cvParam.accession != "")
667 {
668 qDebug() << modification.cvParam.accession;
669 if(modification.cvParam.accession.startsWith("UNIMOD:"))
670 {
672 modification.cvParam.accession);
673 }
674 else
675 {
676 // hope it is psi mod:
677 if(modification.cvParam.accession.startsWith("MOD:"))
678 {
679 modif =
681 }
682 else if(modification.cvParam.accession == "MS:1001460")
683 {
684 //[Term]
685 // id: MS:1001460
686 // name: unknown modification
687 // def: "This term should be given if the
688 // modification was unknown." [PSI:PI] is_a:
689 // MS:1001471 ! peptide modification details
691 modification.monoisotopicMassDelta);
692 }
693 else
694 {
695 qInfo() << "MzIdentMlSaxHandler::endElement_"
696 "Modification unknown "
697 "modification "
698 << modification.cvParam.accession << " "
699 << modification.cvParam.name;
700 }
701 }
702 }
703
704 if(modif == nullptr)
705 {
706 m_qxmlStreamReader.raiseError(
707 QObject::tr("Error in MzIdentML/Peptide/Modification/cvParam "
708 "modification accession %1 not found")
709 .arg(modification.cvParam.accession));
710 return;
711 }
712
713 if(modification.location == 0)
714 {
715 peptide_sp.get()->addAaModification(modif, 0);
716 }
717 else
718 {
719 peptide_sp.get()->addAaModification(modif, modification.location - 1);
720 }
721 }
722 else
723 {
724
725 m_qxmlStreamReader.raiseError(
726 QObject::tr("Error in MzIdentML/Peptide/Modification "
727 "unexpected %1 tag")
728 .arg(m_qxmlStreamReader.name()));
729 }
730 }
731
732 if(is_cv_param == false)
733 {
734 pappso::AaModificationP modif_without_cvparam = nullptr;
735 // no cv param element
736
737 if(modification.location == 0)
738 {
739 modif_without_cvparam =
741 peptide_sp.get()->getAa(0).getAminoAcidChar(),
742 modification.monoisotopicMassDelta);
743 peptide_sp.get()->addAaModification(modif_without_cvparam, 0);
744 }
745 else
746 {
747 modif_without_cvparam =
749 peptide_sp.get()->getAa(modification.location - 1).getAminoAcidChar(),
750 modification.monoisotopicMassDelta);
751 peptide_sp.get()->addAaModification(modif_without_cvparam,
752 modification.location - 1);
753 }
754 }
755 }
756
757 else
758 {
759
760 m_qxmlStreamReader.raiseError(QObject::tr("Error in MzIdentML/Peptide unexpected %1 tag")
761 .arg(m_qxmlStreamReader.name()));
762 }
763 }
764
765
766 m_PeptideIdMap.insert(std::pair<QString, PeptideSp>(xml_id, peptide_sp));
767}
768
769bool
771{
772 qDebug();
774
775 auto itprot =
776 m_MzidDBSequenceIdMap.find(m_qxmlStreamReader.attributes().value("dBSequence_ref").toString());
777 if(itprot == m_MzidDBSequenceIdMap.end())
778 {
779 m_qxmlStreamReader.raiseError(
780 QObject::tr("dBSequence_ref %1 not defined")
781 .arg(m_qxmlStreamReader.attributes().value("dBSequence_ref").toString()));
782 return false;
783 }
784 qDebug();
785 pe.protein = itprot->second.protein_sp;
786
787
788 auto itpep = m_PeptideIdMap.find(m_qxmlStreamReader.attributes().value("peptide_ref").toString());
789 if(itpep == m_PeptideIdMap.end())
790 {
791 m_qxmlStreamReader.raiseError(
792 QObject::tr("peptide_ref %1 not defined")
793 .arg(m_qxmlStreamReader.attributes().value("peptide_ref").toString()));
794 return false;
795 }
796 qDebug();
797 pe.peptide = itpep->second;
798
799 pe.start = m_qxmlStreamReader.attributes().value("start").toUInt() - 1;
800 pe.end = m_qxmlStreamReader.attributes().value("end").toUInt() - 1;
801 pe.isDecoy = false;
802 if(m_qxmlStreamReader.attributes().value("isDecoy").toString() == "true")
803 {
804 pe.isDecoy = true;
805 }
806 qDebug();
807 m_MzidPeptideEvidenceIdMap.insert(std::pair<QString, MzidPeptideEvidence>(
808 m_qxmlStreamReader.attributes().value("id").toString(), pe));
809 m_qxmlStreamReader.skipCurrentElement();
810 qDebug();
811 return true;
812}
813
814bool
816{
817
818 qDebug();
819 if(m_qxmlStreamReader.readNextStartElement())
820 {
821 if(m_qxmlStreamReader.name().toString() == "SpectrumIdentification")
822 {
824 qDebug();
825 }
826 else if(m_qxmlStreamReader.name().toString() == "ProteinDetection")
827 {
828 m_qxmlStreamReader.skipCurrentElement();
829 }
830 else
831 {
832 m_qxmlStreamReader.raiseError(
833 QObject::tr("Error in MzIdentML/AnalysisCollection, unexpected %1 t")
834 .arg(m_qxmlStreamReader.name()));
835 m_qxmlStreamReader.skipCurrentElement();
836 return false;
837 }
838 return true;
839 }
840 return false;
841}
842
843void
845{
846
847 m_qxmlStreamReader.skipCurrentElement();
848}
849
850bool
852{
853
854 qDebug();
855 if(m_qxmlStreamReader.readNextStartElement())
856 {
857 if(m_qxmlStreamReader.name().toString() == "Inputs")
858 {
859 readInputs();
860 }
861
862 // AnalysisData
863 else if(m_qxmlStreamReader.name().toString() == "AnalysisData")
864 {
865 qDebug();
867 }
868 else
869 {
870 m_qxmlStreamReader.raiseError(
871 QObject::tr("Error in MzIdentML/DataCollection, unexpected %1 tag")
872 .arg(m_qxmlStreamReader.name()));
873 m_qxmlStreamReader.skipCurrentElement();
874 return false;
875 }
876 return true;
877 }
878 return false;
879}
880
881void
883{
884 while(m_qxmlStreamReader.readNextStartElement())
885 {
886 if(m_qxmlStreamReader.name().toString() == "SearchDatabase")
887 {
889 }
890 else if(m_qxmlStreamReader.name().toString() == "SpectraData")
891 {
893 }
894 else if(m_qxmlStreamReader.name().toString() == "SourceFile")
895 {
896 m_qxmlStreamReader.skipCurrentElement();
897 }
898
899 else
900 {
901 m_qxmlStreamReader.raiseError(
902 QObject::tr("Error in MzIdentML/DataCollection/Inputs, unexpected %1 tag")
903 .arg(m_qxmlStreamReader.name()));
904 m_qxmlStreamReader.skipCurrentElement();
905 }
906 }
907}
908
909
910//<SearchDatabase numDatabaseSequences="136828"
911// location="/home/thierry/test/MS-GF+/Genome_Z_mays_v5a_conta.fasta"
912// id="SearchDB_1">
913// <FileFormat>
914// <cvParam cvRef="PSI-MS" accession="MS:1001348" name="FASTA format"/>
915// </FileFormat>
916// <DatabaseName>
917// <userParam name="Genome_Z_mays_v5a_conta.fasta"/>
918// </DatabaseName>
919// </SearchDatabase>
920bool
922{
923 /* <SearchDatabase
924 location="/gorgone/pappso/versions_logiciels_pappso/tandemng/database/Genome_Z_mays_5a.fasta"
925 id="SearchDB_0"> <FileFormat> <cvParam accession="MS:1001348" cvRef="PSI-MS" name="FASTA
926 format"/>
927 </FileFormat>
928 <DatabaseName>
929 <userParam name="DatabaseName" value="Genome_Z_mays_5a.fasta"/>
930 </DatabaseName>
931 <cvParam accession="MS:1001197" cvRef="PSI-MS" name="DB composition
932 target+decoy"/> <cvParam accession="MS: 1001283" cvRef="PSI-MS" name="decoy DB accession
933 regexp" value="^XXX"/> <cvParam accession="MS: 1001195" cvRef="PSI-MS" name="decoy DB type
934 reverse"/>
935 </SearchDatabase>
936 */
937 qDebug();
938 QString id = m_qxmlStreamReader.attributes().value("id").toString();
939 // auto itfasta = m_FastaFileIdMap.find(id);
940
941 MzidSearchDatabase search_database;
942
943 search_database.file = m_qxmlStreamReader.attributes().value("location").toString();
944
945
946 if(search_database.file.isEmpty())
947 {
948 m_qxmlStreamReader.raiseError(QObject::tr("SearchDatabase id %1 location is empty").arg(id));
949 return false;
950 }
951
952
953 m_mzidSearchDatabaseIdMap.insert({id, search_database});
954
955 m_qxmlStreamReader.skipCurrentElement();
956 qDebug();
957 return true;
958}
959
960void
962{
963
964 qDebug();
965 MzidSpectraData spectra_data;
966 spectra_data.file = m_qxmlStreamReader.attributes().value("location").toString();
967
968
969 // msrun.get()->setXmlId(attributes.value("id"));
970 spectra_data.name = m_qxmlStreamReader.attributes().value("name").toString();
971
972
973 m_mzidSpectraDataIdMap.insert(std::pair<QString, MzidSpectraData>(
974 m_qxmlStreamReader.attributes().value("id").toString(), spectra_data));
975 m_qxmlStreamReader.skipCurrentElement();
976}
977
978void
980{
981
982 qDebug() << m_qxmlStreamReader.name();
983 while(m_qxmlStreamReader.readNextStartElement())
984 {
985 qDebug() << m_qxmlStreamReader.name();
986 if(m_qxmlStreamReader.name().toString() == "SpectrumIdentificationList")
987 {
988 while(m_qxmlStreamReader.readNextStartElement())
989 {
990 qDebug() << m_qxmlStreamReader.name();
991 if(m_qxmlStreamReader.name().toString() == "SpectrumIdentificationResult")
992 {
994 }
995 else if(m_qxmlStreamReader.name().toString() == "FragmentationTable")
996 {
997 m_qxmlStreamReader.skipCurrentElement();
998 }
999 else
1000 {
1001 m_qxmlStreamReader.raiseError(
1002 QObject::tr("Error in "
1003 "MzIdentML/DataCollection/AnalysisData/"
1004 "SpectrumIdentificationList, unexpected %1 tag")
1005 .arg(m_qxmlStreamReader.name()));
1006 m_qxmlStreamReader.skipCurrentElement();
1007 }
1008 }
1009 }
1010 else if(m_qxmlStreamReader.name().toString() == "ProteinDetectionList")
1011 {
1012 // ProteinDetectionList
1013 m_qxmlStreamReader.skipCurrentElement();
1014 }
1015 else
1016 {
1017 m_qxmlStreamReader.raiseError(
1018 QObject::tr("Error in MzIdentML/DataCollection/AnalysisData, "
1019 "unexpected %1 tag")
1020 .arg(m_qxmlStreamReader.name()));
1021 m_qxmlStreamReader.skipCurrentElement();
1022 }
1023 }
1024 qDebug();
1025 finalDebrief();
1026}
1027
1028void
1030{
1031 qDebug() << m_qxmlStreamReader.name();
1032
1033 QString spectra_id = m_qxmlStreamReader.attributes().value("spectraData_ref").toString();
1034 auto it_spectra_data = m_mzidSpectraDataIdMap.find(spectra_id);
1035
1036 if(it_spectra_data == m_mzidSpectraDataIdMap.end())
1037 {
1038 m_qxmlStreamReader.raiseError(QObject::tr("spectraData_ref %1 not defined in "
1039 "m_mzidSpectraDataIdMap")
1040 .arg(spectra_id));
1041 }
1042
1043 auto it_insert_ident = m_spectrumIdentificationResultBySpectraIdMap.insert({spectra_id, {}});
1044
1045 it_insert_ident.first->second.push_back({});
1046 SpectrumIdentificationResult &spectrum_identification_result =
1047 it_insert_ident.first->second.back();
1048 spectrum_identification_result.cvParamList.clear();
1049 spectrum_identification_result.userParamList.clear();
1050 spectrum_identification_result.spectrumIdentificationItemList.clear();
1051
1052 spectrum_identification_result.spectrumID =
1053 m_qxmlStreamReader.attributes().value("spectrumID").toString();
1054 spectrum_identification_result.id = m_qxmlStreamReader.attributes().value("id").toString();
1055
1056
1057 qDebug() << m_qxmlStreamReader.name();
1058
1059 while(m_qxmlStreamReader.readNextStartElement())
1060 {
1061 qDebug() << m_qxmlStreamReader.name();
1062 if(m_qxmlStreamReader.name() == QString("SpectrumIdentificationItem"))
1063 {
1064 qDebug();
1065 readSpectrumIdentificationItem(spectrum_identification_result);
1066 }
1067 else if(m_qxmlStreamReader.name() == QString("cvParam"))
1068 {
1069 CvParam cv_param = readCvParam();
1070 qDebug() << cv_param.toString();
1071 spectrum_identification_result.cvParamList.push_back(cv_param);
1072 }
1073
1074 else if(m_qxmlStreamReader.name() == QString("userParam"))
1075 {
1076 UserParam user_param = readUserParam();
1077 qDebug() << user_param.toString();
1078 }
1079 else
1080 {
1081 m_qxmlStreamReader.raiseError(
1082 QObject::tr("Error in "
1083 "MzIdentML/DataCollection/AnalysisData/"
1084 "SpectrumIdentificationList/"
1085 "SpectrumIdentificationResult, unexpected %1 tag")
1086 .arg(m_qxmlStreamReader.name()));
1087 // m_qxmlStreamReader.skipCurrentElement();
1088 }
1089 }
1090
1091 if(m_qxmlStreamReader.hasError())
1092 return;
1093 qDebug() << m_qxmlStreamReader.name();
1094
1095 // find scan number
1096 spectrum_identification_result.spectrumIndex = 0;
1097 spectrum_identification_result.isSpectrumIndex = false;
1098 spectrum_identification_result.scanNum = 0;
1099 spectrum_identification_result.retentionTime = 0;
1100
1101 // spectrumID="index=194"
1102 if(spectrum_identification_result.spectrumID.startsWith("index="))
1103 {
1104 bool is_ok = false;
1105 spectrum_identification_result.spectrumIndex =
1106 spectrum_identification_result.spectrumID.mid(6).toULongLong(&is_ok);
1107 spectrum_identification_result.isSpectrumIndex = true;
1108 if(!is_ok)
1109 {
1110 m_qxmlStreamReader.raiseError(QObject::tr("reading spectrum index failed in %1"
1111 "SpectrumIdentificationResult id %2")
1112 .arg(spectrum_identification_result.spectrumID)
1113 .arg(spectrum_identification_result.id));
1114 return;
1115 }
1116 }
1117
1118 // <cvParam cvRef="PSI-MS" accession="MS:1001115" name="scan number(s)"
1119 // value="16079"/>
1120 for(auto cvParam : spectrum_identification_result.cvParamList)
1121 {
1122
1123 qDebug() << cvParam.toString();
1124 if(cvParam.accession == "MS:1001115")
1125 {
1126 spectrum_identification_result.scanNum = cvParam.value.toUInt();
1127 }
1128 else if(cvParam.accession == "MS:1003062")
1129 {
1130 spectrum_identification_result.isSpectrumIndex = true;
1131 spectrum_identification_result.spectrumIndex = cvParam.value.toUInt();
1132 }
1133 else if((cvParam.accession == "MS:1000016") || (cvParam.accession == "MS:1000894"))
1134 {
1135 //[Term]
1136 // id: MS:1000894
1137 // name: retention time
1138 // def: "A time interval from the start of chromatography when an
1139 // analyte exits a chromatographic column." [PSI:MS]
1140
1141 // [Term]
1142 // id: MS:1000016
1143 // name: scan start time
1144 // def: "The time that an analyzer started a scan, relative to the
1145 // start of the MS run." [PSI:MS]
1146
1147
1148 spectrum_identification_result.retentionTime = cvParam.value.toDouble();
1149 }
1150 }
1151 if((spectrum_identification_result.scanNum == 0) &&
1152 (spectrum_identification_result.isSpectrumIndex == false))
1153 {
1154 m_qxmlStreamReader.raiseError(QObject::tr("scan number or spectrum index not found in "
1155 "SpectrumIdentificationResult id %1")
1156 .arg(spectrum_identification_result.id));
1157 }
1158
1159
1160 if(spectrum_identification_result.retentionTime == 0)
1161 {
1162 m_qxmlStreamReader.raiseError(
1163 QObject::tr("retention time not found in SpectrumIdentificationResult id %1")
1164 .arg(spectrum_identification_result.id));
1165 }
1166
1167
1168 for(auto spectrumIdentificationItem :
1169 spectrum_identification_result.spectrumIdentificationItemList)
1170 {
1171 // processSpectrumIdentificationItem(spectrum_identification_result,
1172 // spectrumIdentificationItem);
1173 }
1174}
1175
1176void
1178 MzIdentMlReader::SpectrumIdentificationResult &spectrum_identification_result)
1179{
1180
1181 qDebug();
1182 spectrum_identification_result.spectrumIdentificationItemList.push_back(
1184 spectrum_identification_result.spectrumIdentificationItemList.back()
1185 .mzidPeptideEvidenceList.clear();
1186 spectrum_identification_result.spectrumIdentificationItemList.back().cvParamList.clear();
1187 spectrum_identification_result.spectrumIdentificationItemList.back().userParamList.clear();
1188
1189 spectrum_identification_result.spectrumIdentificationItemList.back().chargeState =
1190 m_qxmlStreamReader.attributes().value("chargeState").toUInt();
1191
1192 spectrum_identification_result.spectrumIdentificationItemList.back().experimentalMassToCharge =
1193 m_qxmlStreamReader.attributes().value("experimentalMassToCharge").toDouble();
1194
1195
1196 auto itpeptide =
1197 m_PeptideIdMap.find(m_qxmlStreamReader.attributes().value("peptide_ref").toString());
1198
1199 if(itpeptide == m_PeptideIdMap.end())
1200 {
1201 m_qxmlStreamReader.raiseError(QObject::tr("peptide_ref %1 not defined")
1202 .arg(m_qxmlStreamReader.attributes().value("peptide_ref")));
1203 }
1204 spectrum_identification_result.spectrumIdentificationItemList.back().peptide = itpeptide->second;
1205
1206
1207 while(m_qxmlStreamReader.readNextStartElement())
1208 {
1209 qDebug() << m_qxmlStreamReader.name();
1210 if(m_qxmlStreamReader.name() == QString("PeptideEvidenceRef"))
1211 {
1212
1213 auto itpeptideEvidence = m_MzidPeptideEvidenceIdMap.find(
1214 m_qxmlStreamReader.attributes().value("peptideEvidence_ref").toString());
1215
1216 if(itpeptideEvidence == m_MzidPeptideEvidenceIdMap.end())
1217 {
1218 m_qxmlStreamReader.raiseError(
1219 QObject::tr("peptideEvidence_ref %1 not defined")
1220 .arg(m_qxmlStreamReader.attributes().value("peptideEvidence_ref")));
1221 }
1222
1223 spectrum_identification_result.spectrumIdentificationItemList.back()
1224 .mzidPeptideEvidenceList.push_back(itpeptideEvidence->second);
1225 m_qxmlStreamReader.skipCurrentElement();
1226 }
1227 else if(m_qxmlStreamReader.name() == QString("cvParam"))
1228 {
1229 spectrum_identification_result.spectrumIdentificationItemList.back()
1230 .cvParamList.push_back(readCvParam());
1231 qDebug() << spectrum_identification_result.spectrumIdentificationItemList.back()
1232 .cvParamList.back()
1233 .toString();
1234 }
1235 else if(m_qxmlStreamReader.name() == QString("userParam"))
1236 {
1237 UserParam user_param = readUserParam();
1238 spectrum_identification_result.spectrumIdentificationItemList.back()
1239 .userParamList.push_back(user_param);
1240 qDebug() << user_param.toString();
1241 }
1242 else
1243 {
1244 m_qxmlStreamReader.raiseError(QObject::tr("Error in "
1245 "MzIdentML/DataCollection/AnalysisData/"
1246 "SpectrumIdentificationList/"
1247 "SpectrumIdentificationResult/"
1248 "SpectrumIdentificationItem, unexpected %1 tag")
1249 .arg(m_qxmlStreamReader.name()));
1250 }
1251 }
1252 qDebug();
1253}
1254/*
1255void
1256MzIdentMlReader::processSpectrumIdentificationItem(
1257 SpectrumIdentificationResult &spectrum_identification_result,
1258 const SpectrumIdentificationItem &spectrumIdentificationItem)
1259{
1260
1261
1262 qDebug();
1263
1264 if(spectrum_identification_result.isSpectrumIndex)
1265 {
1266 spectrum_identification_result.scanNum = spectrum_identification_result.spectrumIndex;
1267 }
1268 PeptideEvidence peptide_evidence(
1269 spectrum_identification_result.mzident_source_sp.get()->getMsRunSp().get(),
1270 spectrum_identification_result.scanNum,
1271 spectrum_identification_result.isSpectrumIndex);
1272 peptide_evidence.setRetentionTime(spectrum_identification_result.retentionTime);
1273 peptide_evidence.setCharge(spectrumIdentificationItem.chargeState);
1274 peptide_evidence.setPeptideXtpSp(spectrumIdentificationItem.peptide);
1275 qDebug() << peptide_evidence.getPeptideXtpSp().get()->toAbsoluteString();
1276 peptide_evidence.setChecked(true);
1277 peptide_evidence.setIdentificationDataSource(
1278 spectrum_identification_result.mzident_source_sp.get());
1279 peptide_evidence.setIdentificationEngine(getIdentificationEngine());
1280
1281 peptide_evidence.setExperimentalMassToCharge(spectrumIdentificationItem.experimentalMassToCharge);
1282
1283 qDebug();
1284
1285 // <cvParam cvRef="PSI-MS" accession="MS:1002049"
1286 // name="MS-GF:RawScore" value="356"/> <cvParam cvRef="PSI-MS"
1287 // accession="MS:1002050" name="MS-GF:DeNovoScore" value="369"/>
1288 // <cvParam cvRef="PSI-MS" accession="MS:1002052"
1289 // name="MS-GF:SpecEValue" value="9.149361665076834E-40"/> <cvParam
1290 // cvRef="PSI-MS" accession="MS:1002053" name="MS-GF:EValue"
1291 // value="2.057944235338586E-32"/>
1292 // <userParam name="IsotopeError" value="0"/>
1293 // <userParam name="AssumedDissociationMethod" value="HCD"/>
1294 for(auto cvParam : spectrumIdentificationItem.cvParamList)
1295 {
1296
1297 //<cvParam accession="MS:1002258" cvRef="PSI-MS" value="7"
1298 // name="Comet:matched ions"/>
1299 if(cvParam.accession == "MS:1002049")
1300 {
1301 // PSI-MS MS:1002049 MS-GF:RawScore 356
1302 peptide_evidence.setParam(PeptideEvidenceParam::msgfplus_raw,
1303 QVariant(cvParam.value.toUInt()));
1304 }
1305 else if(cvParam.accession == "MS:1002050")
1306 {
1307 // msgfplus_denovo = 9, ///< MS:1002050 "MS-GF de novo score."
1308 peptide_evidence.setParam(PeptideEvidenceParam::msgfplus_denovo,
1309 QVariant(cvParam.value.toUInt()));
1310 }
1311
1312 else if(cvParam.accession == "MS:1002052")
1313 {
1314 peptide_evidence.setParam(PeptideEvidenceParam::msgfplus_SpecEValue,
1315 QVariant(cvParam.value.toDouble()));
1316 }
1317
1318 else if(cvParam.accession == "MS:1002053")
1319 {
1320 // PSI-MS MS:1002053 MS-GF:EValue
1321 peptide_evidence.setParam(PeptideEvidenceParam::msgfplus_EValue,
1322 QVariant(cvParam.value.toDouble()));
1323 }
1324
1325 else if(cvParam.accession == "MS:1002054")
1326 {
1327 // <cvParam cvRef="PSI-MS" accession="MS:1002054" name="MS-GF:QValue"
1328 // value="0.0"/>
1329 peptide_evidence.setParam(PeptideEvidenceParam::msgfplus_QValue,
1330 QVariant(cvParam.value.toDouble()));
1331 }
1332 else if(cvParam.accession == "MS:1002055")
1333 {
1334 // <cvParam cvRef="PSI-MS" accession="MS:1002055"
1335 // name="MS-GF:PepQValue" value="0.0"/>
1336 peptide_evidence.setParam(PeptideEvidenceParam::msgfplus_PepQValue,
1337 QVariant(cvParam.value.toDouble()));
1338 }
1339 else if(cvParam.accession == "MS:1001331")
1340 {
1341 // PSI-MS MS:1001331 tandem hyperscore
1342 peptide_evidence.setParam(PeptideEvidenceParam::tandem_hyperscore,
1343 QVariant(cvParam.value.toDouble()));
1344 }
1345 else if(cvParam.accession == "MS:1001330")
1346 {
1347 // PSI-MS MS:1001330 X!Tandem:expect
1348 peptide_evidence.setParam(PeptideEvidenceParam::tandem_expectation_value,
1349 QVariant(cvParam.value.toDouble()));
1350 peptide_evidence.setEvalue(cvParam.value.toDouble());
1351 }
1352 else if(cvParam.accession == "MS:1001950")
1353 {
1354 //<cvParam accession="MS:1001950" cvRef="PSI-MS" value="54.90"
1355 // name="PEAKS:peptideScore"/>
1356
1357 peptide_evidence.setParam(PeptideEvidenceParam::peaks_peptide_score,
1358 QVariant(cvParam.value.toDouble()));
1359 }
1360
1361 //msgfplus_energy = 10, ///< MS:1002051 "MS-GF energy score." [PSI:PI]
1362 //msgfplus_SpecEValue = 11, ///< MS:1002052 "MS-GF spectral E-value."
1363 //[PSI:PI] msgfplus_EValue = 12, ///< MS:1002053 "MS-GF E-value."
1364 //[PSI:PI] msgfplus_isotope_error = 13, ///< MS-GF isotope error
1365 //comet_xcorr = 14, ///< MS:1002252 "The Comet result 'XCorr'." [PSI:PI]
1366 //comet_deltacn = 15, ///< MS:1002253 "The Comet result 'DeltaCn'."
1367 //[PSI:PI] comet_deltacnstar = 16, ///< MS:1002254 "The Comet result
1368 //'DeltaCnStar'." [PSI:PI] comet_spscore = 17, ///< MS:1002255 "The Comet
1369 //result 'SpScore'." [PSI:PI] comet_sprank = 18, ///< MS:1002256 "The
1370 //Comet result 'SpRank'." [PSI:PI] comet_expectation_value = 19, ///<
1371 //MS:1002257 "The Comet result 'Expectation value'." [PSI:PI]
1372
1373 else
1374 {
1375 m_qxmlStreamReader.raiseError(
1376 QObject::tr("cvParam %1 is not taken into account").arg(cvParam.toString()));
1377 }
1378 }
1379
1380 qDebug();
1381 for(auto userParam : spectrumIdentificationItem.userParamList)
1382 {
1383 if(userParam.name == "DeepProt:original_count")
1384 {
1385 // <userParam name="DeepProt:original_count" value="7"/>
1386 peptide_evidence.setParam(PeptideEvidenceParam::deepprot_original_count,
1387 QVariant(userParam.value.toUInt()));
1388 }
1389 else if(userParam.name == "DeepProt:fitted_count")
1390 {
1391 // <userParam name="DeepProt:fitted_count" value="7"/>
1392 peptide_evidence.setParam(PeptideEvidenceParam::deepprot_fitted_count,
1393 QVariant(userParam.value.toUInt()));
1394 }
1395 else if(userParam.name == "DeepProt:match_type")
1396 {
1397 peptide_evidence.setParam(
1398 PeptideEvidenceParam::deepprot_match_type,
1399 (std::uint8_t)pappso::DeepProtEnumStr::DeepProtMatchTypeFromString(userParam.value));
1400 }
1401 else if(userParam.name == "DeepProt:status")
1402 {
1403 peptide_evidence.setParam(
1404 PeptideEvidenceParam::deepprot_peptide_candidate_status,
1405 (std::uint8_t)pappso::DeepProtEnumStr::DeepProtPeptideCandidateStatusFromString(
1406 userParam.value));
1407 }
1408 else if(userParam.name == "DeepProt:mass_delta")
1409 {
1410 peptide_evidence.setParam(PeptideEvidenceParam::deepprot_mass_delta,
1411 QVariant(userParam.value.toDouble()));
1412 }
1413 else if(userParam.name == "DeepProt:delta_positions")
1414 {
1415 // DeepProt:delta_positions 4 5 6 7 8 9 10 11 12 13
1416 peptide_evidence.setParam(PeptideEvidenceParam::deepprot_delta_positions,
1417 userParam.value);
1418 }
1419 // <userParam name="IsotopeError" value="0"/>
1420 //<userParam name="AssumedDissociationMethod" value="HCD"/>
1421 }
1422
1423
1424 qDebug();
1425 for(auto mz_peptide_evidence : spectrumIdentificationItem.mzidPeptideEvidenceList)
1426 {
1427 PeptideMatch peptide_match;
1428 peptide_match.setStart(mz_peptide_evidence.start);
1429 peptide_match.setPeptideEvidenceSp(spectrum_identification_result.mzident_source_sp.get()
1430 ->getPeptideEvidenceStore()
1431 .getInstance(&peptide_evidence));
1432
1433
1434 ProteinMatch *p_protein_match =
1435 spectrum_identification_result.identification_group_p->getProteinMatchInstance(
1436 mz_peptide_evidence.protein.get()->getAccession());
1437
1438 p_protein_match->setChecked(true);
1439 // qDebug() << "startElement_protein p_protein_match 3 " <<
1440 // _p_protein_match;
1441 p_protein_match->setProteinXtpSp(mz_peptide_evidence.protein);
1442 p_protein_match->addPeptideMatch(peptide_match);
1443 }
1444 qDebug();
1445}*/
1446
1447void
1449 const SpectrumIdentificationResult &spectrum_identificatio_result)
1450{
1451
1452 mp_cborWriter->startMap(); // one scan
1453 mp_cborWriter->append("id");
1454 mp_cborWriter->startMap(); // id
1455 mp_cborWriter->append("index");
1456 mp_cborWriter->append((qint64)spectrum_identificatio_result.spectrumIndex);
1457 mp_cborWriter->append("native_id");
1458 mp_cborWriter->append(spectrum_identificatio_result.spectrumID);
1459 mp_cborWriter->endMap(); // end id
1460
1461 uint charge = spectrum_identificatio_result.spectrumIdentificationItemList.front().chargeState;
1462 double exp_mz =
1463 spectrum_identificatio_result.spectrumIdentificationItemList.front().experimentalMassToCharge;
1464
1465 mp_cborWriter->append("precursor");
1466 mp_cborWriter->startMap(); // precursor
1467 mp_cborWriter->append("z");
1468 mp_cborWriter->append(charge);
1469 mp_cborWriter->append("mz");
1470 mp_cborWriter->append(exp_mz);
1471 mp_cborWriter->endMap(); // end precursor
1472
1473
1474 mp_cborWriter->append("ms2");
1475 mp_cborWriter->startMap(); // ms2
1476 mp_cborWriter->append("rt");
1477 mp_cborWriter->append(spectrum_identificatio_result.retentionTime);
1478 mp_cborWriter->endMap(); // end ms2
1479
1480
1481 mp_cborWriter->append("psm_list");
1482 mp_cborWriter->startArray(spectrum_identificatio_result.spectrumIdentificationItemList.size());
1483 for(auto &spectrum_ident_item : spectrum_identificatio_result.spectrumIdentificationItemList)
1484 {
1485 if(spectrum_ident_item.chargeState != charge)
1486 { // error
1487 }
1488 if(spectrum_ident_item.experimentalMassToCharge != exp_mz)
1489 { // error
1490 }
1491 writeSpectrumIdentificationItem(spectrum_ident_item);
1492 }
1493 mp_cborWriter->endArray();
1494
1495 mp_cborWriter->endMap(); // end one scan
1496}
1497
1498void
1500 const SpectrumIdentificationItem &spectrum_identification_item)
1501{
1502 mp_cborWriter->startMap(); // psm
1503 mp_cborWriter->append("proforma");
1504 mp_cborWriter->append(spectrum_identification_item.peptide.get()->toProForma());
1505
1506 std::map<QString, std::vector<std::size_t>> map_protein_positions;
1507 for(auto &it_peptide_evidence : spectrum_identification_item.mzidPeptideEvidenceList)
1508 {
1509 auto it_insert =
1510 map_protein_positions.insert({it_peptide_evidence.protein.get()->getAccession(), {}});
1511 it_insert.first->second.push_back(it_peptide_evidence.start);
1512 }
1513 mp_cborWriter->append("protein_list");
1514 mp_cborWriter->startArray(map_protein_positions.size());
1515 for(auto &it_prot_pos : map_protein_positions)
1516 {
1517 mp_cborWriter->startMap();
1518 mp_cborWriter->append("accession");
1519 mp_cborWriter->append(it_prot_pos.first);
1520 mp_cborWriter->append("positions");
1521 mp_cborWriter->writeArray(it_prot_pos.second);
1522 mp_cborWriter->endMap();
1523 }
1524 mp_cborWriter->endArray();
1525
1526
1527 mp_cborWriter->append("eval");
1528 mp_cborWriter->startMap(); // start eval
1529 bool is_ok = false;
1530 is_ok = writeTandemEval(spectrum_identification_item.cvParamList);
1531 mp_cborWriter->endMap(); // end eval
1532
1533 if(!is_ok)
1534 {
1535 throw pappso::PappsoException("This identification engine is not taken into account");
1536 }
1537 mp_cborWriter->endMap(); // end psm
1538}
1539
1540
1541bool
1542MzIdentMlReader::writeTandemEval(const std::vector<CvParam> &cv_param_list)
1543{
1544 // <cvParam accession="MS:1001330" cvRef="PSI-MS" name="X!Tandem:expect"
1545 // value="0.0410883"/>
1546 // <cvParam accession="MS:1001331" cvRef="PSI-MS" name="X!Tandem:hyperscore" value="25.9"/>
1547 auto it_find =
1548 std::find_if(cv_param_list.begin(), cv_param_list.end(), [](const CvParam &cv_param) {
1549 return cv_param.accession == "MS:1001330";
1550 });
1551 if(it_find != cv_param_list.end())
1552 {
1553 mp_cborWriter->append("xtandem");
1554 mp_cborWriter->startMap(); // start tandem
1555 mp_cborWriter->append("evalue");
1556 mp_cborWriter->append(it_find->value.toDouble());
1557 auto it_find =
1558 std::find_if(cv_param_list.begin(), cv_param_list.end(), [](const CvParam &cv_param) {
1559 return cv_param.accession == "MS:1001331";
1560 });
1561 mp_cborWriter->append("hyperscore");
1562 mp_cborWriter->append(it_find->value.toDouble());
1563
1564 mp_cborWriter->endMap(); // end tandem
1565
1566 return true;
1567 }
1568 return false;
1569}
1570
1571} // namespace psm
1572} // namespace cbor
1573} // namespace pappso
static AaModificationP getInstance(const QString &accession)
static AaModificationP getInstanceCustomizedMod(pappso_double modificationMass)
exception to use when an item type is not recognized
virtual const QString & qwhat() const
static AaModificationP guessAaModificationPbyMonoisotopicMassDelta(Enums::AminoAcidChar aa, pappso_double mass)
Definition utils.cpp:658
static AaModificationP translateAaModificationFromUnimod(const QString &unimod_accession)
Definition utils.cpp:734
static QString getVersion()
Definition utils.cpp:650
virtual bool readFile(const QString &fileName)
overrides QCborStreamWriter base class to provide convenient functions
pappso::cbor::CborStreamWriter * mp_cborWriter
std::map< QString, PeptideSp > m_PeptideIdMap
store association between xml ID and peptide sequence
MzIdentMlReader(pappso::UiMonitorInterface *p_monitor, pappso::cbor::CborStreamWriter *p_output, const QFileInfo &mzident_file)
IdentificationEngine m_identificationEngine
@ MSGFplus
MS:1002048 "MS-GF+ software used to analyze the spectra." [PSI:PI].
@ XTandem
MS:1001476 X!Tandem was used to analyze the spectra.
bool writeTandemEval(const std::vector< CvParam > &cv_param_list)
void writeSpectrumIdentificationItem(const SpectrumIdentificationItem &spectrum_identification_item)
std::map< QString, MzidPeptideEvidence > m_MzidPeptideEvidenceIdMap
store association between xml ID and peptide evidence
pappso::UiMonitorInterface * mp_monitor
void writeSpectrumIdentificationResult(const SpectrumIdentificationResult &spectrum_identificatio_result)
std::map< QString, IdentificationEngine > m_IdentificationEngineMap
store association between xml ID and an identification engine
std::map< QString, MzidSpectraData > m_mzidSpectraDataIdMap
store association between xml ID and SpectraData
std::map< QString, MzidSearchDatabase > m_mzidSearchDatabaseIdMap
store association between xml ID and fasta files
std::map< QString, std::vector< SpectrumIdentificationResult > > m_spectrumIdentificationResultBySpectraIdMap
store all identification results by spectra xml id
void readSpectrumIdentificationItem(SpectrumIdentificationResult &spectrum_identification_result)
std::map< QString, MzidDBSequence > m_MzidDBSequenceIdMap
store association between xml ID and DBSequence
tries to keep as much as possible monoisotopes, removing any possible C13 peaks and changes multichar...
Definition aa.cpp:39
const AaModification * AaModificationP
unsigned int uint
Definition types.h:67
std::vector< SpectrumIdentificationItem > spectrumIdentificationItemList
std::shared_ptr< Protein > protein_sp