54 return QString(
"%1 %2").arg(
name).arg(
value);
60 const QFileInfo &mzident_file)
64 qDebug() << mzident_file.absoluteFilePath() <<
"'";
79 if(!
readFile(mzident_file.absoluteFilePath()))
85 QObject::tr(
"Error reading %1 not mzIdentML file :\n %2")
86 .arg(mzident_file.absoluteFilePath())
92 .arg(mzident_file.absoluteFilePath())
107 .arg(mzident_file.absoluteFilePath())
108 .arg(other_err.
qwhat()));
124 QStringList fasta_files;
127 fasta_files << pair_searchdb.second.file;
138 psm_protein.
isTarget = !pair_sequence.second.is_decoy;
139 psm_protein.
protein_sp = pair_sequence.second.protein_sp;
149 for(std::pair<
const QString, std::vector<SpectrumIdentificationResult>> &pair_spectra :
294 QCborMap identification_engine_parameters;
331 identification_engine_parameters.insert(
352 identification_engine_parameters.toCborValue());
383 QString analysisSoftwareId;
404 if(software_name ==
"SpecOMS")
410 else if(software_name ==
"DeepProt")
431 else if(cv_param.
accession ==
"MS:1002048")
435 else if(cv_param.
accession ==
"MS:1001946")
450 switch(analysisSotwareNameFound)
463 QObject::tr(
"identification results from %1 are not supported yet, "
466 .arg(software_name));
472 std::pair<QString, IdentificationEngine>(analysisSoftwareId, analysisSotwareNameFound));
474 if(it.second ==
false)
476 it.first->second = analysisSotwareNameFound;
511 QObject::tr(
"Error in MzIdentML input file, %1 no DBSequence")
531 QString accession_description = dbsequence.
accession;
550 accession_description.append(
" ").append(dbsequence.
description);
552 else if(cv_param.
accession ==
"MS:1001195")
573 QObject::tr(
"Error in MzIdentML/DBSequence unexpected %1 tag")
581 dbsequence.
protein_sp = std::make_shared<Protein>(accession_description, dbsequence.
sequence);
620 std::shared_ptr<pappso::Peptide> peptide_sp;
629 peptide_sp = std::make_shared<Peptide>(
m_qxmlStreamReader.readElementText().simplified());
635 QObject::tr(
"Error in MzIdentML/Peptide no PeptideSequence"));
652 bool is_cv_param =
false;
695 qInfo() <<
"MzIdentMlSaxHandler::endElement_"
696 "Modification unknown "
707 QObject::tr(
"Error in MzIdentML/Peptide/Modification/cvParam "
708 "modification accession %1 not found")
715 peptide_sp.get()->addAaModification(modif, 0);
719 peptide_sp.get()->addAaModification(modif, modification.
location - 1);
726 QObject::tr(
"Error in MzIdentML/Peptide/Modification "
732 if(is_cv_param ==
false)
739 modif_without_cvparam =
741 peptide_sp.get()->getAa(0).getAminoAcidChar(),
743 peptide_sp.get()->addAaModification(modif_without_cvparam, 0);
747 modif_without_cvparam =
749 peptide_sp.get()->getAa(modification.
location - 1).getAminoAcidChar(),
751 peptide_sp.get()->addAaModification(modif_without_cvparam,
760 m_qxmlStreamReader.raiseError(QObject::tr(
"Error in MzIdentML/Peptide unexpected %1 tag")
766 m_PeptideIdMap.insert(std::pair<QString, PeptideSp>(xml_id, peptide_sp));
780 QObject::tr(
"dBSequence_ref %1 not defined")
785 pe.
protein = itprot->second.protein_sp;
792 QObject::tr(
"peptide_ref %1 not defined")
833 QObject::tr(
"Error in MzIdentML/AnalysisCollection, unexpected %1 t")
871 QObject::tr(
"Error in MzIdentML/DataCollection, unexpected %1 tag")
902 QObject::tr(
"Error in MzIdentML/DataCollection/Inputs, unexpected %1 tag")
946 if(search_database.
file.isEmpty())
948 m_qxmlStreamReader.raiseError(QObject::tr(
"SearchDatabase id %1 location is empty").arg(
id));
1002 QObject::tr(
"Error in "
1003 "MzIdentML/DataCollection/AnalysisData/"
1004 "SpectrumIdentificationList, unexpected %1 tag")
1018 QObject::tr(
"Error in MzIdentML/DataCollection/AnalysisData, "
1019 "unexpected %1 tag")
1033 QString spectra_id =
m_qxmlStreamReader.attributes().value(
"spectraData_ref").toString();
1039 "m_mzidSpectraDataIdMap")
1045 it_insert_ident.first->second.push_back({});
1047 it_insert_ident.first->second.back();
1048 spectrum_identification_result.
cvParamList.clear();
1071 spectrum_identification_result.
cvParamList.push_back(cv_param);
1082 QObject::tr(
"Error in "
1083 "MzIdentML/DataCollection/AnalysisData/"
1084 "SpectrumIdentificationList/"
1085 "SpectrumIdentificationResult, unexpected %1 tag")
1098 spectrum_identification_result.
scanNum = 0;
1102 if(spectrum_identification_result.
spectrumID.startsWith(
"index="))
1106 spectrum_identification_result.
spectrumID.mid(6).toULongLong(&is_ok);
1111 "SpectrumIdentificationResult id %2")
1112 .arg(spectrum_identification_result.
spectrumID)
1113 .arg(spectrum_identification_result.
id));
1120 for(
auto cvParam : spectrum_identification_result.
cvParamList)
1123 qDebug() << cvParam.toString();
1124 if(cvParam.accession ==
"MS:1001115")
1126 spectrum_identification_result.
scanNum = cvParam.value.toUInt();
1128 else if(cvParam.accession ==
"MS:1003062")
1131 spectrum_identification_result.
spectrumIndex = cvParam.value.toUInt();
1133 else if((cvParam.accession ==
"MS:1000016") || (cvParam.accession ==
"MS:1000894"))
1148 spectrum_identification_result.
retentionTime = cvParam.value.toDouble();
1151 if((spectrum_identification_result.
scanNum == 0) &&
1154 m_qxmlStreamReader.raiseError(QObject::tr(
"scan number or spectrum index not found in "
1155 "SpectrumIdentificationResult id %1")
1156 .arg(spectrum_identification_result.
id));
1163 QObject::tr(
"retention time not found in SpectrumIdentificationResult id %1")
1164 .arg(spectrum_identification_result.
id));
1168 for(
auto spectrumIdentificationItem :
1185 .mzidPeptideEvidenceList.clear();
1219 QObject::tr(
"peptideEvidence_ref %1 not defined")
1224 .mzidPeptideEvidenceList.push_back(itpeptideEvidence->second);
1239 .userParamList.push_back(user_param);
1245 "MzIdentML/DataCollection/AnalysisData/"
1246 "SpectrumIdentificationList/"
1247 "SpectrumIdentificationResult/"
1248 "SpectrumIdentificationItem, unexpected %1 tag")
1485 if(spectrum_ident_item.chargeState != charge)
1488 if(spectrum_ident_item.experimentalMassToCharge != exp_mz)
1506 std::map<QString, std::vector<std::size_t>> map_protein_positions;
1510 map_protein_positions.insert({it_peptide_evidence.protein.get()->getAccession(), {}});
1511 it_insert.first->second.push_back(it_peptide_evidence.start);
1515 for(
auto &it_prot_pos : map_protein_positions)
1548 std::find_if(cv_param_list.begin(), cv_param_list.end(), [](
const CvParam &cv_param) {
1549 return cv_param.accession ==
"MS:1001330";
1551 if(it_find != cv_param_list.end())
1558 std::find_if(cv_param_list.begin(), cv_param_list.end(), [](
const CvParam &cv_param) {
1559 return cv_param.accession ==
"MS:1001331";
static AaModificationP getInstance(const QString &accession)
static AaModificationP getInstanceCustomizedMod(pappso_double modificationMass)
exception to use when an item type is not recognized
virtual const QString & qwhat() const
static AaModificationP guessAaModificationPbyMonoisotopicMassDelta(Enums::AminoAcidChar aa, pappso_double mass)
static AaModificationP translateAaModificationFromUnimod(const QString &unimod_accession)
static QString getVersion()
QString errorString() const
QXmlStreamReader m_qxmlStreamReader
virtual bool readFile(const QString &fileName)
overrides QCborStreamWriter base class to provide convenient functions
pappso::cbor::CborStreamWriter * mp_cborWriter
UserParam readUserParam()
std::map< QString, PeptideSp > m_PeptideIdMap
store association between xml ID and peptide sequence
MzIdentMlReader(pappso::UiMonitorInterface *p_monitor, pappso::cbor::CborStreamWriter *p_output, const QFileInfo &mzident_file)
void readSpectrumIdentification()
IdentificationEngine m_identificationEngine
bool readPeptideEvidence()
virtual void readStream() override
@ SpecOMS
SpecOMS C++ implementation.
@ MSGFplus
MS:1002048 "MS-GF+ software used to analyze the spectra." [PSI:PI].
@ XTandem
MS:1001476 X!Tandem was used to analyze the spectra.
@ PEAKS_Studio
PEAKS Studio.
PsmProteinMap m_proteinMap
bool readAnalysisCollectionItem()
bool readSequenceCollectionItem()
bool writeTandemEval(const std::vector< CvParam > &cv_param_list)
void readSpectrumIdentificationResult()
bool readSearchDatabase()
QCborMap m_cborParameterMap
void writeSpectrumIdentificationItem(const SpectrumIdentificationItem &spectrum_identification_item)
void readAnalysisProtocolCollection()
std::map< QString, MzidPeptideEvidence > m_MzidPeptideEvidenceIdMap
store association between xml ID and peptide evidence
pappso::UiMonitorInterface * mp_monitor
void writeSpectrumIdentificationResult(const SpectrumIdentificationResult &spectrum_identificatio_result)
bool readAnalysisSoftware()
std::map< QString, IdentificationEngine > m_IdentificationEngineMap
store association between xml ID and an identification engine
std::map< QString, MzidSpectraData > m_mzidSpectraDataIdMap
store association between xml ID and SpectraData
std::map< QString, MzidSearchDatabase > m_mzidSearchDatabaseIdMap
store association between xml ID and fasta files
std::map< QString, std::vector< SpectrumIdentificationResult > > m_spectrumIdentificationResultBySpectraIdMap
store all identification results by spectra xml id
bool readDataCollectionItem()
virtual ~MzIdentMlReader()
void readSpectrumIdentificationItem(SpectrumIdentificationResult &spectrum_identification_result)
std::map< QString, MzidDBSequence > m_MzidDBSequenceIdMap
store association between xml ID and DBSequence
QString m_analysisSoftwareVersion
tries to keep monoisotopes as much as possible, removing any possible C13 peaks and changes multichar...
const AaModification * AaModificationP
double monoisotopicMassDelta
std::vector< CvParam > cvParamList
QString searchDatabase_ref
std::shared_ptr< Protein > protein_sp
std::vector< CvParam > cvParamList
std::vector< MzidPeptideEvidence > mzidPeptideEvidenceList
std::vector< SpectrumIdentificationItem > spectrumIdentificationItemList
std::vector< UserParam > userParamList
std::vector< CvParam > cvParamList
std::size_t spectrumIndex
std::shared_ptr< Protein > protein_sp