libpappsomspp
Library for mass spectrometry
Loading...
Searching...
No Matches
sagereader.cpp
Go to the documentation of this file.
1/**
2 * \file input/sage/sagereader.cpp
3 * \date 21/08/2024
4 * \author Olivier Langella
5 * \brief read data files from Sage output
6 */
7
8/*******************************************************************************
9 * Copyright (c) 2024 Olivier Langella
10 *<Olivier.Langella@universite-paris-saclay.fr>.
11 *
12 * This file is part of i2MassChroQ.
13 *
14 * i2MassChroQ is free software: you can redistribute it and/or modify
15 * it under the terms of the GNU General Public License as published by
16 * the Free Software Foundation, either version 3 of the License, or
17 * (at your option) any later version.
18 *
19 * i2MassChroQ is distributed in the hope that it will be useful,
20 * but WITHOUT ANY WARRANTY; without even the implied warranty of
21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 * GNU General Public License for more details.
23 *
24 * You should have received a copy of the GNU General Public License
25 * along with i2MassChroQ. If not, see <http://www.gnu.org/licenses/>.
26 *
27 ******************************************************************************/
28
29#include "sagereader.h"
30#include <QJsonObject>
31#include <QJsonArray>
32#include <odsstream/tsvreader.h>
33#include <odsstream/odsexception.h>
34#include <QUrl>
35#include <qcontainerfwd.h>
36#include <qfileinfo.h>
37#include <qlogging.h>
38#include <qobject.h>
39#include "sagetsvhandler.h"
44
47 const pappso::cbor::psm::SageFileReader &sage_file_reader,
48 const QString &sage_json_file)
49 : m_sageFileReader(sage_file_reader)
50{
51 mp_monitor = p_monitor;
52 mp_cborWriter = p_output;
53 m_jsonAbsoluteFilePath = sage_json_file;
54}
55
59
60const QString &
65
66
67void
72
78
79void
81 const QString &sequence_in)
82{
83 QString accession = description_in.split(" ", Qt::SkipEmptyParts).at(0);
84 try
85 {
86 const PsmProtein &psm_protein = mp_self->m_psmProteinMap.getByAccession(accession);
87 psm_protein.protein_sp.get()->setSequence(sequence_in);
88 psm_protein.protein_sp.get()->setDescription(description_in);
89 }
91 {
92 }
93 try
94 {
95 accession = accession.prepend(m_decoyTag);
96 const PsmProtein &psm_protein = mp_self->m_psmProteinMap.getByAccession(accession);
97 psm_protein.protein_sp.get()->setSequence(sequence_in);
98 psm_protein.protein_sp.get()->setDescription(description_in);
99 psm_protein.protein_sp.get()->reverse();
100 }
101 catch(pappso::ExceptionNotFound &err)
102 {
103 }
104}
105
111
112
/**
 * \brief Run the whole Sage import: TSV results, FASTA sequences, then the
 *        CBOR sections.
 *
 * Steps, in order:
 *  - refresh the mzML path list from the Sage JSON document;
 *  - locate the Sage TSV result file and parse it through \c handler;
 *  - parse the FASTA database with a FastaSeq bound to this reader;
 *  - append the "protein_map" and "sample_list" keys to the CBOR writer,
 *    the sample list being written by \c handler inside an array.
 *
 * \throws pappso::PappsoException when an OdsException is caught around the
 *         TSV parsing or around the sample list writing
 *
 * NOTE(review): the signature line and the declaration of \c handler are not
 * part of this listing; presumably \c handler is the handler type declared in
 * sagetsvhandler.h - confirm against the full source.
 */
113void
115{
116
117 extractMzmlPathList(m_sageFileReader.getJsonDocument());
118 // getTsvFilePath(mp_identificationDataSource->getJsonDocument().object());
119 QString file_str = getTsvFilePath(m_sageFileReader.getJsonDocument());
120 QFileInfo tsv_file_info(file_str);
 // Parse the TSV file; any OdsException is rethrown as a PappsoException
 // that names the file.
122 try
123 {
124 TsvReader tsv_reader(handler);
125
126 QFile tsv_file(tsv_file_info.absoluteFilePath());
127 tsv_reader.parse(tsv_file);
128 tsv_file.close();
129 }
130 catch(OdsException &error_ods)
131 {
132 throw pappso::PappsoException(QObject::tr("Error reading %1 file:\n %2")
133 .arg(tsv_file_info.absoluteFilePath())
134 .arg(error_ods.qwhat()));
135 }
136
137
138 // collect protein sequences
139 QFile fastaFile(getFastaFilePath(m_sageFileReader.getJsonDocument()));
140 SageReader::FastaSeq seq(this);
141 pappso::FastaReader reader(seq);
142 reader.parse(fastaFile);
143
 // NOTE(review): qDebug() without any streamed value; looks like a leftover
 // trace.
144 qDebug();
 // NOTE(review): the statement that follows the "protein_map" key is not
 // visible in this listing.
145 mp_cborWriter->append("protein_map");
147
148
149 mp_cborWriter->append("sample_list");
150 mp_cborWriter->startArray();
 // NOTE(review): same "Error reading ... file" message as above although the
 // guarded call here writes the sample list.
151 try
152 {
153 handler.writeSampleList();
154 }
155 catch(OdsException &error_ods)
156 {
157 throw pappso::PappsoException(QObject::tr("Error reading %1 file:\n %2")
158 .arg(tsv_file_info.absoluteFilePath())
159 .arg(error_ods.qwhat()));
160 }
161
162 mp_cborWriter->endArray();
163}
164
165void
167{
168
169 QJsonObject sage_object = json_doc.object();
170 QJsonValue json_mzml_path_list = sage_object.value("mzml_paths");
171 if(json_mzml_path_list.isUndefined())
172 {
173 throw pappso::ExceptionNotFound(QObject::tr("mzml_paths not found in Sage json document"));
174 }
175 m_mzmlPathList.clear();
176
177 for(auto path_mzml : json_mzml_path_list.toArray())
178 {
179 m_mzmlPathList << convertToLocalFileOrDie(path_mzml.toString());
180 }
181}
182
183const QString &
184pappso::cbor::psm::SageReader::getMzmlPath(const QString &file_msrun) const
185{
186 for(auto &file_path : m_mzmlPathList)
187 {
188 if(file_path.endsWith(file_msrun))
189 return file_path;
190 }
192 QObject::tr("MS run %1 not found in Sage json document").append(file_msrun));
193}
194
195
196QString
198{
199 QString path;
200 QJsonObject sage_object = json_doc.object();
201 QJsonValue output_path = sage_object.value("output_paths");
202 if(output_path.isUndefined())
203 {
204 throw pappso::ExceptionNotFound(QObject::tr("output_paths not found in Sage json document"));
205 }
206
207 if(!output_path.isArray())
208 {
209 throw pappso::ExceptionNotFound(QObject::tr("output_paths is not an array"));
210 }
211 for(auto element : output_path.toArray())
212 {
213 if(element.isString())
214 {
215 if(element.toString().endsWith(".tsv"))
216 {
217 path = element.toString();
218 }
219 }
220 }
221
222 return convertToLocalFileOrDie(path);
223}
224
225QString
227{
228 QString path;
229 QJsonObject sage_object = json_doc.object();
230 QJsonValue database = sage_object.value("database");
231 if(database.isUndefined())
232 {
233 throw pappso::ExceptionNotFound(QObject::tr("database not found in Sage json document"));
234 }
235 path = database.toObject().value("fasta").toString();
236 if(path.isEmpty())
237 {
238 throw pappso::ExceptionNotFound(QObject::tr("fasta value is empty"));
239 }
240
241
242 return convertToLocalFileOrDie(path);
243}
244
/**
 * \brief Turn a path that may be written as a URL into a local file path.
 *
 * Only strings starting with "file:", "http:" or "https:" are examined as
 * URLs:
 *  - a valid URL that designates a local file is converted with
 *    QUrl::toLocalFile();
 *  - a valid URL that is not a local file ends in the "Unable to load data
 *    from remote URL" error branch;
 *  - an invalid URL falls through and is returned unchanged.
 * Any other string is returned unchanged.
 *
 * NOTE(review): the signature line and the beginning of the statement in the
 * remote URL branch are not part of this listing; presumably that statement
 * throws an exception carrying the message - confirm the exception type
 * against the full source.
 */
245QString
247{
248
249 // if we have a URL: convert it to a local file
250 qDebug() << path;
251 if(path.startsWith("file:") || path.startsWith("http:") || path.startsWith("https:"))
252 {
 // the variable is named tsv_url, but nothing here is specific to TSV files
253 QUrl tsv_url(path);
254 if(tsv_url.isValid())
255 {
256 qDebug() << "tsv_url.isValid()";
257 if(tsv_url.isLocalFile())
258 {
259 qDebug() << "tsv_url.isLocalFile()";
260 return tsv_url.toLocalFile();
261 }
262 else
263 {
 // remote resources are not fetched: this branch only reports the URL
265 QObject::tr("Unable to load data from remote URL %1").arg(tsv_url.toString()));
266 }
267 }
268 }
 // not a URL (or an invalid one): hand the input back as is
269 return path;
270}
271
272std::vector<pappso::cbor::psm::SageReader::SageModification>
274{
275 std::vector<SageReader::SageModification> list;
276 QJsonObject sage_object = m_sageFileReader.getJsonDocument().object();
277 QJsonValue database = sage_object.value("database");
278 if(database.isUndefined())
279 {
280 throw pappso::ExceptionNotFound(QObject::tr("database not found in Sage json document"));
281 }
282
283 QJsonValue static_mods = database.toObject().value("static_mods");
284 if(static_mods.isUndefined())
285 {
286 throw pappso::ExceptionNotFound(QObject::tr("static_mods not found in Sage json document"));
287 }
288 for(QString residue_str : static_mods.toObject().keys())
289 {
290 SageModification modif;
291 modif.residue = residue_str.at(0);
293 (Enums::AminoAcidChar)modif.residue.toLatin1(),
294 static_mods.toObject().value(residue_str).toDouble());
295 modif.strModification =
296 QString::number(static_mods.toObject().value(residue_str).toDouble(), 'f', 6);
297 if(modif.strModification.isEmpty())
298 {
299 throw pappso::PappsoException(QObject::tr(" modif.strModification is empty"));
300 }
301 if(modif.modification->getMass() < 0)
302 {
303 modif.strModification = QString("[%1]").arg(modif.strModification);
304 }
305 else
306 {
307 modif.strModification = QString("[+%1]").arg(modif.strModification);
308 }
309 list.push_back(modif);
310 }
311 return list;
312}
313
314std::vector<pappso::cbor::psm::SageReader::SageModification>
316{
317 std::vector<SageReader::SageModification> list;
318 QJsonObject sage_object = m_sageFileReader.getJsonDocument().object();
319 QJsonValue database = sage_object.value("database");
320 if(database.isUndefined())
321 {
322 throw pappso::ExceptionNotFound(QObject::tr("database not found in Sage json document"));
323 }
324
325 QJsonValue var_mods = database.toObject().value("variable_mods");
326 if(var_mods.isUndefined())
327 {
328 throw pappso::ExceptionNotFound(QObject::tr("static_mods not found in Sage json document"));
329 }
330 for(QString residue_str : var_mods.toObject().keys())
331 {
332 SageModification modif;
333 modif.residue = residue_str.at(0);
334 for(QJsonValue one_mass : var_mods.toObject().value(residue_str).toArray())
335 {
337 (Enums::AminoAcidChar)modif.residue.toLatin1(), one_mass.toDouble());
338 modif.strModification = QString::number(one_mass.toDouble(), 'f', 6);
339 if(modif.strModification.isEmpty())
340 {
341 throw pappso::PappsoException(QObject::tr(" modif.strModification is empty"));
342 }
343 if(modif.modification->getMass() < 0)
344 {
345 modif.strModification = QString("[%1]").arg(modif.strModification);
346 }
347 else
348 {
349 modif.strModification = QString("[+%1]").arg(modif.strModification);
350 }
351 list.push_back(modif);
352 }
353 }
354 return list;
355}
356
/**
 * \brief Read the decoy tag stored under "database" / "decoy_tag" in the
 *        Sage JSON document held by m_sageFileReader.
 *
 * \return the "decoy_tag" value as a string
 * \throws pappso::ExceptionNotFound when "database" is absent or when the
 *         "decoy_tag" value converts to an empty string
 *
 * NOTE(review): the signature line is not part of this listing, so the
 * function name and qualifiers cannot be stated here.
 */
357QString
359{
 // "path" holds the decoy tag, not a file path
360 QString path;
361 QJsonObject sage_object = m_sageFileReader.getJsonDocument().object();
362 QJsonValue database = sage_object.value("database");
363 if(database.isUndefined())
364 {
365 throw pappso::ExceptionNotFound(QObject::tr("database not found in Sage json document"));
366 }
367 path = database.toObject().value("decoy_tag").toString();
368 if(path.isEmpty())
369 {
370 throw pappso::ExceptionNotFound(QObject::tr("decoy_tag value is empty"));
371 }
372 return path;
373}
374
pappso_double getMass() const
void parse(QFile &fastaFile)
static AaModificationP guessAaModificationPbyMonoisotopicMassDelta(Enums::AminoAcidChar aa, pappso_double mass)
Definition utils.cpp:658
overrides QCborStreamWriter base class to provide convenient functions
void setSequence(const QString &description_in, const QString &sequence_in) override
const QString & getMzmlPath(const QString &file_msrun) const
SageReader(pappso::UiMonitorInterface *p_monitor, pappso::cbor::CborStreamWriter *p_output, const SageFileReader &sage_file_reader, const QString &sage_json_file)
std::vector< SageModification > getStaticModificationList() const
pappso::cbor::CborStreamWriter * mp_cborWriter
Definition sagereader.h:100
const QString & getmJsonAbsoluteFilePath() const
void extractMzmlPathList(const QJsonDocument &json_doc)
pappso::cbor::CborStreamWriter & getCborStreamWriter() const
const SageFileReader & m_sageFileReader
Definition sagereader.h:98
std::vector< SageModification > getVariableModificationList() const
QString getFastaFilePath(const QJsonDocument &json_doc)
const SageFileReader & getSageFileReader() const
QString getTsvFilePath(const QJsonDocument &json_doc)
pappso::UiMonitorInterface * mp_monitor
Definition sagereader.h:99
QString convertToLocalFileOrDie(const QString &file_str) const
std::shared_ptr< Protein > protein_sp