Skip to content
This repository was archived by the owner on Mar 20, 2025. It is now read-only.

Commit 63fd483

Browse files
committed
XmppSocket: Parse whole stream through QXmlStreamReader
1 parent 0150772 commit 63fd483

12 files changed

+323
-124
lines changed

src/base/Stream.cpp

+247-95
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,15 @@
2222
using namespace QXmpp;
2323
using namespace QXmpp::Private;
2424

25+
// Merges several callables into one visitor object: the resulting type inherits from every
// callable and exposes all of their call operators as a single overload set. Intended to be
// passed to std::visit.
template<typename... Visitors>
struct overloaded : Visitors... {
    using Visitors::operator()...;
};
// Deduction guide so that `overloaded { f, g }` can be written without spelling out the
// template arguments (no longer required as of C++20).
template<typename... Visitors>
overloaded(Visitors...) -> overloaded<Visitors...>;
33+
2534
namespace QXmpp::Private {
2635

2736
StreamOpen StreamOpen::fromXml(QXmlStreamReader &reader)
@@ -187,6 +196,103 @@ void StreamErrorElement::toXml(QXmlStreamWriter *writer) const
187196
writer->writeEndElement();
188197
}
189198

199+
// Maps an XML token type that is rejected on XMPP streams to the matching error text.
//
// Returns a default-constructed (null) QString for every token type that has no dedicated
// message.
static QString restrictedXmlErrorText(QXmlStreamReader::TokenType token)
{
    QString reason;
    switch (token) {
    case QXmlStreamReader::Comment:
        reason = u"XML comments are not allowed in XMPP."_s;
        break;
    case QXmlStreamReader::DTD:
        reason = u"XML DTDs are not allowed in XMPP."_s;
        break;
    case QXmlStreamReader::EntityReference:
        reason = u"XML entity references are not allowed in XMPP."_s;
        break;
    case QXmlStreamReader::ProcessingInstruction:
        reason = u"XML processing instructions are not allowed in XMPP."_s;
        break;
    default:
        // all other tokens: leave the text null
        break;
    }
    return reason;
}
214+
215+
// Builds a DOM element from the tokens delivered by 'r'.
//
// The token 'r' currently points at is handled first, further tokens are fetched with
// readNext(). The function returns
//  * the document element once the element that was opened on level 0 has been closed,
//  * Unfinished when the reader reports PrematureEndOfDocumentError (call again after more
//    data has been added to the reader),
//  * an Error for malformed XML, for XML features this reader rejects, or when it is used in
//    an unexpected state.
//
// Progress is kept in the members 'doc', 'currentElement' and 'depth', so parsing can be
// resumed with a later call.
DomReader::Result DomReader::process(QXmlStreamReader &r)
{
    for (;; r.readNext()) {
        switch (r.tokenType()) {
        case QXmlStreamReader::NoToken:
            // nothing to handle, continue with the next token
            break;
        case QXmlStreamReader::Invalid:
            // the reader stopped: either it just needs more input or the XML is broken
            if (r.error() != QXmlStreamReader::PrematureEndOfDocumentError) {
                return Error { NotWellFormed, r.errorString() };
            }
            return Unfinished {};
        case QXmlStreamReader::Comment:
        case QXmlStreamReader::DTD:
        case QXmlStreamReader::EntityReference:
        case QXmlStreamReader::ProcessingInstruction:
            return Error { UnsupportedXmlFeature, restrictedXmlErrorText(r.tokenType()) };
        case QXmlStreamReader::StartDocument:
        case QXmlStreamReader::EndDocument:
            Q_ASSERT_X(false, "DomReader", "Received document begin or end.");
            return Error { InvalidState, u"Invalid state: Received document begin or end."_s };
        case QXmlStreamReader::StartElement: {
            // prefixed elements are created namespace-aware, all others by their plain name
            const bool hasPrefix = !r.prefix().isNull();
            auto element = hasPrefix
                ? doc.createElementNS(r.namespaceUri().toString(), r.qualifiedName().toString())
                : doc.createElement(r.name().toString());

            // a default namespace declaration becomes an 'xmlns' attribute; declarations that
            // bind a prefix are rejected
            const auto declarations = r.namespaceDeclarations();
            for (const auto &declaration : declarations) {
                if (!declaration.prefix().isEmpty()) {
                    return Error { UnsupportedXmlFeature, u"XML namespace declarations are not allowed in XMPP."_s };
                }
                element.setAttribute(u"xmlns"_s, declaration.namespaceUri().toString());
            }

            // regular attributes
            // NOTE(review): name() is the local name, so a prefixed attribute such as
            // 'xml:lang' is stored as 'lang' - confirm that this is what readers of the
            // resulting DOM expect.
            const auto elementAttributes = r.attributes();
            for (const auto &attribute : elementAttributes) {
                element.setAttribute(attribute.name().toString(), attribute.value().toString());
            }

            // attach to the tree and descend into the new element
            if (!currentElement.isNull()) {
                currentElement.appendChild(element);
            } else {
                doc.appendChild(element);
            }
            ++depth;
            currentElement = element;
            break;
        }
        case QXmlStreamReader::Characters:
            // text is only valid inside an element: the reader has to be started on an
            // element start
            Q_ASSERT(depth > 0);
            if (depth == 0) {
                return Error { InvalidState, u"Invalid state: Received top-level character data instead of element begin."_s };
            }

            currentElement.appendChild(doc.createTextNode(r.text().toString()));
            break;
        case QXmlStreamReader::EndElement:
            Q_ASSERT(depth > 0);
            if (depth == 0) {
                return Error { InvalidState, u"Invalid state: Received element end instead of element start."_s };
            }

            // step back up to the parent
            currentElement = currentElement.parentNode().toElement();
            --depth;
            if (depth == 0) {
                // the top-level element has been closed: the DOM is complete
                return doc.documentElement();
            }
            break;
        }
    }
}
295+
190296
XmppSocket::XmppSocket(QObject *parent)
191297
: QXmppLoggable(parent)
192298
{
@@ -206,16 +312,20 @@ void XmppSocket::setSocket(QSslSocket *socket)
206312

207313
// do not emit started() with direct TLS (this happens in encrypted())
208314
if (!m_directTls) {
209-
m_dataBuffer.clear();
210-
m_streamOpenElement.clear();
315+
m_reader.clear();
316+
m_streamReceived = false;
211317
Q_EMIT started();
212318
}
213319
});
320+
QObject::connect(socket, &QAbstractSocket::disconnected, this, [this]() {
321+
// reset error state
322+
m_errorOccurred = false;
323+
});
214324
QObject::connect(socket, &QSslSocket::encrypted, this, [this]() {
215325
debug(u"Socket encrypted"_s);
216326
// this happens with direct TLS or STARTTLS
217-
m_dataBuffer.clear();
218-
m_streamOpenElement.clear();
327+
m_reader.clear();
328+
m_streamReceived = false;
219329
Q_EMIT started();
220330
});
221331
QObject::connect(socket, &QSslSocket::errorOccurred, this, [this](QAbstractSocket::SocketError) {
@@ -271,108 +381,150 @@ bool XmppSocket::sendData(const QByteArray &data)
271381
return m_socket->write(data) == data.size();
272382
}
273383

274-
void XmppSocket::processData(const QString &data)
384+
void XmppSocket::throwStreamError(const StreamErrorElement &error)
275385
{
276-
// As we may only have partial XML content, we need to cache the received
277-
// data until it has been successfully parsed. In case it can't be parsed,
278-
//
279-
// There are only two small problems with the current strategy:
280-
// * When we receive a full stanza + a partial one, we can't parse the
281-
// first stanza until another stanza arrives that is complete.
282-
// * We don't know when we received invalid XML (would cause a growing
283-
// cache and a timeout after some time).
284-
// However, both issues could only be solved using an XML stream reader
285-
// which would cause many other problems since we don't actually use it for
286-
// parsing the content.
287-
m_dataBuffer.append(data);
288-
289-
//
290-
// Check for whitespace pings
291-
//
292-
if (m_dataBuffer.isEmpty() || m_dataBuffer.trimmed().isEmpty()) {
293-
m_dataBuffer.clear();
386+
Q_ASSERT(!m_errorOccurred);
387+
m_errorOccurred = true;
294388

295-
logReceived({});
296-
Q_EMIT stanzaReceived(QDomElement());
297-
return;
298-
}
389+
sendData(serializeXml(error));
390+
m_socket->disconnectFromHost();
391+
Q_EMIT streamErrorSent(error);
392+
}
299393

300-
//
301-
// Check whether we received a stream open or closing tag
302-
//
303-
static const QRegularExpression streamStartRegex(uR"(^(<\?xml.*\?>)?\s*<stream:stream[^>]*>)"_s);
304-
static const QRegularExpression streamEndRegex(u"</stream:stream>$"_s);
305-
306-
auto streamOpenMatch = streamStartRegex.match(m_dataBuffer);
307-
bool hasStreamOpen = streamOpenMatch.hasMatch();
308-
309-
bool hasStreamClose = streamEndRegex.match(m_dataBuffer).hasMatch();
310-
311-
//
312-
// The stream start/end and stanza packets can't be parsed without any
313-
// modifications with QDomDocument. This is because of multiple reasons:
314-
// * The <stream:stream> open element is not considered valid without the
315-
// closing tag.
316-
// * Only the closing tag is of course not valid too.
317-
// * Stanzas/Nonzas need to have the correct stream namespaces set:
318-
// * For being able to parse <stream:features/>
319-
// * For having the correct namespace (e.g. 'jabber:client') set to
320-
// stanzas and their child elements (e.g. <body/> of a message).
321-
//
322-
// The wrapping strategy looks like this:
323-
// * The stream open tag is cached once it arrives, for later access
324-
// * Incoming XML that has no <stream> open tag will be prepended by the
325-
// cached <stream> tag.
326-
// * Incoming XML that has no <stream> close tag will be appended by a
327-
// generic string "</stream:stream>"
328-
//
329-
// The result is parsed by QDomDocument and the child elements of the stream
330-
// are processed. In case the received data contained a stream open tag,
331-
// the stream is processed (before the stanzas are processed). In case we
332-
// received a </stream> closing tag, the connection is closed.
333-
//
334-
auto wrappedStanzas = m_dataBuffer;
335-
if (!hasStreamOpen) {
336-
wrappedStanzas.prepend(m_streamOpenElement);
337-
}
338-
if (!hasStreamClose) {
339-
wrappedStanzas.append(u"</stream:stream>"_s);
394+
void XmppSocket::processData(const QString &data)
395+
{
396+
// stop parsing after an error has occurred
397+
if (m_errorOccurred) {
398+
return;
340399
}
341400

342-
//
343-
// Try to parse the wrapped XML
344-
//
345-
QDomDocument doc;
346-
#if QT_VERSION >= QT_VERSION_CHECK(6, 5, 0)
347-
if (!doc.setContent(wrappedStanzas, QDomDocument::ParseOption::UseNamespaceProcessing)) {
348-
#else
349-
if (!doc.setContent(wrappedStanzas, true)) {
350-
#endif
401+
// Check for whitespace pings
402+
if (data.isEmpty()) {
403+
logReceived({});
404+
Q_EMIT stanzaReceived(QDomElement());
351405
return;
352406
}
353407

354-
//
355-
// Success: We can clear the buffer and send a 'received' log message
356-
//
357-
logReceived(m_dataBuffer);
358-
m_dataBuffer.clear();
359-
360-
// process stream start
361-
if (hasStreamOpen) {
362-
m_streamOpenElement = streamOpenMatch.captured();
363-
Q_EMIT streamReceived(doc.documentElement());
364-
}
408+
// log data received and process
409+
logReceived(data);
410+
m_reader.addData(data);
411+
412+
// 'm_reader' parses the XML stream and 'm_domReader' creates DOM elements with the data from
413+
// 'm_reader'. 'm_domReader' lives as long as one stanza element is parsed.
414+
415+
auto readDomElement = [this]() {
416+
return std::visit(
417+
overloaded {
418+
[this](const QDomElement &element) {
419+
m_domReader.reset();
420+
Q_EMIT stanzaReceived(element);
421+
return true;
422+
},
423+
[](DomReader::Unfinished) {
424+
return false;
425+
},
426+
[this](const DomReader::Error &error) {
427+
switch (error.type) {
428+
case DomReader::InvalidState:
429+
throwStreamError({
430+
StreamError::InternalServerError,
431+
u"Experienced internal error while parsing XML."_s,
432+
});
433+
break;
434+
case DomReader::NotWellFormed:
435+
throwStreamError({
436+
StreamError::NotWellFormed,
437+
u"Not well-formed: "_s + error.text,
438+
});
439+
break;
440+
case DomReader::UnsupportedXmlFeature:
441+
throwStreamError({ StreamError::RestrictedXml, error.text });
442+
break;
443+
}
444+
return false;
445+
},
446+
},
447+
m_domReader->process(m_reader));
448+
};
365449

366-
// process stanzas
367-
auto stanza = doc.documentElement().firstChildElement();
368-
for (; !stanza.isNull(); stanza = stanza.nextSiblingElement()) {
369-
Q_EMIT stanzaReceived(stanza);
450+
// we're still reading a previously started top-level element
451+
if (m_domReader) {
452+
m_reader.readNext();
453+
if (!readDomElement()) {
454+
return;
455+
}
370456
}
371457

372-
// process stream end
373-
if (hasStreamClose) {
374-
Q_EMIT streamClosed();
375-
}
458+
do {
459+
switch (m_reader.readNext()) {
460+
case QXmlStreamReader::Invalid:
461+
// error received
462+
if (m_reader.error() != QXmlStreamReader::PrematureEndOfDocumentError) {
463+
return throwStreamError({ StreamError::NotWellFormed, m_reader.errorString() });
464+
}
465+
break;
466+
case QXmlStreamReader::StartDocument:
467+
// pre-stream open
468+
break;
469+
case QXmlStreamReader::EndDocument:
470+
// post-stream close
471+
break;
472+
case QXmlStreamReader::StartElement:
473+
// stream open or stream-level element
474+
if (m_reader.name() == u"stream" && m_reader.namespaceUri() == ns_stream) {
475+
// check for 'stream:stream' (this is required by the spec)
476+
if (m_reader.prefix() != u"stream") {
477+
throwStreamError({
478+
StreamError::BadNamespacePrefix,
479+
u"Top-level stream element must have a namespace prefix of 'stream'."_s,
480+
});
481+
return;
482+
}
483+
484+
m_streamReceived = true;
485+
Q_EMIT streamReceived(StreamOpen::fromXml(m_reader));
486+
} else if (!m_streamReceived) {
487+
throwStreamError({
488+
StreamError::BadFormat,
489+
u"Invalid element received. Expected 'stream' element qualified by 'http://etherx.jabber.org/streams' namespace."_s,
490+
});
491+
return;
492+
} else {
493+
// parse top-level stream element
494+
m_domReader = DomReader();
495+
if (!readDomElement()) {
496+
return;
497+
}
498+
}
499+
break;
500+
case QXmlStreamReader::EndElement:
501+
// end of stream
502+
Q_EMIT streamClosed();
503+
break;
504+
case QXmlStreamReader::Characters:
505+
if (m_reader.isWhitespace()) {
506+
logReceived({});
507+
Q_EMIT stanzaReceived(QDomElement());
508+
} else {
509+
// invalid: emit error
510+
throwStreamError({
511+
StreamError::BadFormat,
512+
u"Top-level, non-whitespace character data is not allowed in XMPP."_s,
513+
});
514+
return;
515+
}
516+
break;
517+
case QXmlStreamReader::NoToken:
518+
// skip
519+
break;
520+
case QXmlStreamReader::Comment:
521+
case QXmlStreamReader::DTD:
522+
case QXmlStreamReader::EntityReference:
523+
case QXmlStreamReader::ProcessingInstruction:
524+
throwStreamError({ StreamError::RestrictedXml, restrictedXmlErrorText(m_reader.tokenType()) });
525+
return;
526+
}
527+
} while (!m_reader.hasError());
376528
}
377529

378530
} // namespace QXmpp::Private

0 commit comments

Comments
 (0)