From 7cbff80a9b8d3364620fac351e4ea184963377a3 Mon Sep 17 00:00:00 2001 From: Adam Treat Date: Thu, 1 Aug 2024 11:18:50 -0400 Subject: [PATCH] Serialize the source excerpts from and to pure json Signed-off-by: Adam Treat --- gpt4all-chat/chatllm.cpp | 9 +- gpt4all-chat/chatmodel.h | 149 +++++++++------------------------ gpt4all-chat/qml/ChatView.qml | 18 ++-- gpt4all-chat/server.cpp | 16 +--- gpt4all-chat/sourceexcerpt.cpp | 60 ++++++++++--- gpt4all-chat/sourceexcerpt.h | 82 ++++++++++-------- 6 files changed, 149 insertions(+), 185 deletions(-) diff --git a/gpt4all-chat/chatllm.cpp b/gpt4all-chat/chatllm.cpp index 964b218d472b..89e412d61ec0 100644 --- a/gpt4all-chat/chatllm.cpp +++ b/gpt4all-chat/chatllm.cpp @@ -798,10 +798,8 @@ bool ChatLLM::promptInternal(const QList &collectionList, const QString QString docsContext; if (!localDocsExcerpts.isEmpty()) { // FIXME(adam): we should be using the new tool template if available otherwise this I guess - QStringList results; - for (const SourceExcerpt &info : localDocsExcerpts) - results << u"Collection: %1\nPath: %2\nExcerpt: %3"_s.arg(info.collection, info.path, info.text); - docsContext = u"### Context:\n%1\n\n"_s.arg(results.join("\n\n")); + QString json = SourceExcerpt::toJson(localDocsExcerpts); + docsContext = u"### Context:\n%1\n\n"_s.arg(json); } int n_threads = MySettings::globalInstance()->threadCount(); @@ -910,9 +908,6 @@ bool ChatLLM::promptInternal(const QList &collectionList, const QString emit sourceExcerptsChanged(sourceExcerpts); } - // Erase the context of the tool call - m_ctx.n_past = std::max(0, m_ctx.n_past); - m_ctx.tokens.erase(m_ctx.tokens.end() - m_promptResponseTokens, m_ctx.tokens.end()); m_promptResponseTokens = 0; m_promptTokens = 0; m_response = std::string(); diff --git a/gpt4all-chat/chatmodel.h b/gpt4all-chat/chatmodel.h index 97b812750b4e..19be1cf31216 100644 --- a/gpt4all-chat/chatmodel.h +++ b/gpt4all-chat/chatmodel.h @@ -29,7 +29,6 @@ struct ChatItem Q_PROPERTY(bool thumbsUpState MEMBER thumbsUpState) Q_PROPERTY(bool thumbsDownState MEMBER thumbsDownState) Q_PROPERTY(QList sources MEMBER sources) - Q_PROPERTY(QList consolidatedSources MEMBER consolidatedSources) public: // TODO: Maybe we should include the model name here as well as timestamp? @@ -39,7 +38,6 @@ struct ChatItem QString prompt; QString newResponse; QList sources; - QList consolidatedSources; bool currentResponse = false; bool stopped = false; bool thumbsUpState = false; @@ -65,8 +63,7 @@ class ChatModel : public QAbstractListModel StoppedRole, ThumbsUpStateRole, ThumbsDownStateRole, - SourcesRole, - ConsolidatedSourcesRole + SourcesRole }; int rowCount(const QModelIndex &parent = QModelIndex()) const override @@ -102,8 +99,6 @@ class ChatModel : public QAbstractListModel return item.thumbsDownState; case SourcesRole: return QVariant::fromValue(item.sources); - case ConsolidatedSourcesRole: - return QVariant::fromValue(item.consolidatedSources); } return QVariant(); @@ -122,7 +117,6 @@ class ChatModel : public QAbstractListModel roles[ThumbsUpStateRole] = "thumbsUpState"; roles[ThumbsDownStateRole] = "thumbsDownState"; roles[SourcesRole] = "sources"; - roles[ConsolidatedSourcesRole] = "consolidatedSources"; return roles; } @@ -200,20 +194,6 @@ class ChatModel : public QAbstractListModel } } - QList consolidateSources(const QList &sources) { - QMap groupedData; - for (const SourceExcerpt &info : sources) { - QString key = !info.file.isEmpty() ? info.file : info.url; - if (groupedData.contains(key)) { - groupedData[key].text += "\n---\n" + info.text; - } else { - groupedData[key] = info; - } - } - QList consolidatedSources = groupedData.values(); - return consolidatedSources; - } - Q_INVOKABLE void updateSources(int index, const QList &sources) { if (index < 0 || index >= m_chatItems.size()) return; @@ -221,13 +201,10 @@ class ChatModel : public QAbstractListModel ChatItem &item = m_chatItems[index]; if (sources.isEmpty()) { item.sources.clear(); - item.consolidatedSources.clear(); } else { item.sources << sources; - item.consolidatedSources << consolidateSources(sources); } emit dataChanged(createIndex(index, 0), createIndex(index, 0), {SourcesRole}); - emit dataChanged(createIndex(index, 0), createIndex(index, 0), {ConsolidatedSourcesRole}); } Q_INVOKABLE void updateThumbsUpState(int index, bool b) @@ -278,61 +255,7 @@ class ChatModel : public QAbstractListModel stream << c.stopped; stream << c.thumbsUpState; stream << c.thumbsDownState; - if (version > 7) { - stream << c.sources.size(); - for (const SourceExcerpt &info : c.sources) { - Q_ASSERT(!info.file.isEmpty()); - stream << info.collection; - stream << info.path; - stream << info.file; - stream << info.title; - stream << info.author; - stream << info.date; - stream << info.text; - stream << info.page; - stream << info.from; - stream << info.to; - if (version > 9) { - stream << info.url; - stream << info.favicon; - } - } - } else if (version > 2) { - QList references; - QList referencesContext; - int validReferenceNumber = 1; - for (const SourceExcerpt &info : c.sources) { - if (info.file.isEmpty()) - continue; - - QString reference; - { - QTextStream stream(&reference); - stream << (validReferenceNumber++) << ". "; - if (!info.title.isEmpty()) - stream << "\"" << info.title << "\". "; - if (!info.author.isEmpty()) - stream << "By " << info.author << ". "; - if (!info.date.isEmpty()) - stream << "Date: " << info.date << ". "; - stream << "In " << info.file << ". "; - if (info.page != -1) - stream << "Page " << info.page << ". "; - if (info.from != -1) { - stream << "Lines " << info.from; - if (info.to != -1) - stream << "-" << info.to; - stream << ". "; - } - stream << "[Context](context://" << validReferenceNumber - 1 << ")"; - } - references.append(reference); - referencesContext.append(info.text); - } - - stream << references.join("\n"); - stream << referencesContext; - } + stream << SourceExcerpt::toJson(c.sources); } return stream.status() == QDataStream::Ok; } @@ -352,31 +275,36 @@ class ChatModel : public QAbstractListModel stream >> c.stopped; stream >> c.thumbsUpState; stream >> c.thumbsDownState; - if (version > 7) { + if (version > 9) { + QList sources; + QString json; + stream >> json; + QString errorString; + sources = SourceExcerpt::fromJson(json, errorString); + Q_ASSERT(errorString.isEmpty()); + c.sources = sources; + } else if (version > 7) { qsizetype count; stream >> count; QList sources; for (int i = 0; i < count; ++i) { - SourceExcerpt info; - stream >> info.collection; - stream >> info.path; - stream >> info.file; - stream >> info.title; - stream >> info.author; - stream >> info.date; - stream >> info.text; - stream >> info.page; - stream >> info.from; - stream >> info.to; - if (version > 9) { - stream >> info.url; - stream >> info.favicon; - } - sources.append(info); + SourceExcerpt source; + stream >> source.collection; + stream >> source.path; + stream >> source.file; + stream >> source.title; + stream >> source.author; + stream >> source.date; + Excerpt excerpt; + stream >> excerpt.text; + stream >> excerpt.page; + stream >> excerpt.from; + stream >> excerpt.to; + source.excerpts = QList{ excerpt }; + sources.append(source); } c.sources = sources; - c.consolidatedSources = consolidateSources(sources); - }else if (version > 2) { + } else if (version > 2) { QString references; QList referencesContext; stream >> references; @@ -398,7 +326,8 @@ class ChatModel : public QAbstractListModel for (int j = 0; j < referenceList.size(); ++j) { QString reference = referenceList[j]; QString context = referencesContext[j]; - SourceExcerpt info; + SourceExcerpt source; + Excerpt excerpt; QTextStream refStream(&reference); QString dummy; int validReferenceNumber; @@ -407,28 +336,28 @@ class ChatModel : public QAbstractListModel if (reference.contains("\"")) { int startIndex = reference.indexOf('"') + 1; int endIndex = reference.indexOf('"', startIndex); - info.title = reference.mid(startIndex, endIndex - startIndex); + source.title = reference.mid(startIndex, endIndex - startIndex); } // Extract author (after "By " and before the next period) if (reference.contains("By ")) { int startIndex = reference.indexOf("By ") + 3; int endIndex = reference.indexOf('.', startIndex); - info.author = reference.mid(startIndex, endIndex - startIndex).trimmed(); + source.author = reference.mid(startIndex, endIndex - startIndex).trimmed(); } // Extract date (after "Date: " and before the next period) if (reference.contains("Date: ")) { int startIndex = reference.indexOf("Date: ") + 6; int endIndex = reference.indexOf('.', startIndex); - info.date = reference.mid(startIndex, endIndex - startIndex).trimmed(); + source.date = reference.mid(startIndex, endIndex - startIndex).trimmed(); } // Extract file name (after "In " and before the "[Context]") if (reference.contains("In ") && reference.contains(". [Context]")) { int startIndex = reference.indexOf("In ") + 3; int endIndex = reference.indexOf(". [Context]", startIndex); - info.file = reference.mid(startIndex, endIndex - startIndex).trimmed(); + source.file = reference.mid(startIndex, endIndex - startIndex).trimmed(); } // Extract page number (after "Page " and before the next space) @@ -436,7 +365,7 @@ class ChatModel : public QAbstractListModel int startIndex = reference.indexOf("Page ") + 5; int endIndex = reference.indexOf(' ', startIndex); if (endIndex == -1) endIndex = reference.length(); - info.page = reference.mid(startIndex, endIndex - startIndex).toInt(); + excerpt.page = reference.mid(startIndex, endIndex - startIndex).toInt(); } // Extract lines (after "Lines " and before the next space or hyphen) @@ -446,18 +375,18 @@ class ChatModel : public QAbstractListModel if (endIndex == -1) endIndex = reference.length(); int hyphenIndex = reference.indexOf('-', startIndex); if (hyphenIndex != -1 && hyphenIndex < endIndex) { - info.from = reference.mid(startIndex, hyphenIndex - startIndex).toInt(); - info.to = reference.mid(hyphenIndex + 1, endIndex - hyphenIndex - 1).toInt(); + excerpt.from = reference.mid(startIndex, hyphenIndex - startIndex).toInt(); + excerpt.to = reference.mid(hyphenIndex + 1, endIndex - hyphenIndex - 1).toInt(); } else { - info.from = reference.mid(startIndex, endIndex - startIndex).toInt(); + excerpt.from = reference.mid(startIndex, endIndex - startIndex).toInt(); } } - info.text = context; - sources.append(info); + excerpt.text = context; + source.excerpts = QList{ excerpt }; + sources.append(source); } c.sources = sources; - c.consolidatedSources = consolidateSources(sources); } } beginInsertRows(QModelIndex(), m_chatItems.size(), m_chatItems.size()); diff --git a/gpt4all-chat/qml/ChatView.qml b/gpt4all-chat/qml/ChatView.qml index eb3ab5a8f0d0..b53e67c12661 100644 --- a/gpt4all-chat/qml/ChatView.qml +++ b/gpt4all-chat/qml/ChatView.qml @@ -1106,7 +1106,7 @@ Rectangle { Layout.preferredWidth: childrenRect.width Layout.preferredHeight: childrenRect.height visible: { - if (consolidatedSources.length === 0) + if (sources.length === 0) return false if (!MySettings.localDocsShowReferences) return false @@ -1134,9 +1134,9 @@ Rectangle { sourceSize.height: 24 mipmap: true source: { - if (typeof consolidatedSources === 'undefined' - || typeof consolidatedSources[0] === 'undefined' - || consolidatedSources[0].url === "") + if (typeof sources === 'undefined' + || typeof sources[0] === 'undefined' + || sources[0].url === "") return "qrc:/gpt4all/icons/db.svg"; else return "qrc:/gpt4all/icons/globe.svg"; @@ -1151,7 +1151,7 @@ Rectangle { } Text { - text: qsTr("%1 Sources").arg(consolidatedSources.length) + text: qsTr("%1 Sources").arg(sources.length) padding: 0 font.pixelSize: theme.fontSizeLarge font.bold: true @@ -1199,7 +1199,7 @@ Rectangle { Layout.column: 1 Layout.topMargin: 5 visible: { - if (consolidatedSources.length === 0) + if (sources.length === 0) return false if (!MySettings.localDocsShowReferences) return false @@ -1240,9 +1240,9 @@ Rectangle { id: flow Layout.fillWidth: true spacing: 10 - visible: consolidatedSources.length !== 0 + visible: sources.length !== 0 Repeater { - model: consolidatedSources + model: sources delegate: Rectangle { radius: 10 @@ -1361,7 +1361,7 @@ Rectangle { return false; if (MySettings.suggestionMode === 2) // Off return false; - if (MySettings.suggestionMode === 0 && consolidatedSources.length === 0) // LocalDocs only + if (MySettings.suggestionMode === 0 && sources.length === 0) // LocalDocs only return false; return currentChat.responseState === Chat.GeneratingQuestions || currentChat.generatedQuestions.length !== 0; } diff --git a/gpt4all-chat/server.cpp b/gpt4all-chat/server.cpp index e655bf9feff9..af266afbfe86 100644 --- a/gpt4all-chat/server.cpp +++ b/gpt4all-chat/server.cpp @@ -408,12 +408,8 @@ QHttpServerResponse Server::handleCompletionRequest(const QHttpServerRequest &re message.insert("role", "assistant"); message.insert("content", result); choice.insert("message", message); - if (MySettings::globalInstance()->localDocsShowReferences()) { - QJsonArray references; - for (const auto &ref : infos) - references.append(ref.toJson()); - choice.insert("references", references); - } + if (MySettings::globalInstance()->localDocsShowReferences()) + choice.insert("references", SourceExcerpt::toJson(infos)); choices.append(choice); } } else { @@ -426,12 +422,8 @@ QHttpServerResponse Server::handleCompletionRequest(const QHttpServerRequest &re choice.insert("index", index++); choice.insert("logprobs", QJsonValue::Null); // We don't support choice.insert("finish_reason", responseTokens == max_tokens ? "length" : "stop"); - if (MySettings::globalInstance()->localDocsShowReferences()) { - QJsonArray references; - for (const auto &ref : infos) - references.append(ref.toJson()); - choice.insert("references", references); - } + if (MySettings::globalInstance()->localDocsShowReferences()) + choice.insert("references", SourceExcerpt::toJson(infos)); choices.append(choice); } } diff --git a/gpt4all-chat/sourceexcerpt.cpp b/gpt4all-chat/sourceexcerpt.cpp index 811d65f050d0..0c702ce9ff1b 100644 --- a/gpt4all-chat/sourceexcerpt.cpp +++ b/gpt4all-chat/sourceexcerpt.cpp @@ -5,8 +5,53 @@ #include #include +QString SourceExcerpt::toJson(const QList &sources) +{ + if (sources.isEmpty()) + return QString(); + + QJsonArray resultsArray; + for (const auto &source : sources) { + QJsonObject sourceObj; + sourceObj["date"] = source.date; + sourceObj["collection"] = source.collection; + sourceObj["path"] = source.path; + sourceObj["file"] = source.file; + sourceObj["url"] = source.url; + sourceObj["favicon"] = source.favicon; + sourceObj["title"] = source.title; + sourceObj["author"] = source.author; + sourceObj["description"] = source.description; + + QJsonArray excerptsArray; + for (const auto &excerpt : source.excerpts) { + QJsonObject excerptObj; + excerptObj["text"] = excerpt.text; + if (excerpt.page != -1) + excerptObj["page"] = excerpt.page; + if (excerpt.from != -1) + excerptObj["from"] = excerpt.from; + if (excerpt.to != -1) + excerptObj["to"] = excerpt.to; + excerptsArray.append(excerptObj); + } + sourceObj["excerpts"] = excerptsArray; + + resultsArray.append(sourceObj); + } + + QJsonObject jsonObj; + jsonObj["results"] = resultsArray; + + QJsonDocument doc(jsonObj); + return doc.toJson(QJsonDocument::Compact); +} + QList SourceExcerpt::fromJson(const QString &json, QString &errorString) { + if (json.isEmpty()) + return QList(); + QJsonParseError err; QJsonDocument document = QJsonDocument::fromJson(json.toUtf8(), &err); if (err.error != QJsonParseError::NoError) { @@ -44,7 +89,7 @@ QList SourceExcerpt::fromJson(const QString &json, QString &error SourceExcerpt source; source.date = result["date"].toString(); if (result.contains("collection")) - source.collection = result["text"].toString(); + source.collection = result["collection"].toString(); if (result.contains("path")) source.path = result["path"].toString(); if (result.contains("file")) @@ -61,15 +106,6 @@ QList SourceExcerpt::fromJson(const QString &json, QString &error source.author = result["description"].toString(); for (int i = 0; i < textExcerpts.size(); ++i) { - SourceExcerpt excerpt; - excerpt.date = source.date; - excerpt.collection = source.collection; - excerpt.path = source.path; - excerpt.file = source.file; - excerpt.url = source.url; - excerpt.favicon = source.favicon; - excerpt.title = source.title; - excerpt.author = source.author; if (!textExcerpts[i].isObject()) { errorString = "result excerpt is not an object"; return QList(); @@ -79,6 +115,7 @@ QList SourceExcerpt::fromJson(const QString &json, QString &error errorString = "result excerpt is does not have text field"; return QList(); } + Excerpt excerpt; excerpt.text = excerptObj["text"].toString(); if (excerptObj.contains("page")) excerpt.page = excerptObj["page"].toInt(); @@ -86,8 +123,9 @@ QList SourceExcerpt::fromJson(const QString &json, QString &error excerpt.from = excerptObj["from"].toInt(); if (excerptObj.contains("to")) excerpt.to = excerptObj["to"].toInt(); - excerpts.append(excerpt); + source.excerpts.append(excerpt); } + excerpts.append(source); } return excerpts; } diff --git a/gpt4all-chat/sourceexcerpt.h b/gpt4all-chat/sourceexcerpt.h index 8276923940d1..3f02457ca4dc 100644 --- a/gpt4all-chat/sourceexcerpt.h +++ b/gpt4all-chat/sourceexcerpt.h @@ -8,10 +8,23 @@ using namespace Qt::Literals::StringLiterals; +struct Excerpt { + QString text; // [Required] The text actually used in the augmented context + int page = -1; // [Optional] The page where the text was found + int from = -1; // [Optional] The line number where the text begins + int to = -1; // [Optional] The line number where the text ends + bool operator==(const Excerpt &other) const { + return text == other.text && page == other.page && from == other.from && to == other.to; + } + bool operator!=(const Excerpt &other) const { + return !(*this == other); + } +}; +Q_DECLARE_METATYPE(Excerpt) + struct SourceExcerpt { Q_GADGET Q_PROPERTY(QString date MEMBER date) - Q_PROPERTY(QString text MEMBER text) Q_PROPERTY(QString collection MEMBER collection) Q_PROPERTY(QString path MEMBER path) Q_PROPERTY(QString file MEMBER file) @@ -20,25 +33,40 @@ struct SourceExcerpt { Q_PROPERTY(QString title MEMBER title) Q_PROPERTY(QString author MEMBER author) Q_PROPERTY(QString description MEMBER description) - Q_PROPERTY(int page MEMBER page) - Q_PROPERTY(int from MEMBER from) - Q_PROPERTY(int to MEMBER to) Q_PROPERTY(QString fileUri READ fileUri STORED false) + Q_PROPERTY(QString text READ text STORED false) + Q_PROPERTY(QList excerpts MEMBER excerpts) public: - QString date; // [Required] The creation or the last modification date whichever is latest - QString text; // [Required] The text actually used in the augmented context - QString collection; // [Optional] The name of the collection - QString path; // [Optional] The full path - QString file; // [Optional] The name of the file, but not the full path - QString url; // [Optional] The name of the remote url - QString favicon; // [Optional] The favicon - QString title; // [Optional] The title of the document - QString author; // [Optional] The author of the document - QString description;// [Optional] The description of the source - int page = -1; // [Optional] The page where the text was found - int from = -1; // [Optional] The line number where the text begins - int to = -1; // [Optional] The line number where the text ends + QString date; // [Required] The creation or the last modification date whichever is latest + QString collection; // [Optional] The name of the collection + QString path; // [Optional] The full path + QString file; // [Optional] The name of the file, but not the full path + QString url; // [Optional] The name of the remote url + QString favicon; // [Optional] The favicon + QString title; // [Optional] The title of the document + QString author; // [Optional] The author of the document + QString description; // [Optional] The description of the source + QList excerpts;// [Required] The list of excerpts + + // Returns a human readable string containing all the excerpts + QString text() const { + QStringList formattedExcerpts; + for (const auto& excerpt : excerpts) { + QString formattedExcerpt = excerpt.text; + if (excerpt.page != -1) { + formattedExcerpt += QStringLiteral(" (Page: %1").arg(excerpt.page); + if (excerpt.from != -1 && excerpt.to != -1) { + formattedExcerpt += QStringLiteral(", Lines: %1-%2").arg(excerpt.from).arg(excerpt.to); + } + formattedExcerpt += QStringLiteral(")"); + } else if (excerpt.from != -1 && excerpt.to != -1) { + formattedExcerpt += QStringLiteral(" (Lines: %1-%2)").arg(excerpt.from).arg(excerpt.to); + } + formattedExcerpts.append(formattedExcerpt); + } + return formattedExcerpts.join(QStringLiteral("\n---\n")); + } QString fileUri() const { // QUrl reserved chars that are not UNSAFE_PATH according to glib/gconvert.c @@ -55,25 +83,7 @@ struct SourceExcerpt { return u"file://"_s + escaped; } - QJsonObject toJson() const - { - QJsonObject result; - result.insert("date", date); - result.insert("text", text); - result.insert("collection", collection); - result.insert("path", path); - result.insert("file", file); - result.insert("url", url); - result.insert("favicon", favicon); - result.insert("title", title); - result.insert("author", author); - result.insert("description", description); - result.insert("page", page); - result.insert("from", from); - result.insert("to", to); - return result; - } - + static QString toJson(const QList &sources); static QList fromJson(const QString &json, QString &errorString); bool operator==(const SourceExcerpt &other) const {