Skip to content

Commit

Permalink
Merge branch 'main' of https://github.com/lobehub/lobe-chat
Browse files Browse the repository at this point in the history
  • Loading branch information
actions-user committed Mar 6, 2025
2 parents cb6e51c + 00a33bf commit 84c6cc1
Show file tree
Hide file tree
Showing 11 changed files with 407 additions and 5 deletions.
25 changes: 25 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,31 @@

# Changelog

### [Version 1.68.9](https://github.com/lobehub/lobe-chat/compare/v1.68.8...v1.68.9)

<sup>Released on **2025-03-05**</sup>

#### 💄 Styles

- **misc**: Add epub file chunk split support.

<br/>

<details>
<summary><kbd>Improvements and Fixes</kbd></summary>

#### Styles

- **misc**: Add epub file chunk split support, closes [#6317](https://github.com/lobehub/lobe-chat/issues/6317) ([a79ab7a](https://github.com/lobehub/lobe-chat/commit/a79ab7a))

</details>

<div align="right">

[![](https://img.shields.io/badge/-BACK_TO_TOP-151515?style=flat-square)](#readme-top)

</div>

### [Version 1.68.8](https://github.com/lobehub/lobe-chat/compare/v1.68.7...v1.68.8)

<sup>Released on **2025-03-05**</sup>
Expand Down
7 changes: 7 additions & 0 deletions changelog/v1.json
Original file line number Diff line number Diff line change
@@ -1,4 +1,11 @@
[
{
"children": {
"improvements": ["Add epub file chunk split support."]
},
"date": "2025-03-05",
"version": "1.68.9"
},
{
"children": {
"improvements": ["Improve openrouter models info."]
Expand Down
4 changes: 3 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@lobehub/chat",
"version": "1.68.8",
"version": "1.68.9",
"description": "Lobe Chat - an open-source, high-performance chatbot framework that supports speech synthesis, multimodal, and extensible Function Call plugin system. Supports one-click free deployment of your private ChatGPT/LLM web application.",
"keywords": [
"framework",
Expand Down Expand Up @@ -162,10 +162,12 @@
"diff": "^7.0.0",
"drizzle-orm": "^0.40.0",
"drizzle-zod": "^0.5.1",
"epub2": "^3.0.2",
"fast-deep-equal": "^3.1.3",
"file-type": "^20.0.0",
"framer-motion": "^11.16.0",
"gpt-tokenizer": "^2.8.1",
"html-to-text": "^9.0.5",
"i18next": "^24.2.1",
"i18next-browser-languagedetector": "^8.0.2",
"i18next-resources-to-backend": "^1.2.1",
Expand Down
11 changes: 8 additions & 3 deletions src/database/client/migrations.json
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,10 @@
"hash": "9646161fa041354714f823d726af27247bcd6e60fa3be5698c0d69f337a5700b"
},
{
"sql": ["DROP TABLE \"user_budgets\";", "\nDROP TABLE \"user_subscriptions\";"],
"sql": [
"DROP TABLE \"user_budgets\";",
"\nDROP TABLE \"user_subscriptions\";"
],
"bps": true,
"folderMillis": 1729699958471,
"hash": "7dad43a2a25d1aec82124a4e53f8d82f8505c3073f23606c1dc5d2a4598eacf9"
Expand Down Expand Up @@ -295,7 +298,9 @@
"hash": "845a692ceabbfc3caf252a97d3e19a213bc0c433df2689900135f9cfded2cf49"
},
{
"sql": ["ALTER TABLE \"messages\" ADD COLUMN \"reasoning\" jsonb;"],
"sql": [
"ALTER TABLE \"messages\" ADD COLUMN \"reasoning\" jsonb;"
],
"bps": true,
"folderMillis": 1737609172353,
"hash": "2cb36ae4fcdd7b7064767e04bfbb36ae34518ff4bb1b39006f2dd394d1893868"
Expand All @@ -309,4 +314,4 @@
"folderMillis": 1739901891891,
"hash": "78d8fefd8c58938d7bc3da2295a73b35ce2e8d7cb2820f8e817acdb8dd5bebb2"
}
]
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,238 @@
// Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html

exports[`EPubLoader > should run 1`] = `
[
Document {
"id": undefined,
"metadata": {
"loc": {
"lines": {
"from": 1,
"to": 13,
},
},
"source": "",
},
"pageContent": "HEFTY WATER
This document serves to test Reading System support for the epub:switch
[http://idpf.org/epub/30/spec/epub30-contentdocs.html#sec-xhtml-content-switch]
element. There is also a little bit of ruby markup
[http://www.w3.org/TR/html5/the-ruby-element.html#the-ruby-element] available.
THE SWITCH
Below is an instance of the epub:switch element, containing Chemical Markup
Language [http://en.wikipedia.org/wiki/Chemical_Markup_Language] (CML). The
fallback content is a chunk of plain XHTML5.",
},
Document {
"id": undefined,
"metadata": {
"loc": {
"lines": {
"from": 9,
"to": 22,
},
},
"source": "",
},
"pageContent": "THE SWITCH
Below is an instance of the epub:switch element, containing Chemical Markup
Language [http://en.wikipedia.org/wiki/Chemical_Markup_Language] (CML). The
fallback content is a chunk of plain XHTML5.
* If your Reading System supports epub:switch and CML, it will render the CML
formula natively, and ignore (a.k.a not display) the XHTML fallback.
* If your Reading System supports epub:switch but not CML, it will ignore (not
display) the CML formula, and render the the XHTML fallback instead.
* If your Reading System does not support epub:switch at all, then the
rendering results are somewhat unpredictable, but the most likely result is
that it will display both a failed attempt to render the CML and the XHTML
fallback.",
},
Document {
"id": undefined,
"metadata": {
"loc": {
"lines": {
"from": 24,
"to": 43,
},
},
"source": "",
},
"pageContent": "Note: the XHTML fallback is bold and enclosed in a gray dotted box with a
slightly gray background. A failed CML rendering will most likely appear above
the gray fallback box and read:
"H hydrogen O oxygen hefty H O water".
Here the switch begins...
H hydrogen O oxygen hefty H O water
2H2 + O2 ⟶ 2H2O
... and here the switch ends.
THE SOURCE
Below is a rendition of the source code of the switch element. Your Reading
System should display this correctly regardless of whether it supports the
switch element.",
},
Document {
"id": undefined,
"metadata": {
"loc": {
"lines": {
"from": 46,
"to": 66,
},
},
"source": "",
},
"pageContent": "<switch xmlns="http://www.idpf.org/2007/ops">
<case required-namespace="http://www.xml-cml.org/schema">
<chem xmlns="http://www.xml-cml.org/schema">
<reaction>
<molecule n="2">
<atom n="2"> H </atom>
<caption> hydrogen </caption>
</molecule>
<plus></plus>
<molecule>
<atom n="2"> O </atom>
<caption> oxygen </caption>
</molecule>
<gives>
<caption> hefty </caption>
</gives>
<molecule n="2">
<atom n="2"> H </atom>
<atom> O </atom>
<caption> water </caption>
</molecule>",
},
Document {
"id": undefined,
"metadata": {
"loc": {
"lines": {
"from": 57,
"to": 79,
},
},
"source": "",
},
"pageContent": "<caption> oxygen </caption>
</molecule>
<gives>
<caption> hefty </caption>
</gives>
<molecule n="2">
<atom n="2"> H </atom>
<atom> O </atom>
<caption> water </caption>
</molecule>
</reaction>
</chem>
</case>
<default>
<p xmlns="http://www.w3.org/1999/xhtml" id="fallback">
<span>2H<sub>2</sub></span>
<span>+</span>
<span>O<sub>2</sub></span>
<span>⟶</span>
<span>2H<sub>2</sub>O</span>
</p>
</default>
</switch>",
},
Document {
"id": undefined,
"metadata": {
"loc": {
"lines": {
"from": 84,
"to": 94,
},
},
"source": "",
},
"pageContent": "HEFTY RUBY WATER
While the ruby element is mostly used in east-asian languages, it can also be
useful in other contexts. As an example, and as you can see in the source of the
CML element above, the code includes a caption element which is intended to be
displayed below the formula segments. Following this paragraph is a reworked
version of the XHTML fallback used above, using the ruby element. If your
Reading System does not support ruby markup, then the captions will appear in
parentheses on the same line as the formula segments.
2H2(hydrogen) + O2(oxygen) ⟶(hefty) 2H2O(water)",
},
Document {
"id": undefined,
"metadata": {
"loc": {
"lines": {
"from": 94,
"to": 111,
},
},
"source": "",
},
"pageContent": "2H2(hydrogen) + O2(oxygen) ⟶(hefty) 2H2O(water)
If your Reading System in addition to supporting ruby markup also supports the
-epub-ruby-position
[http://idpf.org/epub/30/spec/epub30-contentdocs.html#sec-css-ruby-position]
property, then the captions will appear under the formula segments instead of
over them.
The source code for the ruby version of the XHTML fallback looks as follows:
<p id="rubyp">
<ruby>2H<sub>2</sub><rp>(</rp><rt>hydrogen</rt><rp>)</rp></ruby>
<span>+</span>
<ruby>O<sub>2</sub><rp>(</rp><rt>oxygen</rt><rp>)</rp></ruby>
<ruby>⟶<rp>(</rp><rt>hefty</rt><rp>)</rp></ruby>
<ruby>2H<sub>2</sub>O<rp>(</rp><rt>water</rt><rp>)</rp></ruby>
</p>",
},
Document {
"id": undefined,
"metadata": {
"loc": {
"lines": {
"from": 105,
"to": 120,
},
},
"source": "",
},
"pageContent": "<p id="rubyp">
<ruby>2H<sub>2</sub><rp>(</rp><rt>hydrogen</rt><rp>)</rp></ruby>
<span>+</span>
<ruby>O<sub>2</sub><rp>(</rp><rt>oxygen</rt><rp>)</rp></ruby>
<ruby>⟶<rp>(</rp><rt>hefty</rt><rp>)</rp></ruby>
<ruby>2H<sub>2</sub>O<rp>(</rp><rt>water</rt><rp>)</rp></ruby>
</p>
... and the css declaration using the -epub-ruby-position property looks like
this:
p#rubyp {
-epub-ruby-position : under;
}",
},
]
`;
Binary file not shown.
24 changes: 24 additions & 0 deletions src/libs/langchain/loaders/epub/__tests__/index.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
// @vitest-environment node
import * as fs from 'node:fs';
import { join } from 'node:path';
import { expect } from 'vitest';

import { EPubLoader } from '../index';

/**
 * Blanks out the `metadata.source` field of every document that has a truthy
 * one, so the value stored in the snapshot does not depend on whatever
 * `source` the loader reported for this run.
 *
 * The documents are modified in place.
 *
 * @param document - Documents to sanitize.
 * @returns The same array instance that was passed in.
 */
function sanitizeDynamicFields(document: any[]) {
  for (let index = 0; index < document.length; index += 1) {
    const metadata = document[index].metadata;
    // Only overwrite a source that is actually set; missing/empty values stay untouched.
    if (metadata.source) {
      metadata.source = '';
    }
  }
  return document;
}

// Snapshot test: runs the loader over the demo.epub fixture stored next to this
// file and compares the resulting chunks with the recorded snapshot.
describe('EPubLoader', () => {
  it('should run', async () => {
    // Read the fixture and hand it to the loader as a plain Uint8Array.
    const fixturePath = join(__dirname, `./demo.epub`);
    const epubBytes: Uint8Array = new Uint8Array(fs.readFileSync(fixturePath));

    const chunks = await EPubLoader(epubBytes);

    // `metadata.source` is blanked first so the snapshot does not capture it.
    const sanitized = sanitizeDynamicFields(chunks);
    expect(sanitized).toMatchSnapshot();
  });
});
21 changes: 21 additions & 0 deletions src/libs/langchain/loaders/epub/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import { EPubLoader as Loader } from '@langchain/community/document_loaders/fs/epub';
import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter';
import { loaderConfig } from '../config';
import { TempFileManager } from '@/server/utils/tempFileManager';

/**
 * Loads an EPUB document from raw bytes and splits it into chunks.
 *
 * The bytes are written to a temporary file through `TempFileManager`, because
 * the LangChain EPUB loader is constructed from a file path. The loaded
 * documents are then split with a `RecursiveCharacterTextSplitter` configured
 * by `loaderConfig`.
 *
 * @param content - Raw bytes of the EPUB file.
 * @returns The documents produced by the splitter.
 * @throws Error whose message is prefixed with `EPubLoader error: ` when
 *   writing the temp file, loading, or splitting fails.
 */
export const EPubLoader = async (content: Uint8Array) => {
  const tempManager = new TempFileManager();
  try {
    const tempPath = await tempManager.writeTempFile(content);
    const documents = await new Loader(tempPath).load();

    const splitter = new RecursiveCharacterTextSplitter(loaderConfig);
    return await splitter.splitDocuments(documents);
  } catch (e) {
    // A thrown value is not guaranteed to be an Error; narrow it so the message
    // never degrades to "EPubLoader error: undefined".
    const reason = e instanceof Error ? e.message : String(e);
    throw new Error(`EPubLoader error: ${reason}`);
  } finally {
    // Always clean up, on success and on failure.
    // NOTE(review): cleanup() is not awaited — confirm it is synchronous.
    tempManager.cleanup();
  }
};
Loading

0 comments on commit 84c6cc1

Please sign in to comment.