-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtext_splitters_test.go
100 lines (82 loc) · 2.25 KB
/
text_splitters_test.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
package flowllm_test
import (
. "github.com/deluan/flowllm"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
// Specs for the text splitters. Each spec builds a Splitter from
// SplitterOptions, runs it over a fixed input and compares the returned
// chunks against a literal expected slice.
var _ = Describe("Splitters", func() {
	Describe("RecursiveTextSplitter", func() {
		var (
			text           string
			splitter       Splitter
			expectedOutput []string
		)

		// Every context below splits the same single-line sentence.
		BeforeEach(func() {
			text = "This is a sample text for testing the RecursiveTextSplitter function."
		})

		Context("with default options", func() {
			BeforeEach(func() {
				// Zero-value options: the sentence is expected back unchanged
				// as a single chunk.
				splitter = RecursiveTextSplitter(SplitterOptions{})
				expectedOutput = []string{
					"This is a sample text for testing the RecursiveTextSplitter function.",
				}
			})

			It("splits the text into chunks based on the default chunk size and overlap", func() {
				chunks, err := splitter(text)
				Expect(err).NotTo(HaveOccurred())
				Expect(chunks).To(Equal(expectedOutput))
			})
		})

		Context("with custom options", func() {
			BeforeEach(func() {
				// The only configured separator ("\n") does not occur in the
				// sample sentence; every expected chunk is shorter than ChunkSize.
				splitter = RecursiveTextSplitter(SplitterOptions{
					ChunkSize:    20,
					ChunkOverlap: 2,
					Separators:   []string{"\n"},
				})
				expectedOutput = []string{
					"This is a sample te",
					"text for testing th",
					"the RecursiveTextSp",
					"Splitter function.",
				}
			})

			It("splits the text into chunks based on the custom chunk size and overlap", func() {
				chunks, err := splitter(text)
				Expect(err).NotTo(HaveOccurred())
				Expect(chunks).To(Equal(expectedOutput))
			})
		})
	})

	Describe("MarkdownSplitter", func() {
		var (
			text           string
			splitter       Splitter
			expectedOutput []string
		)

		// Markdown input with three header levels, each followed by one line of
		// content. The raw string is kept at column 0 so no indentation leaks
		// into the input.
		BeforeEach(func() {
			text = `
# Header 1
This is some content.
## Header 2
This is some more content.
### Header 3
This is even more content.`
		})

		Context("with markdown formatted text", func() {
			BeforeEach(func() {
				// Chunk size and overlap are set explicitly here. The expected
				// chunks pair each header with its content line; only the first
				// chunk keeps its leading "#" marker.
				splitter = MarkdownSplitter(SplitterOptions{ChunkSize: 40, ChunkOverlap: 20})
				expectedOutput = []string{
					"# Header 1\n\nThis is some content.",
					"Header 2\n\nThis is some more content.",
					"Header 3\n\nThis is even more content.",
				}
			})

			// Description fixed: this spec uses explicit (custom) options, not
			// the defaults.
			It("splits the text into chunks based on the custom chunk size and overlap", func() {
				chunks, err := splitter(text)
				Expect(err).NotTo(HaveOccurred())
				Expect(chunks).To(Equal(expectedOutput))
			})
		})
	})
})