@@ -51,6 +51,26 @@ describe('newline_reader', () => {
51
51
expect ( result ) . toStrictEqual ( UTF8DATA_ARR ) ;
52
52
} ) ;
53
53
54
// Checks that reader.next_line_file_offset tracks the running byte position while iterating
// a buffer whose last line is terminated by '\n': after each entry it must equal the sum of
// the UTF-8 byte lengths of all entries seen so far, plus one newline byte per entry.
it('next_line_file_offset - can process utf8 characters when termination with newline character', async () => {
    const NEWLINE_BYTES = 1;
    const file_content = Buffer.from(UTF8DATA_ARR.join('\n') + '\n', 'utf8');

    const reader = new NewlineReader({}, '', { skip_leftover_line: true, read_file_offset: 0 });
    // @ts-ignore
    reader.fh = mocked_file_handler(file_content);

    const seen_entries = [];
    let offset_after_entry = 0;

    // Per-line callback: record the line, advance the expected offset, compare with the reader.
    const on_entry = async line => {
        seen_entries.push(line);
        offset_after_entry += Buffer.byteLength(line, 'utf8') + NEWLINE_BYTES;
        expect(reader.next_line_file_offset).toBe(offset_after_entry);
        return true;
    };

    const [processed_count] = await reader.forEach(on_entry);

    expect(processed_count).toBe(UTF8DATA_ARR.length);
    expect(seen_entries).toStrictEqual(UTF8DATA_ARR);
});
73
+
54
74
it ( 'can process utf8 characters when termination not with new line character' , async ( ) => {
55
75
const UTF8DATA_BUF = Buffer . from ( UTF8DATA_ARR . join ( '\n' ) , 'utf8' ) ;
56
76
@@ -68,6 +88,47 @@ describe('newline_reader', () => {
68
88
expect ( result ) . toStrictEqual ( UTF8DATA_ARR ) ;
69
89
} ) ;
70
90
91
// Checks reader.next_line_file_offset while iterating a buffer that does NOT end with '\n'.
// Each entry contributes its UTF-8 byte length plus one newline byte, except that no newline
// byte is counted when reader.eof is set at the time the callback runs.
it('next_line_file_offset - can process utf8 characters when termination not with new line character', async () => {
    const file_content = Buffer.from(UTF8DATA_ARR.join('\n'), 'utf8');

    const reader = new NewlineReader({}, '', { read_file_offset: 0 });
    // @ts-ignore
    reader.fh = mocked_file_handler(file_content);

    const seen_entries = [];
    let offset_after_entry = 0;

    // Per-line callback: record the line, advance the expected offset, compare with the reader.
    const on_entry = async line => {
        seen_entries.push(line);
        const line_bytes = Buffer.byteLength(line, 'utf8');
        const newline_bytes = reader.eof ? 0 : 1;
        offset_after_entry += line_bytes + newline_bytes;
        expect(reader.next_line_file_offset).toBe(offset_after_entry);
        return true;
    };

    const [processed_count] = await reader.forEach(on_entry);

    expect(processed_count).toBe(UTF8DATA_ARR.length);
    expect(seen_entries).toStrictEqual(UTF8DATA_ARR);
});
110
+
111
// Starts the reader at the byte offset of the second line (first line's UTF-8 length plus one
// newline byte) and checks that only the remaining lines are delivered, with
// reader.next_line_file_offset continuing from that starting offset.
it('next_line_file_offset starts from the second line - can process utf8 characters when termination not with new line character', async () => {
    const file_content = Buffer.from(UTF8DATA_ARR.join('\n'), 'utf8');
    const [first_line, ...remaining_lines] = UTF8DATA_ARR;
    const start_offset = Buffer.byteLength(first_line, 'utf8') + 1;

    const reader = new NewlineReader({}, '', { read_file_offset: start_offset });
    // @ts-ignore
    reader.fh = mocked_file_handler(file_content);

    const seen_entries = [];
    let offset_after_entry = start_offset;

    // Per-line callback: record the line, advance the expected offset, compare with the reader.
    const on_entry = async line => {
        seen_entries.push(line);
        const line_bytes = Buffer.byteLength(line, 'utf8');
        // No newline byte is counted once the reader reports eof.
        const newline_bytes = reader.eof ? 0 : 1;
        offset_after_entry += line_bytes + newline_bytes;
        expect(reader.next_line_file_offset).toBe(offset_after_entry);
        return true;
    };

    const [processed_count] = await reader.forEach(on_entry);

    expect(processed_count).toBe(remaining_lines.length);
    expect(seen_entries).toStrictEqual(remaining_lines);
});
131
+
71
132
it ( 'can process utf8 characters when termination not with new line character [bufsize = 4]' , async ( ) => {
72
133
const expected = "abc" ;
73
134
const UTF8DATA_ARR_TEMP = [ ...UTF8DATA_ARR , expected ] ;
@@ -86,5 +147,26 @@ describe('newline_reader', () => {
86
147
expect ( processed ) . toBe ( 1 ) ;
87
148
expect ( result ) . toStrictEqual ( [ expected ] ) ;
88
149
} ) ;
150
+
151
// Checks reader.next_line_file_offset after a full pass with a tiny read buffer.
// The reader is configured with bufsize 4 and skip_overflow_lines, matching the test title;
// the assertions expect only the appended "abc" line to reach the callback.
// NOTE(review): presumably every UTF8DATA_ARR line is longer than the 4-byte buffer and is
// therefore skipped — UTF8DATA_ARR is defined outside this block, confirm.
// After the pass, next_line_file_offset must equal the total byte length of the input.
it('next_line_file_offset - can process utf8 characters when termination not with new line character [bufsize = 4]', async () => {
    const expected = "abc";
    const UTF8DATA_ARR_TEMP = [...UTF8DATA_ARR, expected];
    const UTF8DATA_BUF = Buffer.from(UTF8DATA_ARR_TEMP.join('\n'), 'utf8');

    // bufsize must be 4 (as the title states) so the overflow-skipping path is exercised;
    // a large buffer would not match the "only one line processed" assertions below.
    const reader = new NewlineReader({}, '', { bufsize: 4, skip_overflow_lines: true, read_file_offset: 0 });
    // @ts-ignore
    reader.fh = mocked_file_handler(UTF8DATA_BUF);

    const result = [];
    const [processed] = await reader.forEach(async entry => {
        result.push(entry);
        return true;
    });

    expect(processed).toBe(1);
    expect(result).toStrictEqual([expected]);
    // The input has no trailing newline, so the final offset is exactly the buffer length.
    const expected_cur_next_line_file_offset = UTF8DATA_BUF.length;
    expect(reader.next_line_file_offset).toBe(expected_cur_next_line_file_offset);
});
89
171
} ) ;
90
172
} ) ;
0 commit comments