@@ -4,15 +4,16 @@ use duckdb::vtab::{
4
4
BindInfo , DataChunk , Free , FunctionInfo , InitInfo , LogicalType , LogicalTypeId , VTab ,
5
5
VTabLocalData ,
6
6
} ;
7
- use prost_reflect:: { DescriptorPool , DynamicMessage , MessageDescriptor } ;
7
+ use prost_reflect:: { DescriptorPool , DynamicMessage , MessageDescriptor , ReflectMessage } ;
8
8
use std:: error:: Error ;
9
+ use std:: ffi:: CString ;
9
10
use std:: fs:: File ;
10
11
use std:: io:: Read ;
11
12
use std:: ops:: { Deref , DerefMut } ;
12
- use std:: path:: PathBuf ;
13
+ use std:: path:: { Path , PathBuf } ;
13
14
14
- use crate :: io:: { parse, DelimitedLengthKind , LengthDelimitedRecordsReader , LengthKind } ;
15
- use crate :: read:: write_to_output;
15
+ use crate :: io:: { parse, DelimitedLengthKind , LengthDelimitedRecordsReader , LengthKind , Record } ;
16
+ use crate :: read:: { write_to_output, MyFlatVector , VectorAccessor } ;
16
17
use crate :: types:: into_logical_type;
17
18
18
19
pub struct Parameters {
@@ -21,6 +22,9 @@ pub struct Parameters {
21
22
pub message_name : String ,
22
23
pub shared_message_descriptor : MessageDescriptor ,
23
24
pub length_kind : LengthKind ,
25
+ pub include_filename : bool ,
26
+ pub include_position : bool ,
27
+ pub include_size : bool ,
24
28
}
25
29
26
30
impl Parameters {
@@ -62,18 +66,35 @@ impl Parameters {
62
66
let length_kind = parse :: < LengthKind > ( & length_kind. to_string ( ) )
63
67
. map_err ( |err| format_err ! ( "when parsing parameter delimiter: {}" , err) ) ?;
64
68
69
+ let include_filename = bind
70
+ . get_named_parameter ( "filename" )
71
+ . map ( |value| value. to_int64 ( ) != 0 )
72
+ . unwrap_or ( false ) ;
73
+
74
+ let include_position = bind
75
+ . get_named_parameter ( "position" )
76
+ . map ( |value| value. to_int64 ( ) != 0 )
77
+ . unwrap_or ( false ) ;
78
+
79
+ let include_size = bind
80
+ . get_named_parameter ( "size" )
81
+ . map ( |value| value. to_int64 ( ) != 0 )
82
+ . unwrap_or ( false ) ;
83
+
65
84
Ok ( Self {
66
85
files,
67
86
descriptor_bytes,
68
87
message_name,
69
88
shared_message_descriptor : message_descriptor,
70
89
length_kind,
90
+ include_filename,
91
+ include_position,
92
+ include_size,
71
93
} )
72
94
}
73
95
74
96
pub fn message_descriptor ( & self ) -> Result < MessageDescriptor , anyhow:: Error > {
75
- let descriptor_pool =
76
- DescriptorPool :: decode ( self . descriptor_bytes . as_slice ( ) ) ?;
97
+ let descriptor_pool = DescriptorPool :: decode ( self . descriptor_bytes . as_slice ( ) ) ?;
77
98
78
99
let message_descriptor = descriptor_pool
79
100
. get_message_by_name ( & self . message_name )
@@ -100,10 +121,18 @@ impl Parameters {
100
121
"delimiter" . to_string( ) ,
101
122
LogicalType :: new( LogicalTypeId :: Varchar ) ,
102
123
) ,
124
+ (
125
+ "filename" . to_string( ) ,
126
+ LogicalType :: new( LogicalTypeId :: Boolean ) ,
127
+ ) ,
128
+ (
129
+ "position" . to_string( ) ,
130
+ LogicalType :: new( LogicalTypeId :: Boolean ) ,
131
+ ) ,
132
+ ( "size" . to_string( ) , LogicalType :: new( LogicalTypeId :: Boolean ) ) ,
103
133
]
104
134
}
105
135
}
106
-
107
136
pub struct GlobalState {
108
137
queue : ArrayQueue < PathBuf > ,
109
138
}
@@ -180,6 +209,18 @@ impl ProtobufVTab {
180
209
) ;
181
210
}
182
211
212
+ if params. include_filename {
213
+ bind. add_result_column ( "filename" , LogicalType :: new ( LogicalTypeId :: Varchar ) ) ;
214
+ }
215
+
216
+ if params. include_position {
217
+ bind. add_result_column ( "position" , LogicalType :: new ( LogicalTypeId :: UBigint ) ) ;
218
+ }
219
+
220
+ if params. include_size {
221
+ bind. add_result_column ( "size" , LogicalType :: new ( LogicalTypeId :: UBigint ) ) ;
222
+ }
223
+
183
224
data. assign ( params) ;
184
225
185
226
Ok ( ( ) )
@@ -221,13 +262,17 @@ impl ProtobufVTab {
221
262
let mut column_information = Default :: default ( ) ;
222
263
223
264
for output_row_idx in 0 ..available_chunk_size {
224
- let bytes = match state_container. next_message ( ) ? {
265
+ let StateContainerValue {
266
+ path_reference,
267
+ size,
268
+ bytes,
269
+ position,
270
+ } = match state_container. next_message ( ) ? {
225
271
None => break ,
226
- Some ( bytes ) => bytes ,
272
+ Some ( message_info ) => message_info ,
227
273
} ;
228
274
229
- let message =
230
- DynamicMessage :: decode ( local_descriptor. clone ( ) , bytes. as_slice ( ) ) ?;
275
+ let message = DynamicMessage :: decode ( local_descriptor. clone ( ) , bytes. as_slice ( ) ) ?;
231
276
232
277
write_to_output (
233
278
& mut column_information,
@@ -237,6 +282,48 @@ impl ProtobufVTab {
237
282
output_row_idx,
238
283
) ?;
239
284
285
+ let mut field_offset = message. descriptor ( ) . fields ( ) . len ( ) ;
286
+
287
+ if parameters. include_filename {
288
+ let it = ( || -> Option < CString > {
289
+ let value = CString :: new ( path_reference. path ( ) . to_str ( ) ?) . ok ( ) ?;
290
+ Some ( value)
291
+ } ) ( ) ;
292
+
293
+ let column = output. get_vector ( field_offset) ;
294
+
295
+ match it {
296
+ None => unsafe {
297
+ let validity = duckdb:: ffi:: duckdb_vector_get_validity ( column) ;
298
+ duckdb:: ffi:: duckdb_validity_set_row_invalid ( validity, output_row_idx as _ ) ;
299
+ } ,
300
+ Some ( value) => unsafe {
301
+ duckdb:: ffi:: duckdb_vector_assign_string_element (
302
+ column,
303
+ output_row_idx as _ ,
304
+ value. as_ptr ( ) ,
305
+ )
306
+ } ,
307
+ }
308
+
309
+ field_offset += 1 ;
310
+ }
311
+
312
+ if parameters. include_position {
313
+ let column = output. get_vector ( field_offset) ;
314
+ let mut vector =
315
+ unsafe { MyFlatVector :: < u64 > :: with_capacity ( column, available_chunk_size) } ;
316
+ vector. as_mut_slice ( ) [ output_row_idx] = position as _ ;
317
+ field_offset += 1 ;
318
+ }
319
+
320
+ if parameters. include_size {
321
+ let column = output. get_vector ( field_offset) ;
322
+ let mut vector =
323
+ unsafe { MyFlatVector :: < u64 > :: with_capacity ( column, available_chunk_size) } ;
324
+ vector. as_mut_slice ( ) [ output_row_idx] = size as _ ;
325
+ }
326
+
240
327
items += 1 ;
241
328
}
242
329
@@ -252,47 +339,81 @@ struct StateContainer<'a> {
252
339
parameters : & ' a Parameters ,
253
340
}
254
341
255
- impl StateContainer < ' _ > {
256
- fn next_message ( & mut self ) -> Result < Option < Vec < u8 > > , anyhow:: Error > {
257
- let file_reader = if let Some ( reader) = & mut self . local_state . current {
258
- reader
259
- } else {
260
- let Some ( next_file_path) = self . global_state . queue . pop ( ) else {
261
- return Ok ( None ) ;
262
- } ;
342
/// A file path that is either borrowed from a longer-lived owner or owned by
/// the value itself.
///
/// This lets a message carry the path of the file it came from without
/// cloning it when the path is already stored somewhere that outlives the
/// message.
#[derive(Debug)]
enum PathReference<'a> {
    /// Path borrowed for `'a` from another value.
    Borrowed(&'a Path),
    /// Path owned by this value.
    Owned(PathBuf),
}

impl PathReference<'_> {
    /// Returns the underlying path, regardless of which variant holds it.
    pub fn path(&self) -> &Path {
        match self {
            PathReference::Borrowed(path) => *path,
            PathReference::Owned(path) => path.as_path(),
        }
    }
}
355
+
356
+ struct StateContainerValue < ' a > {
357
+ path_reference : PathReference < ' a > ,
358
+ bytes : Vec < u8 > ,
359
+ size : usize ,
360
+ position : u64 ,
361
+ }
263
362
264
- let mut next_file = File :: open ( & next_file_path) ?;
265
- match self . parameters . length_kind {
266
- LengthKind :: BigEndianFixed => {
267
- self . local_state . current = Some ( LengthDelimitedRecordsReader :: create (
363
+ impl StateContainer < ' _ > {
364
+ fn next_message ( & mut self ) -> Result < Option < StateContainerValue > , anyhow:: Error > {
365
+ let mut value = match self . local_state . current . take ( ) {
366
+ Some ( it) => it,
367
+ None => {
368
+ let Some ( next_file_path) = self . global_state . queue . pop ( ) else {
369
+ return Ok ( None ) ;
370
+ } ;
371
+
372
+ let mut next_file = File :: open ( & next_file_path) ?;
373
+ match self . parameters . length_kind {
374
+ LengthKind :: BigEndianFixed => LengthDelimitedRecordsReader :: create (
268
375
next_file,
269
376
DelimitedLengthKind :: BigEndianFixed ,
270
- ) ) ;
271
-
272
- self . local_state . current . as_mut ( ) . unwrap ( )
273
- }
274
- LengthKind :: Varint => {
275
- self . local_state . current = Some ( LengthDelimitedRecordsReader :: create (
377
+ next_file_path,
378
+ ) ,
379
+ LengthKind :: Varint => LengthDelimitedRecordsReader :: create (
276
380
next_file,
277
381
DelimitedLengthKind :: Varint ,
278
- ) ) ;
279
-
280
- self . local_state . current . as_mut ( ) . unwrap ( )
281
- }
282
- LengthKind :: SingleMessagePerFile => {
283
- let mut bytes = Vec :: new ( ) ;
284
- next_file. read_to_end ( & mut bytes) ?;
285
- return Ok ( Some ( bytes) ) ;
382
+ next_file_path,
383
+ ) ,
384
+ LengthKind :: SingleMessagePerFile => {
385
+ let mut bytes = Vec :: new ( ) ;
386
+ next_file. read_to_end ( & mut bytes) ?;
387
+ let size = bytes. len ( ) ;
388
+ return Ok ( Some ( StateContainerValue {
389
+ bytes,
390
+ path_reference : PathReference :: Owned ( next_file_path) ,
391
+ position : 0 ,
392
+ size,
393
+ } ) ) ;
394
+ }
286
395
}
287
396
}
288
397
} ;
289
398
290
- let Some ( next_message) = file_reader. try_get_next ( ) ? else {
291
- self . local_state . current = None ;
399
+ let Some ( Record {
400
+ position,
401
+ size,
402
+ bytes : next_message,
403
+ } ) = value. try_get_next ( ) ?
404
+ else {
292
405
return Ok ( None ) ;
293
406
} ;
294
407
295
- Ok ( Some ( next_message) )
408
+ self . local_state . current = Some ( value) ;
409
+ Ok ( Some ( StateContainerValue {
410
+ path_reference : PathReference :: Borrowed (
411
+ self . local_state . current . as_ref ( ) . unwrap ( ) . path ( ) ,
412
+ ) ,
413
+ bytes : next_message,
414
+ size : size as _ ,
415
+ position,
416
+ } ) )
296
417
}
297
418
}
298
419
0 commit comments