Skip to content

Commit

Permalink
feat: move decoder type to options struct (#23)
Browse files Browse the repository at this point in the history
Closes #20

This makes the API simpler to use. No flexibility is sacrificed, because
users who want to pass in a non-default-initialized decoder object can
just use the `TokenReader` and `Reader` `init` functions directly.
  • Loading branch information
ianprime0509 authored Oct 15, 2023
1 parent 9c6389d commit 11f9db5
Show file tree
Hide file tree
Showing 5 changed files with 33 additions and 34 deletions.
4 changes: 3 additions & 1 deletion bench/src/reader.zig
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@ pub const main = @import("common.zig").main;

/// Benchmark entry point: parses `data` as a full XML document through the
/// high-level `Reader` API and discards every event.
///
/// NOTE(review): the scraped diff left the pre-commit call (positional decoder
/// argument) interleaved with its replacement, producing a duplicate
/// `var reader` declaration; this keeps only the post-commit form, where the
/// decoder type is selected via the options struct.
pub fn runBench(data: []const u8) !void {
    var data_stream = std.io.fixedBufferStream(data);
    var reader = xml.reader(std.heap.c_allocator, data_stream.reader(), .{
        .DecoderType = xml.encoding.Utf8Decoder,
    });
    defer reader.deinit();
    // Drain all events; the loop body is empty because only parse throughput
    // is being measured.
    while (try reader.next()) |_| {}
}
4 changes: 3 additions & 1 deletion bench/src/token_reader.zig
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ pub const main = @import("common.zig").main;

/// Benchmark entry point: tokenizes `data` with the lower-level `TokenReader`
/// API and discards every token.
///
/// NOTE(review): the scraped diff left the pre-commit call (positional decoder
/// argument) interleaved with its replacement, producing a duplicate
/// `var token_reader` declaration; this keeps only the post-commit form, where
/// the decoder type is selected via the options struct.
pub fn runBench(data: []const u8) !void {
    var data_stream = std.io.fixedBufferStream(data);
    var token_reader = xml.tokenReader(data_stream.reader(), .{
        .DecoderType = xml.encoding.Utf8Decoder,
    });
    // Drain all tokens; the loop body is empty because only tokenization
    // throughput is being measured.
    while (try token_reader.next()) |_| {}
}
2 changes: 1 addition & 1 deletion examples/read.zig
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ pub fn main() !void {
const input_file = try std.fs.cwd().openFile(input_path, .{});
defer input_file.close();
var input_buffered_reader = std.io.bufferedReader(input_file.reader());
var reader = xml.reader(allocator, input_buffered_reader.reader(), xml.encoding.DefaultDecoder{}, .{});
var reader = xml.reader(allocator, input_buffered_reader.reader(), .{});
defer reader.deinit();

while (try reader.next()) |event| {
Expand Down
32 changes: 15 additions & 17 deletions src/reader.zig
Original file line number Diff line number Diff line change
Expand Up @@ -294,10 +294,9 @@ pub const NoOpNamespaceContext = struct {
/// Convenience constructor for a `Reader` wrapping `r`, using a
/// default-initialized decoder of `options.DecoderType`.
///
/// Per the commit description, callers that need to pass a
/// non-default-initialized decoder instance should call `Reader(...).init`
/// directly instead of this helper.
///
/// NOTE(review): the scraped diff left the pre-commit signature lines
/// (separate `decoder: anytype` parameter) interleaved with their
/// replacements; this keeps only the post-commit form.
pub fn reader(
    allocator: Allocator,
    r: anytype,
    comptime options: ReaderOptions,
) Reader(@TypeOf(r), options) {
    return Reader(@TypeOf(r), options).init(allocator, r, .{});
}

/// Reads a full XML document from a `std.io.Reader`.
Expand Down Expand Up @@ -354,6 +353,8 @@ pub fn readDocument(

/// Options for a `Reader`.
pub const ReaderOptions = struct {
/// The type of decoder to use.
DecoderType: type = encoding.DefaultDecoder,
/// The size of the internal buffer.
///
/// This limits the byte length of "non-splittable" content, such as
Expand Down Expand Up @@ -390,11 +391,7 @@ pub const ReaderOptions = struct {
/// Since this parser wraps a `TokenReader`, the caveats on the `buffer_size`
/// bounding the length of "non-splittable" content which are outlined in its
/// documentation apply here as well.
pub fn Reader(
comptime ReaderType: type,
comptime DecoderType: type,
comptime options: ReaderOptions,
) type {
pub fn Reader(comptime ReaderType: type, comptime options: ReaderOptions) type {
return struct {
token_reader: TokenReaderType,
/// A stack of element names enclosing the current context.
Expand Down Expand Up @@ -422,7 +419,8 @@ pub fn Reader(
allocator: Allocator,

const Self = @This();
const TokenReaderType = TokenReader(ReaderType, DecoderType, .{
const TokenReaderType = TokenReader(ReaderType, .{
.DecoderType = options.DecoderType,
.buffer_size = options.buffer_size,
.enable_normalization = options.enable_normalization,
.track_location = options.track_location,
Expand All @@ -439,7 +437,7 @@ pub fn Reader(
QNameNotAllowed,
} || Allocator.Error || TokenReaderType.Error;

pub fn init(allocator: Allocator, r: ReaderType, decoder: DecoderType) Self {
pub fn init(allocator: Allocator, r: ReaderType, decoder: options.DecoderType) Self {
return .{
.token_reader = TokenReaderType.init(r, decoder),
.event_arena = ArenaAllocator.init(allocator),
Expand Down Expand Up @@ -916,7 +914,7 @@ test "namespace handling" {

fn testValid(comptime options: ReaderOptions, input: []const u8, expected_events: []const Event) !void {
var input_stream = std.io.fixedBufferStream(input);
var input_reader = reader(testing.allocator, input_stream.reader(), encoding.Utf8Decoder{}, options);
var input_reader = reader(testing.allocator, input_stream.reader(), options);
defer input_reader.deinit();
var i: usize = 0;
while (try input_reader.next()) |event| : (i += 1) {
Expand All @@ -937,7 +935,7 @@ fn testValid(comptime options: ReaderOptions, input: []const u8, expected_events

fn testInvalid(comptime options: ReaderOptions, input: []const u8, expected_error: anyerror) !void {
var input_stream = std.io.fixedBufferStream(input);
var input_reader = reader(testing.allocator, input_stream.reader(), encoding.Utf8Decoder{}, options);
var input_reader = reader(testing.allocator, input_stream.reader(), options);
defer input_reader.deinit();
while (input_reader.next()) |_| {} else |err| {
try testing.expectEqual(expected_error, err);
Expand Down Expand Up @@ -966,7 +964,7 @@ test "nextNode" {
\\
\\
);
var input_reader = reader(testing.allocator, input_stream.reader(), encoding.Utf8Decoder{}, .{});
var input_reader = reader(testing.allocator, input_stream.reader(), .{});
defer input_reader.deinit();

try testing.expectEqualDeep(@as(?Event, .{ .xml_declaration = .{ .version = "1.0" } }), try input_reader.next());
Expand Down Expand Up @@ -1015,7 +1013,7 @@ test "nextNode namespace handling" {
\\ </child>
\\</a:root>
);
var input_reader = reader(testing.allocator, input_stream.reader(), encoding.Utf8Decoder{}, .{});
var input_reader = reader(testing.allocator, input_stream.reader(), .{});
defer input_reader.deinit();

var root_start = try input_reader.next();
Expand Down Expand Up @@ -1065,7 +1063,7 @@ test readDocument {
\\
\\
);
var document_node = try readDocument(testing.allocator, input_stream.reader(), encoding.Utf8Decoder{}, .{});
var document_node = try readDocument(testing.allocator, input_stream.reader(), .{});
defer document_node.deinit();

try testing.expectEqualDeep(Node.Document{ .version = "1.0", .children = &.{
Expand Down Expand Up @@ -1103,7 +1101,7 @@ test Children {
\\ <child2><!-- Comment --><child3/></child2>
\\</root>
);
var input_reader = reader(testing.allocator, input_stream.reader(), encoding.Utf8Decoder{}, .{});
var input_reader = reader(testing.allocator, input_stream.reader(), .{});
defer input_reader.deinit();

try testing.expectEqualDeep(@as(?Event, .{ .element_start = .{ .name = .{ .local = "root" } } }), try input_reader.next());
Expand Down Expand Up @@ -1135,7 +1133,7 @@ test "skip children" {
\\ <child2><!-- Comment --><child3/></child2>
\\</root>
);
var input_reader = reader(testing.allocator, input_stream.reader(), encoding.Utf8Decoder{}, .{});
var input_reader = reader(testing.allocator, input_stream.reader(), .{});
defer input_reader.deinit();

try testing.expectEqualDeep(@as(?Event, .{ .element_start = .{ .name = .{ .local = "root" } } }), try input_reader.next());
Expand Down
25 changes: 11 additions & 14 deletions src/token_reader.zig
Original file line number Diff line number Diff line change
Expand Up @@ -158,14 +158,15 @@ pub const NoOpLocation = struct {
/// (4096).
/// Convenience constructor for a `TokenReader` wrapping `reader`, using a
/// default-initialized decoder of `options.DecoderType`.
///
/// Per the commit description, callers that need to pass a
/// non-default-initialized decoder instance should call
/// `TokenReader(...).init` directly instead of this helper.
///
/// NOTE(review): the scraped diff left the pre-commit signature lines
/// (separate `decoder: anytype` parameter) interleaved with their
/// replacements; this keeps only the post-commit form.
pub fn tokenReader(
    reader: anytype,
    comptime options: TokenReaderOptions,
) TokenReader(@TypeOf(reader), options) {
    return TokenReader(@TypeOf(reader), options).init(reader, .{});
}

/// Options for a `TokenReader`.
pub const TokenReaderOptions = struct {
/// The type of decoder to use.
DecoderType: type = encoding.DefaultDecoder,
/// The size of the internal buffer.
///
/// This limits the byte length of "non-splittable" content, such as
Expand Down Expand Up @@ -204,15 +205,11 @@ pub const TokenReaderOptions = struct {
/// important. Additionally, `buffer_size` limits the maximum byte length of
/// "unsplittable" content, such as element and attribute names (but not
/// "splittable" content, such as element text content and attribute values).
pub fn TokenReader(
comptime ReaderType: type,
comptime DecoderType: type,
comptime options: TokenReaderOptions,
) type {
pub fn TokenReader(comptime ReaderType: type, comptime options: TokenReaderOptions) type {
return struct {
scanner: Scanner,
reader: ReaderType,
decoder: DecoderType,
decoder: options.DecoderType,
/// The current location in the file (if enabled).
location: if (options.track_location) Location else NoOpLocation = .{},
/// Buffered content read by the reader for the current token.
Expand All @@ -232,11 +229,11 @@ pub fn TokenReader(
InvalidPiTarget,
Overflow,
UnexpectedEndOfInput,
} || ReaderType.Error || DecoderType.Error || Scanner.Error;
} || ReaderType.Error || options.DecoderType.Error || Scanner.Error;

const max_encoded_codepoint_len = @max(DecoderType.max_encoded_codepoint_len, 4);
const max_encoded_codepoint_len = @max(options.DecoderType.max_encoded_codepoint_len, 4);

pub fn init(reader: ReaderType, decoder: DecoderType) Self {
pub fn init(reader: ReaderType, decoder: options.DecoderType) Self {
return .{
.scanner = Scanner{},
.reader = reader,
Expand Down Expand Up @@ -510,7 +507,7 @@ test "PI target" {

fn testValid(comptime options: TokenReaderOptions, input: []const u8, expected_tokens: []const Token) !void {
var input_stream = std.io.fixedBufferStream(input);
var input_reader = tokenReader(input_stream.reader(), encoding.Utf8Decoder{}, options);
var input_reader = tokenReader(input_stream.reader(), options);
var i: usize = 0;
while (try input_reader.next()) |token| : (i += 1) {
if (i >= expected_tokens.len) {
Expand All @@ -530,7 +527,7 @@ fn testValid(comptime options: TokenReaderOptions, input: []const u8, expected_t

fn testInvalid(comptime options: TokenReaderOptions, input: []const u8, expected_error: anyerror) !void {
var input_stream = std.io.fixedBufferStream(input);
var input_reader = tokenReader(input_stream.reader(), encoding.Utf8Decoder{}, options);
var input_reader = tokenReader(input_stream.reader(), options);
while (input_reader.next()) |_| {} else |err| {
try testing.expectEqual(expected_error, err);
}
Expand Down

0 comments on commit 11f9db5

Please sign in to comment.