Skip to content

Commit

Permalink
feat: add namespace support to Writer (#41)
Browse files Browse the repository at this point in the history
  • Loading branch information
ianprime0509 authored Nov 2, 2024
2 parents 93c3444 + fc9b0d2 commit 8fda155
Show file tree
Hide file tree
Showing 8 changed files with 853 additions and 101 deletions.
13 changes: 12 additions & 1 deletion build.zig
Original file line number Diff line number Diff line change
Expand Up @@ -35,13 +35,24 @@ pub fn build(b: *Build) void {
docs_step.dependOn(&xml_docs_copy.step);

const install_examples_step = b.step("install-examples", "Build and install the example programs");

const example_reader_exe = b.addExecutable(.{
.name = "example-reader",
.name = "reader",
.root_source_file = b.path("examples/reader.zig"),
.target = target,
.optimize = optimize,
});
example_reader_exe.root_module.addImport("xml", xml);
const example_reader_install = b.addInstallArtifact(example_reader_exe, .{});
install_examples_step.dependOn(&example_reader_install.step);

const example_canonicalize_exe = b.addExecutable(.{
.name = "canonicalize",
.root_source_file = b.path("examples/canonicalize.zig"),
.target = target,
.optimize = optimize,
});
example_canonicalize_exe.root_module.addImport("xml", xml);
const example_canonicalize_install = b.addInstallArtifact(example_canonicalize_exe, .{});
install_examples_step.dependOn(&example_canonicalize_install.step);
}
91 changes: 91 additions & 0 deletions examples/canonicalize.zig
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
const std = @import("std");
const log = std.log;
const xml = @import("xml");

/// Entry point of the `canonicalize` example.
///
/// Usage: canonicalize [-p|--pretty] file
///
/// Reads the XML document stored in `file` node by node and re-emits it on
/// standard output through the `xml` writer:
/// - the XML declaration and comments are dropped;
/// - each element's attributes are written in the order given by comparing
///   their names with `std.mem.lessThan`;
/// - CDATA content is passed to `writer.text`, like ordinary text;
/// - character references are written as their UTF-8 encoding and entity
///   references as the value found in `xml.predefined_entities`.
///
/// Returns `error.InvalidArguments` when no file, or more than one non-flag
/// argument, is given. When the reader reports `error.MalformedXml`, the line,
/// column and error code are logged before the error is returned. Every other
/// error is propagated unchanged.
pub fn main() !void {
    var gpa_impl: std.heap.GeneralPurposeAllocator(.{}) = .{};
    defer _ = gpa_impl.deinit();
    const gpa = gpa_impl.allocator();

    var args = try std.process.argsWithAllocator(gpa);
    defer args.deinit();
    // Discard the first argument (conventionally the program name).
    _ = args.next();

    var pretty = false;
    // Owned copy of the input path; released on every exit path.
    var maybe_path: ?[]u8 = null;
    defer if (maybe_path) |owned| gpa.free(owned);
    while (args.next()) |arg| {
        const is_pretty_flag = std.mem.eql(u8, arg, "-p") or std.mem.eql(u8, arg, "--pretty");
        if (is_pretty_flag) {
            pretty = true;
            continue;
        }
        // usage: canonicalize [-p|--pretty] file
        if (maybe_path != null) return error.InvalidArguments;
        maybe_path = try gpa.dupe(u8, arg);
    }
    const input_path = maybe_path orelse return error.InvalidArguments;

    var input_file = try std.fs.cwd().openFile(input_path, .{});
    defer input_file.close();
    var doc = xml.streamingDocument(gpa, input_file.reader());
    defer doc.deinit();
    var reader = doc.reader(gpa, .{});
    defer reader.deinit();

    var stdout_buf = std.io.bufferedWriter(std.io.getStdOut().writer());
    const stdout_output = xml.streamingOutput(stdout_buf.writer());
    var writer = stdout_output.writer(gpa, .{
        .indent = if (pretty) " " else "",
    });
    defer writer.deinit();

    while (true) {
        const node = reader.read() catch |err| {
            // Only malformed input gets a diagnostic; anything else is
            // returned to the caller untouched.
            if (err != error.MalformedXml) return err;
            const loc = reader.errorLocation();
            log.err("{}:{}: {}", .{ loc.line, loc.column, reader.errorCode() });
            return error.MalformedXml;
        };
        switch (node) {
            .eof => break,
            // ignored in canonical form
            .xml_declaration, .comment => {},
            .text => try writer.text(try reader.text()),
            .cdata => try writer.text(try reader.cdata()),
            .pi => try writer.pi(reader.piTarget(), try reader.piData()),
            .element_end => try writer.elementEnd(),
            .element_start => {
                try writer.elementStart(reader.elementName());

                // Sort a permutation of attribute indices rather than the
                // attributes themselves, then emit them in that order.
                const attr_count = reader.attributeCount();
                const order = try gpa.alloc(usize, attr_count);
                defer gpa.free(order);
                for (order, 0..) |*slot, i| slot.* = i;

                const ByAttributeName = struct {
                    fn lessThan(r: @TypeOf(reader), lhs: usize, rhs: usize) bool {
                        return std.mem.lessThan(u8, r.attributeName(lhs), r.attributeName(rhs));
                    }
                };
                std.sort.pdq(usize, order, reader, ByAttributeName.lessThan);

                for (order) |attr_index| {
                    const attr_name = reader.attributeName(attr_index);
                    const attr_value = try reader.attributeValue(attr_index);
                    try writer.attribute(attr_name, attr_value);
                }
            },
            .character_reference => {
                // NOTE(review): `catch unreachable` assumes the reader only
                // yields encodable code points — confirm against Reader.
                var utf8: [4]u8 = undefined;
                const utf8_len = std.unicode.utf8Encode(reader.characterReferenceChar(), &utf8) catch unreachable;
                try writer.text(utf8[0..utf8_len]);
            },
            .entity_reference => {
                // NOTE(review): `orelse unreachable` assumes the reader has
                // already rejected non-predefined entities — confirm.
                const entity_name = reader.entityReferenceName();
                const replacement = xml.predefined_entities.get(entity_name) orelse unreachable;
                try writer.text(replacement);
            },
        }
    }

    try stdout_buf.flush();
}
2 changes: 1 addition & 1 deletion examples/reader.zig
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ pub fn main() !void {
const args = try std.process.argsAlloc(gpa);
defer std.process.argsFree(gpa, args);
if (args.len != 2) {
return error.InvalidArguments; // usage: example-reader file
return error.InvalidArguments; // usage: reader file
}

var input_file = try std.fs.cwd().openFile(args[1], .{});
Expand Down
44 changes: 43 additions & 1 deletion fuzz/src/fuzz.zig
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,53 @@ fn fuzz(gpa: Allocator, input: []const u8) !void {
var doc = xml.StaticDocument.init(input);
var reader = doc.reader(gpa, .{});
defer reader.deinit();

var out_bytes = std.ArrayList(u8).init(gpa);
defer out_bytes.deinit();
const output = xml.streamingOutput(out_bytes.writer());
var writer = output.writer(gpa, .{});
defer writer.deinit();

while (true) {
const node = reader.read() catch |err| switch (err) {
error.MalformedXml => break,
error.OutOfMemory => return error.OutOfMemory,
};
if (node == .eof) break;
switch (node) {
.eof => break,
.xml_declaration => {
try writer.xmlDeclaration(reader.xmlDeclarationEncoding(), reader.xmlDeclarationStandalone());
},
.comment => {
// TODO: not implemented yet
},
.element_start => {
try writer.elementStart(reader.elementName());
for (0..reader.attributeCount()) |i| {
try writer.attribute(reader.attributeName(i), try reader.attributeValue(i));
}
},
.element_end => {
try writer.elementEnd();
},
.pi => {
try writer.pi(reader.piTarget(), try reader.piData());
},
.text => {
try writer.text(try reader.text());
},
.cdata => {
try writer.text(try reader.cdata());
},
.character_reference => {
var buf: [4]u8 = undefined;
const len = std.unicode.utf8Encode(reader.characterReferenceChar(), &buf) catch unreachable;
try writer.text(buf[0..len]);
},
.entity_reference => {
const value = xml.predefined_entities.get(reader.entityReferenceName()) orelse unreachable;
try writer.text(value);
},
}
}
}
2 changes: 1 addition & 1 deletion src/Reader.zig
Original file line number Diff line number Diff line change
Expand Up @@ -2236,6 +2236,6 @@ fn addString(reader: *Reader, s: []const u8) !StringIndex {
return @enumFromInt(start);
}

fn string(reader: Reader, index: StringIndex) []const u8 {
fn string(reader: *const Reader, index: StringIndex) []const u8 {
return std.mem.sliceTo(reader.strings.items[@intFromEnum(index)..], 0);
}
Loading

0 comments on commit 8fda155

Please sign in to comment.