From 928e51ca410fa87f86d65148b52399e40777a7db Mon Sep 17 00:00:00 2001 From: Ian Johnson Date: Tue, 13 Aug 2024 23:33:42 -0400 Subject: [PATCH] refactor: rewrite the project This is a complete rewrite of the project, using a more efficient and flexible design based around the concept of a forward-only "view" into the source document. Some capabilities are regressed in this initial implementation, including built-in non-UTF-8 support and some of the writer functionality. --- .editorconfig | 11 + .github/workflows/ci.yml | 2 +- .gitignore | 6 - README.md | 187 +-- bench/build.zig | 8 - bench/build.zig.zon | 4 +- bench/src/reader.zig | 8 +- bench/src/scanner.zig | 16 - bench/src/token_reader.zig | 15 - build.zig | 167 +- build.zig.zon | 2 +- examples/read.zig | 46 - examples/reader.zig | 105 ++ examples/scan.zig | 53 - fuzz/.gitignore | 1 + fuzz/build.zig | 34 + fuzz/build.zig.zon | 18 + fuzz/dictionaries/xml_UTF_16.dict | 103 -- fuzz/dictionaries/xml_UTF_16BE.dict | 103 -- fuzz/dictionaries/xml_UTF_16LE.dict | 103 -- fuzz/inputs/invalid-utf16be.xml | Bin 82 -> 0 bytes fuzz/inputs/invalid-utf16le.xml | Bin 82 -> 0 bytes fuzz/inputs/invalid.xml | 1 - fuzz/inputs/sample.xml | 9 + fuzz/inputs/valid-utf16be.xml | Bin 274 -> 0 bytes fuzz/inputs/valid-utf16le.xml | Bin 274 -> 0 bytes fuzz/inputs/valid.xml | 7 - fuzz/main.zig | 30 - fuzz/src/fuzz.zig | 26 + src/Reader.zig | 2216 +++++++++++++++++++++++++++ src/Scanner.zig | 2045 ------------------------ src/Writer.zig | 198 +++ src/compat.zig | 17 - src/encoding.zig | 451 ------ src/node.zig | 60 - src/reader.zig | 1149 -------------- src/syntax.zig | 106 -- src/token_reader.zig | 621 -------- src/writer.zig | 264 ---- src/xml.zig | 509 +++++- test/xmlconf.zig | 471 ------ xmlconf/build.zig | 44 + xmlconf/build.zig.zon | 18 + xmlconf/src/xmlconf.zig | 456 ++++++ 44 files changed, 3640 insertions(+), 6050 deletions(-) create mode 100644 .editorconfig delete mode 100644 bench/src/scanner.zig delete mode 100644 
bench/src/token_reader.zig delete mode 100644 examples/read.zig create mode 100644 examples/reader.zig delete mode 100644 examples/scan.zig create mode 100644 fuzz/.gitignore create mode 100644 fuzz/build.zig create mode 100644 fuzz/build.zig.zon delete mode 100644 fuzz/dictionaries/xml_UTF_16.dict delete mode 100644 fuzz/dictionaries/xml_UTF_16BE.dict delete mode 100644 fuzz/dictionaries/xml_UTF_16LE.dict delete mode 100644 fuzz/inputs/invalid-utf16be.xml delete mode 100644 fuzz/inputs/invalid-utf16le.xml delete mode 100644 fuzz/inputs/invalid.xml create mode 100644 fuzz/inputs/sample.xml delete mode 100644 fuzz/inputs/valid-utf16be.xml delete mode 100644 fuzz/inputs/valid-utf16le.xml delete mode 100644 fuzz/inputs/valid.xml delete mode 100644 fuzz/main.zig create mode 100644 fuzz/src/fuzz.zig create mode 100644 src/Reader.zig delete mode 100644 src/Scanner.zig create mode 100644 src/Writer.zig delete mode 100644 src/compat.zig delete mode 100644 src/encoding.zig delete mode 100644 src/node.zig delete mode 100644 src/reader.zig delete mode 100644 src/syntax.zig delete mode 100644 src/token_reader.zig delete mode 100644 src/writer.zig delete mode 100644 test/xmlconf.zig create mode 100644 xmlconf/build.zig create mode 100644 xmlconf/build.zig.zon create mode 100644 xmlconf/src/xmlconf.zig diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..98d0681 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,11 @@ +root = true + +[*] +charset = utf-8 +end_of_line = lf +indent_style = space +insert_final_newline = true +trim_trailing_whitespace = true + +[*.xml] +indent_size = 2 diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a3ee6e5..cd6e3f8 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -14,7 +14,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - zig-version: [0.12.0, 0.13.0, master] + zig-version: [0.13.0, master] steps: - name: Checkout uses: actions/checkout@v3 diff --git a/.gitignore b/.gitignore 
index f33e3bd..d8c8979 100644 --- a/.gitignore +++ b/.gitignore @@ -1,8 +1,2 @@ -bench/*.xml -callgrind.out.* -core* -fuzz/outputs -test/xmlconf .zig-cache -zig-cache zig-out diff --git a/README.md b/README.md index 3abe6ee..88a5af4 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # zig-xml -zig-xml is an XML library for Zig, currently supporting Zig 0.12.0, 0.13.0, and -the latest master at the time of writing. +zig-xml is an XML library for Zig, currently supporting Zig 0.13.0 and the +latest master at the time of writing. See the documentation in the code for more information about the available APIs (start in `xml.zig`). Autodocs are also published to GitHub Pages: @@ -12,185 +12,26 @@ The library aims to confirm with the following standards: - [XML 1.0 Fifth Edition](https://www.w3.org/TR/2008/REC-xml-20081126/) - [XML Namespaces 1.0 Third Edition](https://www.w3.org/TR/2009/REC-xml-names-20091208/) -Other standards (such as XML 1.1 or XML 1.0 prior to the fifth edition) are only -supported insofar as they are compatible with the above standards. In practice, -this should not make much difference, since XML 1.1 is rarely used, and the -differences between XML 1.0 editions are minor (the XML 1.0 fifth edition -standard allows many more characters in names than previous editions, subsuming -the -[only non-harmful feature of XML 1.1](http://www.ibiblio.org/xml/books/effectivexml/chapters/03.html)). - -## Feature overview - -Key for the list: +Currently, DTDs (DOCTYPE) are not supported, nor is any non-UTF-8 encoding. 
-- ✅ Supported -- 🚧 Partially supported -- ❌ Unsupported, but planned -- ❓️ Unsupported, maybe planned (long-term) -- 👎️ Unsupported, not planned - -Features: - -- ✅ Streaming parser (three options are available, `Reader` is the most - general-purpose but also the slowest) - - ✅ Core XML 1.0 language minus `DOCTYPE` - - ✅ Well-formedness checks not involving DTD (varying degrees of lesser - support in `TokenReader` and `Scanner`) - - ✅ End-of-line and attribute value normalization (in `Reader` and - `TokenReader` only, optional) - - ✅ Namespace support (in `Reader` only, optional) - - 🚧 Detailed errors - - 🚧 Source location tracking - - ❌ `DOCTYPE` (just parsing, not doing anything with it) - (https://github.com/ianprime0509/zig-xml/issues/9) - - ❓️ Non-validating `DOCTYPE` handling (entity expansion, further attribute - value normalization for non-`CDATA` types) (no external DTD content) - - ❓️ Hooks for loading external DTD content - - ❓️ XML 1.1 - - 👎️ Validation -- 🚧 DOM parser (current `Node` abstraction is limited and read-only) -- ✅ Unicode - - ✅ UTF-8 - - ✅ UTF-16 - - ✅ UTF-8 vs UTF-16 auto-detection (`DefaultDecoder`) - - ❌ US-ASCII (this is for support of US-ASCII as its own encoding; note that - all ASCII can be treated as UTF-8) - - ❌ ISO 8859-1 - - ❓️ Other encodings besides these - - ✅ User-definable additional encodings (meaning even though this library - doesn't provide other encodings out of the box, you can write them yourself) -- 🚧 XML writer (https://github.com/ianprime0509/zig-xml/issues/10) -- 👎️ XPath, XML Schema, other XML-related stuff +Other standards (such as XML 1.1 or XML 1.0 prior to the fifth edition) are only +supported insofar as they are compatible with the above standards. ## Examples -See the `examples` directory (these examples are not very good right now but -they do show how to use most of the library). 
- -Another ("real-world") example can be found in the zig-gobject project: -https://github.com/ianprime0509/zig-gobject/blob/main/src/gir.zig +A basic example of usage can be found in the `examples` directory, and can be +built using `zig build install-examples`. ## Tests -There are several tests in the project itself using the standard Zig test -system. These tests can be run using `zig build test`. - -There is also a runner for the -[W3C XML Conformance Test Suite](https://www.w3.org/XML/Test/) under -`test/xmlconf.zig`. To build this runner as a standalone executable, run -`zig build install-xmlconf`. If you download the 20130923 version of the test -suite and place the `xmlconf` directory under `test`, you can also use -`zig build run-xmlconf` to run all the test suites the runner can currently -understand. The test suite files are not contained directly in this repository -due to unclear licensing and file size (16MB uncompressed). - -At the time of writing, the library passes all the conformance tests it is able -to run (353 of them); the other tests are skipped because they involve doctype -in one way or another or are for XML standards which aren't supported (XML 1.1, -editions of XML 1.0 besides the fifth edition). - -## Fuzzing - -This library has some basic support for fuzz testing, taking its basic method -from the article -[Fuzzing Zig Code Using AFL++](https://www.ryanliptak.com/blog/fuzzing-zig-code/). -To start fuzzing, you will need -[AFL++](https://github.com/AFLplusplus/AFLplusplus), specifically -`afl-clang-lto` and `afl-fuzz`, in your path. Then, you can run -`zig build fuzz`. To resume a prior fuzzing session, pass `-Dresume=true`. - -You can also run `zig build install-fuzz` to just build the fuzz executable and -then run it with `afl-fuzz` separately. 
- -Finally, if any crashes are identified during fuzzing, they can be replayed by -feeding the crash input back to `zig build fuzz-reproduce`, which will yield an -error trace for further debugging. - -## Benchmarking and performance - -**TL;DR:** `Reader` and `TokenReader` are relatively slow compared to other -popular libraries. `Scanner` is faster (on a similar level as yxml), but -comparatively doesn't do very much. - -There is a benchmarking setup in the `bench` directory. The benchmark is for -parsing through an entire XML file without doing any additional processing. The -XML file is loaded completely into memory first, then the parser is executed on -it until it completes. - -Below are some benchmarking results as of August 14, 2023, using Zig -`0.12.0-dev.906+2d7d037c4`, as performed on my laptop. The results were obtained -by executing [poop](https://github.com/andrewrk/poop) on the benchmark -implementations. - -### GTK 4 GIR - -This is a 5.7MB XML file containing GObject introspection metadata for GTK 4. In -the output below, libxml2 is used as the baseline. The three benchmarks -`reader`, `token_reader`, and `scanner` test the three APIs provided by this -library, and the mxml and yxml libraries are also included for comparison. 
- -``` -Benchmark 1 (78 runs): zig-out/bin/libxml2 Gtk-4.0.gir - measurement mean ± σ min … max outliers delta - wall_time 64.2ms ± 1.87ms 55.5ms … 70.1ms 4 ( 5%) 0% - peak_rss 14.6MB ± 76.4KB 14.4MB … 14.7MB 0 ( 0%) 0% - cpu_cycles 196M ± 1.03M 194M … 200M 3 ( 4%) 0% - instructions 409M ± 43.1 409M … 409M 0 ( 0%) 0% - cache_references 5.44M ± 325K 5.08M … 6.97M 5 ( 6%) 0% - cache_misses 66.0K ± 5.36K 55.0K … 91.0K 3 ( 4%) 0% - branch_misses 874K ± 3.80K 868K … 890K 1 ( 1%) 0% - -Benchmark 2 (30 runs): zig-out/bin/reader Gtk-4.0.gir - measurement mean ± σ min … max outliers delta - wall_time 170ms ± 1.59ms 167ms … 173ms 0 ( 0%) 💩+164.2% ± 1.2% - peak_rss 7.29MB ± 73.8KB 7.08MB … 7.34MB 0 ( 0%) ⚡- 50.0% ± 0.2% - cpu_cycles 583M ± 2.88M 579M … 590M 0 ( 0%) 💩+196.9% ± 0.4% - instructions 1.38G ± 32.2 1.38G … 1.38G 0 ( 0%) 💩+237.2% ± 0.0% - cache_references 751K ± 135K 580K … 1.12M 0 ( 0%) ⚡- 86.2% ± 2.2% - cache_misses 17.5K ± 5.41K 12.9K … 34.5K 3 (10%) ⚡- 73.5% ± 3.5% - branch_misses 1.06M ± 10.9K 1.05M … 1.11M 2 ( 7%) 💩+ 21.5% ± 0.3% - -Benchmark 3 (38 runs): zig-out/bin/token_reader Gtk-4.0.gir - measurement mean ± σ min … max outliers delta - wall_time 135ms ± 1.59ms 132ms … 138ms 0 ( 0%) 💩+110.4% ± 1.1% - peak_rss 7.31MB ± 54.2KB 7.21MB … 7.34MB 8 (21%) ⚡- 49.8% ± 0.2% - cpu_cycles 462M ± 2.20M 459M … 467M 0 ( 0%) 💩+135.5% ± 0.3% - instructions 1.14G ± 21.0 1.14G … 1.14G 0 ( 0%) 💩+179.9% ± 0.0% - cache_references 237K ± 7.40K 225K … 255K 0 ( 0%) ⚡- 95.6% ± 1.9% - cache_misses 10.1K ± 1.29K 8.16K … 13.2K 0 ( 0%) ⚡- 84.8% ± 2.7% - branch_misses 815K ± 919 813K … 816K 3 ( 8%) ⚡- 6.8% ± 0.1% - -Benchmark 4 (103 runs): zig-out/bin/scanner Gtk-4.0.gir - measurement mean ± σ min … max outliers delta - wall_time 48.6ms ± 1.82ms 45.8ms … 55.2ms 4 ( 4%) ⚡- 24.3% ± 0.8% - peak_rss 7.27MB ± 87.8KB 7.08MB … 7.34MB 0 ( 0%) ⚡- 50.1% ± 0.2% - cpu_cycles 152M ± 3.48M 151M … 177M 5 ( 5%) ⚡- 22.4% ± 0.4% - instructions 472M ± 19.9 472M … 472M 0 ( 0%) 💩+ 15.6% ± 0.0% - 
cache_references 209K ± 1.80K 207K … 222K 4 ( 4%) ⚡- 96.2% ± 1.2% - cache_misses 7.95K ± 179 7.59K … 8.50K 0 ( 0%) ⚡- 88.0% ± 1.6% - branch_misses 511K ± 874 510K … 518K 13 (13%) ⚡- 41.6% ± 0.1% - -Benchmark 5 (63 runs): zig-out/bin/mxml Gtk-4.0.gir - measurement mean ± σ min … max outliers delta - wall_time 80.2ms ± 2.44ms 76.0ms … 87.9ms 3 ( 5%) 💩+ 24.9% ± 1.1% - peak_rss 7.44MB ± 56.3KB 7.34MB … 7.47MB 15 (24%) ⚡- 48.9% ± 0.2% - cpu_cycles 262M ± 2.95M 258M … 281M 1 ( 2%) 💩+ 33.4% ± 0.4% - instructions 762M ± 56.7K 762M … 762M 3 ( 5%) 💩+ 86.4% ± 0.0% - cache_references 401K ± 473K 272K … 3.08M 10 (16%) ⚡- 92.6% ± 2.4% - cache_misses 14.2K ± 2.62K 12.0K … 31.1K 2 ( 3%) ⚡- 78.5% ± 2.2% - branch_misses 1.02M ± 99.5K 998K … 1.79M 4 ( 6%) 💩+ 16.3% ± 2.5% +The library has several tests of its own, which can be run using `zig build test`. -Benchmark 6 (196 runs): zig-out/bin/yxml Gtk-4.0.gir - measurement mean ± σ min … max outliers delta - wall_time 25.4ms ± 1.03ms 23.9ms … 34.3ms 3 ( 2%) ⚡- 60.4% ± 0.5% - peak_rss 7.29MB ± 77.0KB 7.08MB … 7.34MB 0 ( 0%) ⚡- 50.0% ± 0.1% - cpu_cycles 71.0M ± 1.03M 70.5M … 84.2M 5 ( 3%) ⚡- 63.8% ± 0.1% - instructions 236M ± 20.1 236M … 236M 0 ( 0%) ⚡- 42.2% ± 0.0% - cache_references 202K ± 805 201K … 210K 7 ( 4%) ⚡- 96.3% ± 0.8% - cache_misses 8.00K ± 215 7.64K … 9.57K 4 ( 2%) ⚡- 87.9% ± 1.1% - branch_misses 239K ± 787 238K … 248K 21 (11%) ⚡- 72.7% ± 0.1% -``` +The `xmlconf` directory additionally contains a runner for the [W3C XML +Conformance Test Suite](https://www.w3.org/XML/Test/). Running `zig build test` +in that directory will fetch the test suite distribution tarball and run the +tests within. Due to features missing in the current parser implementation (DTD +support), many tests are currently skipped. At the time of writing, 250 tests +pass, and 924 are skipped due to unsupported features. 
## License diff --git a/bench/build.zig b/bench/build.zig index 2be01c6..dd8058d 100644 --- a/bench/build.zig +++ b/bench/build.zig @@ -5,14 +5,6 @@ const Step = Build.Step; pub fn build(b: *Build) !void { const xml = b.dependency("xml", .{}).module("xml"); - const bench_scanner = addBench(b, "scanner"); - bench_scanner.root_module.addImport("xml", xml); - bench_scanner.linkLibC(); - - const bench_token_reader = addBench(b, "token_reader"); - bench_token_reader.root_module.addImport("xml", xml); - bench_token_reader.linkLibC(); - const bench_reader = addBench(b, "reader"); bench_reader.root_module.addImport("xml", xml); bench_reader.linkLibC(); diff --git a/bench/build.zig.zon b/bench/build.zig.zon index 50bfb4f..bfdc142 100644 --- a/bench/build.zig.zon +++ b/bench/build.zig.zon @@ -12,8 +12,8 @@ .path = "..", }, .libxml2 = .{ - .url = "git+https://github.com/ianprime0509/zig-libxml2#9a88110c7ea7a541cb6ead6a648c69a8fc929141", - .hash = "1220b556b7c193580caa53db7e95ad31c0ac589af8adcd894594b93dd1f7875b9405", + .url = "git+https://github.com/ianprime0509/zig-libxml2?ref=main#6cebb963e0ad5789825eb2333a4d21fab8f35a92", + .hash = "12200f672ceb8df0c715a7018e5c53ad434db17f900c620e6238f178cc9a9d80b88e", }, .mxml = .{ .url = "git+https://github.com/michaelrsweet/mxml.git#809204a3051607f54b57e2950f3a5520d79ae383", diff --git a/bench/src/reader.zig b/bench/src/reader.zig index 8a82fbb..dedbae3 100644 --- a/bench/src/reader.zig +++ b/bench/src/reader.zig @@ -4,10 +4,8 @@ const xml = @import("xml"); pub const main = @import("common.zig").main; pub fn runBench(data: []const u8) !void { - var data_stream = std.io.fixedBufferStream(data); - var reader = xml.reader(std.heap.c_allocator, data_stream.reader(), .{ - .DecoderType = xml.encoding.Utf8Decoder, - }); + var doc = xml.StaticDocument.init(data); + var reader = doc.reader(std.heap.c_allocator, .{}); defer reader.deinit(); - while (try reader.next()) |_| {} + while (try reader.read() != .eof) {} } diff --git 
a/bench/src/scanner.zig b/bench/src/scanner.zig deleted file mode 100644 index 933f168..0000000 --- a/bench/src/scanner.zig +++ /dev/null @@ -1,16 +0,0 @@ -const std = @import("std"); -const xml = @import("xml"); - -pub const main = @import("common.zig").main; - -pub fn runBench(data: []const u8) !void { - var scanner = xml.Scanner{}; - var data_stream = std.io.fixedBufferStream(data); - var decoder = xml.encoding.Utf8Decoder{}; - var buf: [4]u8 = undefined; - while (true) { - const c = try decoder.readCodepoint(data_stream.reader(), &buf); - if (!c.present) break; - _ = try scanner.next(c.codepoint, c.byte_length); - } -} diff --git a/bench/src/token_reader.zig b/bench/src/token_reader.zig deleted file mode 100644 index 8858949..0000000 --- a/bench/src/token_reader.zig +++ /dev/null @@ -1,15 +0,0 @@ -const std = @import("std"); -const xml = @import("xml"); - -pub const main = @import("common.zig").main; - -pub fn runBench(data: []const u8) !void { - var data_stream = std.io.fixedBufferStream(data); - var token_reader = xml.tokenReader(data_stream.reader(), .{ - .DecoderType = xml.encoding.Utf8Decoder, - }); - while (true) { - const token = try token_reader.next(); - if (token == .eof) break; - } -} diff --git a/build.zig b/build.zig index bfe58f4..55fdf6b 100644 --- a/build.zig +++ b/build.zig @@ -8,173 +8,40 @@ pub fn build(b: *Build) void { const xml = b.addModule("xml", .{ .root_source_file = b.path("src/xml.zig"), - }); - - addTests(b, target, optimize, xml); - addDocs(b, target); - addExamples(b, target, optimize, xml); - addFuzz(b, target, xml); -} - -fn addTests(b: *Build, target: Build.ResolvedTarget, optimize: Mode, xml: *Build.Module) void { - const main_tests = b.addTest(.{ - .root_source_file = b.path("src/xml.zig"), .target = target, .optimize = optimize, }); - const run_main_tests = b.addRunArtifact(main_tests); - - const test_step = b.step("test", "Run library tests"); - test_step.dependOn(&run_main_tests.step); - - const xmlconf_exe = 
b.addExecutable(.{ - .name = "xmlconf", - .root_source_file = b.path("test/xmlconf.zig"), + const test_step = b.step("test", "Run the tests"); + const xml_test = b.addTest(.{ + .root_source_file = b.path("src/xml.zig"), .target = target, - .optimize = optimize, - }); - xmlconf_exe.root_module.addImport("xml", xml); - - const install_xmlconf_step = b.step("install-xmlconf", "Install xmlconf test runner"); - install_xmlconf_step.dependOn(&b.addInstallArtifact(xmlconf_exe, .{}).step); - - const run_xmlconf_exe = b.addRunArtifact(xmlconf_exe); - if (b.args) |args| { - run_xmlconf_exe.addArgs(args); - } - // Since we can't yet handle doctypes, the test files need to be specified - // individually - run_xmlconf_exe.addArgs(&.{ - "test/xmlconf/eduni/errata-2e/errata2e.xml", - "test/xmlconf/eduni/errata-3e/errata3e.xml", - "test/xmlconf/eduni/errata-4e/errata4e.xml", - "test/xmlconf/eduni/misc/ht-bh.xml", - "test/xmlconf/eduni/namespaces/1.0/rmt-ns10.xml", - "test/xmlconf/eduni/namespaces/1.1/rmt-ns11.xml", - "test/xmlconf/eduni/namespaces/errata-1e/errata1e.xml", - "test/xmlconf/eduni/xml-1.1/xml11.xml", - "test/xmlconf/ibm/ibm_oasis_invalid.xml", - "test/xmlconf/ibm/ibm_oasis_not-wf.xml", - "test/xmlconf/ibm/ibm_oasis_valid.xml", - "test/xmlconf/japanese/japanese.xml", - "test/xmlconf/oasis/oasis.xml", - // The test case files in the sun directory do not have an enclosing - // TESTCASES element, and only work when directly substituted as entity - // content, so they cannot be used at this time. 
- "test/xmlconf/xmltest/xmltest.xml", }); + const xml_test_run = b.addRunArtifact(xml_test); + test_step.dependOn(&xml_test_run.step); - const run_xmlconf_step = b.step("run-xmlconf", "Run xmlconf test cases"); - run_xmlconf_step.dependOn(&run_xmlconf_exe.step); -} - -fn addDocs(b: *Build, target: Build.ResolvedTarget) void { - const obj = b.addObject(.{ + const docs_step = b.step("docs", "Build the documentation"); + const xml_docs = b.addObject(.{ .name = "xml", .root_source_file = b.path("src/xml.zig"), .target = target, .optimize = .Debug, }); - const docs_path = obj.getEmittedDocs(); - - const install_docs = b.addInstallDirectory(.{ - .source_dir = docs_path, + const xml_docs_copy = b.addInstallDirectory(.{ + .source_dir = xml_docs.getEmittedDocs(), .install_dir = .prefix, .install_subdir = "docs", }); + docs_step.dependOn(&xml_docs_copy.step); - const docs_step = b.step("docs", "Generate documentation"); - docs_step.dependOn(&install_docs.step); -} - -fn addExamples(b: *Build, target: Build.ResolvedTarget, optimize: Mode, xml: *Build.Module) void { - const install_examples_step = b.step("install-examples", "Install examples"); - - const scan_exe = b.addExecutable(.{ - .name = "scan", - .root_source_file = b.path("examples/scan.zig"), - .target = target, - .optimize = optimize, - }); - scan_exe.root_module.addImport("xml", xml); - install_examples_step.dependOn(&b.addInstallArtifact(scan_exe, .{}).step); - - const run_scan_exe = b.addRunArtifact(scan_exe); - if (b.args) |args| { - run_scan_exe.addArgs(args); - } - - const run_scan_step = b.step("run-example-scan", "Run scan example"); - run_scan_step.dependOn(&run_scan_exe.step); - - const read_exe = b.addExecutable(.{ - .name = "read", - .root_source_file = b.path("examples/read.zig"), + const install_examples_step = b.step("install-examples", "Build and install the example programs"); + const example_reader_exe = b.addExecutable(.{ + .name = "example-reader", + .root_source_file = 
b.path("examples/reader.zig"), .target = target, .optimize = optimize, }); - read_exe.root_module.addImport("xml", xml); - install_examples_step.dependOn(&b.addInstallArtifact(read_exe, .{}).step); - - const run_read_exe = b.addRunArtifact(read_exe); - if (b.args) |args| { - run_read_exe.addArgs(args); - } - - const run_read_step = b.step("run-example-read", "Run read example"); - run_read_step.dependOn(&run_read_exe.step); -} - -fn addFuzz(b: *Build, target: Build.ResolvedTarget, xml: *Build.Module) void { - // Thanks to https://www.ryanliptak.com/blog/fuzzing-zig-code/ for the basis of this! - const fuzz_lib = b.addStaticLibrary(.{ - .name = "fuzz", - .root_source_file = b.path("fuzz/main.zig"), - .target = target, - .optimize = .Debug, - }); - fuzz_lib.want_lto = true; - fuzz_lib.bundle_compiler_rt = true; - fuzz_lib.root_module.addImport("xml", xml); - - const fuzz_compile = b.addSystemCommand(&.{ "afl-clang-lto", "-o" }); - const fuzz_exe = fuzz_compile.addOutputFileArg("fuzz"); - fuzz_compile.addArtifactArg(fuzz_lib); - const fuzz_install = b.addInstallBinFile(fuzz_exe, "fuzz"); - - const run_fuzz_compile_step = b.step("install-fuzz", "Build executable for fuzz testing using afl-clang-lto"); - run_fuzz_compile_step.dependOn(&fuzz_install.step); - - const run_fuzz = b.addSystemCommand(&.{"afl-fuzz"}); - run_fuzz.addArg("-i"); - if (b.option(bool, "resume", "Resume fuzzing rather than starting a new run") orelse false) { - run_fuzz.addArg("-"); - } else { - run_fuzz.addArg(b.pathJoin(&.{ "fuzz", "inputs" })); - } - run_fuzz.addArgs(&.{ "-o", b.pathJoin(&.{ "fuzz", "outputs" }) }); - const dictionaries = &[_][]const u8{ "xml.dict", "xml_UTF_16.dict", "xml_UTF_16BE.dict", "xml_UTF_16LE.dict" }; - for (dictionaries) |dictionary| { - run_fuzz.addArgs(&.{ "-x", b.pathJoin(&.{ "fuzz", "dictionaries", dictionary }) }); - } - run_fuzz.addFileArg(fuzz_exe); - const run_fuzz_step = b.step("fuzz", "Execute afl-fuzz with the fuzz testing executable"); - 
run_fuzz_step.dependOn(&run_fuzz.step); - - const fuzz_reproduce_exe = b.addExecutable(.{ - .name = "fuzz-reproduce", - .root_source_file = b.path("fuzz/main.zig"), - .target = target, - .optimize = .Debug, - }); - fuzz_reproduce_exe.root_module.addImport("xml", xml); - - const run_fuzz_reproduce_exe = b.addRunArtifact(fuzz_reproduce_exe); - if (b.args) |args| { - run_fuzz_reproduce_exe.addArgs(args); - } - - const run_fuzz_reproduce_step = b.step("fuzz-reproduce", "Reproduce crash found by fuzzing"); - run_fuzz_reproduce_step.dependOn(&run_fuzz_reproduce_exe.step); + example_reader_exe.root_module.addImport("xml", xml); + const example_reader_install = b.addInstallArtifact(example_reader_exe, .{}); + install_examples_step.dependOn(&example_reader_install.step); } diff --git a/build.zig.zon b/build.zig.zon index 8d81938..6eb80a1 100644 --- a/build.zig.zon +++ b/build.zig.zon @@ -1,5 +1,5 @@ .{ - .name = "zig-xml", + .name = "xml", .version = "0.1.0", .paths = .{ "src", diff --git a/examples/read.zig b/examples/read.zig deleted file mode 100644 index 3fb77f3..0000000 --- a/examples/read.zig +++ /dev/null @@ -1,46 +0,0 @@ -const std = @import("std"); -const xml = @import("xml"); - -pub fn main() !void { - var gpa = std.heap.GeneralPurposeAllocator(.{}){}; - defer _ = gpa.deinit(); - const allocator = gpa.allocator(); - - const args = try std.process.argsAlloc(allocator); - defer std.process.argsFree(allocator, args); - if (args.len != 2) { - return error.InvalidArguments; - } - const input_path = args[1]; - - const stdout_raw = std.io.getStdOut().writer(); - var stdout_buffered_writer = std.io.bufferedWriter(stdout_raw); - const stdout = stdout_buffered_writer.writer(); - - const input_file = try std.fs.cwd().openFile(input_path, .{}); - defer input_file.close(); - var input_buffered_reader = std.io.bufferedReader(input_file.reader()); - var reader = xml.reader(allocator, input_buffered_reader.reader(), .{}); - defer reader.deinit(); - - while (try reader.next()) 
|event| { - try printEvent(stdout, event); - } - try stdout_buffered_writer.flush(); -} - -fn printEvent(out: anytype, event: xml.Event) !void { - switch (event) { - .xml_declaration => |xml_declaration| try out.print(" |element_start| { - try out.print("<{?s}({?s}):{s}\n", .{ element_start.name.prefix, element_start.name.ns, element_start.name.local }); - for (element_start.attributes) |attr| { - try out.print(" @{?s}({?s}):{s}={s}\n", .{ attr.name.prefix, attr.name.ns, attr.name.local, attr.value }); - } - }, - .element_content => |element_content| try out.print(" {s}\n", .{element_content.content}), - .element_end => |element_end| try out.print("/{?s}({?s}):{s}\n", .{ element_end.name.prefix, element_end.name.ns, element_end.name.local }), - .comment => |comment| try out.print(" diff --git a/fuzz/inputs/valid-utf16be.xml b/fuzz/inputs/valid-utf16be.xml deleted file mode 100644 index 027c1f473dc137b203d9d7a303cd24f3a9730e1e..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 274 zcmaiuF%H5o5Co^@6^0rTXs80|5D$O{0I>imI8v}e@_Arw6jD&2lYF|p-SItNWaPBm zC^0lxYVKUAaIBcHAYra7hM8PTd2-f_#Jk`lFHA-i6w5)$UiqkA sB(Hz1^sU}fRWViKp=!q@c=vkra>|~$xP$t<#Ae>bYUSk>CHWVJ@1#L@$m>RLddtd;ZPKy4W`+<;EW(dMi2r diff --git a/fuzz/inputs/valid.xml b/fuzz/inputs/valid.xml deleted file mode 100644 index a243053..0000000 --- a/fuzz/inputs/valid.xml +++ /dev/null @@ -1,7 +0,0 @@ - - - - Hello, world! - - Hello, world! 
- diff --git a/fuzz/main.zig b/fuzz/main.zig deleted file mode 100644 index 903422e..0000000 --- a/fuzz/main.zig +++ /dev/null @@ -1,30 +0,0 @@ -const std = @import("std"); -const xml = @import("xml"); - -fn cMain() callconv(.C) void { - main(); -} - -comptime { - @export(cMain, .{ .name = "main" }); -} - -pub fn main() void { - var gpa = std.heap.GeneralPurposeAllocator(.{}){}; - defer std.debug.assert(gpa.deinit() == .ok); - const allocator = gpa.allocator(); - - var stdin_buf = std.io.bufferedReader(std.io.getStdIn().reader()); - var reader = xml.reader(allocator, stdin_buf.reader(), .{}); - defer reader.deinit(); - - var stdout_buf = std.io.bufferedWriter(std.io.getStdOut().writer()); - const stdout = stdout_buf.writer(); - const stderr = std.io.getStdErr().writer(); - while (reader.next() catch |e| { - stderr.print("Error at {}: {}\n", .{ reader.token_reader.scanner.pos, e }) catch {}; - return; - }) |event| { - stdout.print("{} {}\n", .{ reader.token_reader.scanner.pos, event }) catch {}; - } -} diff --git a/fuzz/src/fuzz.zig b/fuzz/src/fuzz.zig new file mode 100644 index 0000000..86f553b --- /dev/null +++ b/fuzz/src/fuzz.zig @@ -0,0 +1,26 @@ +const std = @import("std"); +const Allocator = std.mem.Allocator; +const assert = std.debug.assert; +const xml = @import("xml"); + +export fn zig_fuzz_init() void {} + +export fn zig_fuzz_test(buf: [*]u8, len: isize) void { + var gpa_state: std.heap.GeneralPurposeAllocator(.{}) = .{}; + defer assert(gpa_state.deinit() == .ok); + const gpa = gpa_state.allocator(); + fuzz(gpa, buf[0..@intCast(len)]) catch @panic("OOM"); +} + +fn fuzz(gpa: Allocator, input: []const u8) !void { + var doc = xml.StaticDocument.init(input); + var reader = doc.reader(gpa, .{}); + defer reader.deinit(); + while (true) { + const node = reader.read() catch |err| switch (err) { + error.MalformedXml => break, + error.OutOfMemory => return error.OutOfMemory, + }; + if (node == .eof) break; + } +} diff --git a/src/Reader.zig b/src/Reader.zig new file 
mode 100644 index 0000000..bce392f --- /dev/null +++ b/src/Reader.zig @@ -0,0 +1,2216 @@ +//! A streaming XML parser, aiming to conform to the [XML 1.0 (Fifth +//! Edition)](https://www.w3.org/TR/2008/REC-xml-20081126) and [Namespaces in +//! XML 1.0 (Third Edition)](https://www.w3.org/TR/2009/REC-xml-names-20091208/) +//! specifications. +//! +//! This is the core, type-erased reader implementation. Generally, users will +//! not use this directly, but will use `xml.GenericReader`, which is a thin +//! wrapper around this type providing type safety for returned errors. +//! +//! A reader gets its raw data from a `Source`, which acts as a forward-only +//! window of an XML document. In a simple case (`xml.StaticDocument`), this +//! may just be slices of a document loaded completely in memory, but the same +//! interface works just as well for a document streamed from a byte reader +//! (`xml.StreamingDocument`). +//! +//! Calling `read` returns the next `Node` in the document, and other reader +//! functions specific to each node type can be used to obtain more information +//! about the current node. The convention is that functions associated with a +//! specific node type have names starting with the node type (and `attribute` +//! functions can only be called on an `element_start` node). +//! +//! Some reader functions end in `Ns`, providing namespace-aware functionality. +//! These functions must only be called on a reader configured to be +//! namespace-aware (namespace awareness is on by default in `Options`). 
+ +const std = @import("std"); +const Allocator = std.mem.Allocator; +const assert = std.debug.assert; +const expectError = std.testing.expectError; +const expectEqual = std.testing.expectEqual; +const expectEqualDeep = std.testing.expectEqualDeep; +const expectEqualStrings = std.testing.expectEqualStrings; + +const Location = @import("xml.zig").Location; +const StaticDocument = @import("xml.zig").StaticDocument; +const QName = @import("xml.zig").QName; +const PrefixedQName = @import("xml.zig").PrefixedQName; +const predefined_entities = @import("xml.zig").predefined_entities; +const predefined_namespace_uris = @import("xml.zig").predefined_namespace_uris; +const ns_xml = @import("xml.zig").ns_xml; +const ns_xmlns = @import("xml.zig").ns_xmlns; + +options: Options, + +state: State, +/// An array of buffer spans relevant to the current node. +/// The layout of the spans depends on the node type: +/// - `eof` - none +/// - `xml_declaration` - "xml" (NAME VALUE)... +/// - `element_start` - NAME (NAME VALUE)... +/// - `element_end` - NAME +/// - `comment` - COMMENT +/// - `pi` - TARGET DATA +/// - `text` - none +/// - `cdata` - CDATA +/// - `character_reference` - REF +/// - `entity_reference` - REF +spans: std.ArrayListUnmanaged(BufSpan), +/// A map of attribute names to indexes. +/// The keys are slices into `buf`. +attributes: std.StringArrayHashMapUnmanaged(usize), +/// A map of attribute qnames to indexes. +/// The key `ns` and `local` values are slices into `buf`. +q_attributes: std.ArrayHashMapUnmanaged(QName, usize, QNameContext, true), +/// String data for the current element nesting context. +/// Each element start node appends the name of the element to this buffer, and +/// the element name is followed by any namespace prefixes and URIs declared on +/// the element so they can be referenced by `ns_prefixes`. +strings: std.ArrayListUnmanaged(u8), +/// The start indexes of the element names in `strings`. 
+element_names: std.ArrayListUnmanaged(StringIndex), +/// The namespace prefixes declared by the current nesting context of elements. +ns_prefixes: std.ArrayListUnmanaged(std.AutoArrayHashMapUnmanaged(StringIndex, StringIndex)), +/// The Unicode code point associated with the current character reference. +character: u21, + +source: Source, +/// The source location of the beginning of `buf`. +loc: Location, +/// Buffered data read from `source`. +buf: []const u8, +/// The current position of the reader in `buf`. +pos: usize, + +/// The last node returned by `read` (that is, the current node). +node: ?Node, +/// The current error code (only valid if `read` returned `error.MalformedXml`). +error_code: ErrorCode, +/// The position of the current error in `buf`. +error_pos: usize, + +scratch: std.ArrayListUnmanaged(u8), + +gpa: Allocator, + +const Reader = @This(); + +pub const Options = struct { + /// Whether the reader should handle namespaces in element and attribute + /// names. The `Ns`-suffixed functions of `Reader` may only be used when + /// this is enabled. + namespace_aware: bool = true, + /// Whether the reader should track the source location (line and column) + /// of nodes in the document. The `location` functions of `Reader` may only + /// be used when this is enabled. + location_aware: bool = true, + /// Whether the reader may assume that its input data is valid UTF-8. 
+ assume_valid_utf8: bool = false, +}; + +pub const Node = enum { + eof, + xml_declaration, + element_start, + element_end, + comment, + pi, + text, + cdata, + character_reference, + entity_reference, +}; + +pub const ErrorCode = enum { + xml_declaration_attribute_unsupported, + xml_declaration_version_missing, + xml_declaration_version_unsupported, + xml_declaration_encoding_unsupported, + xml_declaration_standalone_malformed, + doctype_unsupported, + directive_unknown, + attribute_missing_space, + attribute_duplicate, + attribute_prefix_undeclared, + attribute_illegal_character, + element_end_mismatched, + element_end_unclosed, + comment_malformed, + comment_unclosed, + pi_unclosed, + pi_target_disallowed, + pi_missing_space, + text_cdata_end_disallowed, + cdata_unclosed, + entity_reference_unclosed, + entity_reference_undefined, + character_reference_unclosed, + character_reference_malformed, + name_malformed, + namespace_prefix_unbound, + namespace_binding_illegal, + namespace_prefix_illegal, + unexpected_character, + unexpected_eof, + expected_equals, + expected_quote, + missing_end_quote, + invalid_utf8, + illegal_character, +}; + +pub const Source = struct { + context: *const anyopaque, + moveFn: *const fn (context: *const anyopaque, advance: usize, len: usize) anyerror![]const u8, + + pub fn move(source: Source, advance: usize, len: usize) anyerror![]const u8 { + return source.moveFn(source.context, advance, len); + } +}; + +const State = enum { + invalid, + start, + after_xml_declaration, + after_doctype, + in_root, + empty_element, + empty_root, + after_root, + eof, +}; + +pub fn init(gpa: Allocator, source: Source, options: Options) Reader { + return .{ + .options = options, + + .state = .start, + .spans = .{}, + .attributes = .{}, + .q_attributes = .{}, + .strings = .{}, + .element_names = .{}, + .ns_prefixes = .{}, + .character = undefined, + + .source = source, + .loc = if (options.location_aware) Location.start else undefined, + .buf = &.{}, + .pos = 
0, + + .node = null, + .error_code = undefined, + .error_pos = undefined, + + .scratch = .{}, + + .gpa = gpa, + }; +} + +pub fn deinit(reader: *Reader) void { + reader.spans.deinit(reader.gpa); + reader.attributes.deinit(reader.gpa); + reader.q_attributes.deinit(reader.gpa); + reader.strings.deinit(reader.gpa); + reader.element_names.deinit(reader.gpa); + for (reader.ns_prefixes.items) |*map| map.deinit(reader.gpa); + reader.ns_prefixes.deinit(reader.gpa); + reader.scratch.deinit(reader.gpa); + reader.* = undefined; +} + +/// Returns the location of the node. +/// Asserts that the reader is location-aware and there is a current node (`read` was called and did not return an error). +pub fn location(reader: Reader) Location { + assert(reader.options.location_aware and reader.node != null); + return reader.loc; +} + +test location { + var doc = StaticDocument.init( + \\ + \\ Hello, world! + \\ + ); + var reader = doc.reader(std.testing.allocator, .{}); + defer reader.deinit(); + + try expectEqual(.element_start, try reader.read()); + try expectEqualDeep(Location{ .line = 1, .column = 1 }, reader.location()); + + try expectEqual(.text, try reader.read()); + try expectEqualDeep(Location{ .line = 1, .column = 7 }, reader.location()); + + try expectEqual(.element_start, try reader.read()); + try expectEqualDeep(Location{ .line = 2, .column = 3 }, reader.location()); + + try expectEqual(.text, try reader.read()); + try expectEqualDeep(Location{ .line = 2, .column = 8 }, reader.location()); + + try expectEqual(.element_end, try reader.read()); + try expectEqualDeep(Location{ .line = 2, .column = 21 }, reader.location()); + + try expectEqual(.text, try reader.read()); + try expectEqualDeep(Location{ .line = 2, .column = 27 }, reader.location()); + + try expectEqual(.element_end, try reader.read()); + try expectEqualDeep(Location{ .line = 3, .column = 1 }, reader.location()); +} + +/// Returns the error code associated with the error. 
+/// Asserts that `error.MalformedXml` was returned by the last call to `read`. +pub fn errorCode(reader: Reader) ErrorCode { + assert(reader.state == .invalid); + return reader.error_code; +} + +test errorCode { + var doc = StaticDocument.init( + \\ + \\ <123>Hello, world! + \\ + ); + var reader = doc.reader(std.testing.allocator, .{}); + defer reader.deinit(); + + try expectEqual(.element_start, try reader.read()); + try expectEqual(.text, try reader.read()); + try expectError(error.MalformedXml, reader.read()); + try expectEqual(.name_malformed, reader.errorCode()); +} + +/// Returns the location where the error occurred. +/// Asserts that the reader is location-aware and `error.MalformedXml` was returned by the last call to `read`. +pub fn errorLocation(reader: Reader) Location { + assert(reader.state == .invalid); + var loc = reader.loc; + loc.update(reader.buf[0..reader.error_pos]); + return loc; +} + +test errorLocation { + var doc = StaticDocument.init( + \\ + \\ <123>Hello, world! + \\ + ); + var reader = doc.reader(std.testing.allocator, .{}); + defer reader.deinit(); + + try expectEqual(.element_start, try reader.read()); + try expectEqual(.text, try reader.read()); + try expectError(error.MalformedXml, reader.read()); + try expectEqualDeep(Location{ .line = 2, .column = 4 }, reader.errorLocation()); +} + +/// Returns the version declared in the XML declaration. +/// Asserts that the current node is `Node.xml_version`. +pub fn xmlDeclarationVersion(reader: Reader) []const u8 { + assert(reader.node == .xml_declaration); + return reader.attributeValueUnchecked(0); +} + +test xmlDeclarationVersion { + var doc = StaticDocument.init( + \\ + \\ + ); + var reader = doc.reader(std.testing.allocator, .{}); + defer reader.deinit(); + try expectEqual(.xml_declaration, try reader.read()); + try expectEqualStrings("1.0", reader.xmlDeclarationVersion()); +} + +/// Returns the encoding declared in the XML declaration. 
+/// Asserts that the current node is `Node.xml_version`. +pub fn xmlDeclarationEncoding(reader: Reader) ?[]const u8 { + assert(reader.node == .xml_declaration); + const n = reader.attributes.get("encoding") orelse return null; + return reader.attributeValueUnchecked(n); +} + +test xmlDeclarationEncoding { + var doc = StaticDocument.init( + \\ + \\ + ); + var reader = doc.reader(std.testing.allocator, .{}); + defer reader.deinit(); + try expectEqual(.xml_declaration, try reader.read()); + try expectEqualStrings("UTF-8", reader.xmlDeclarationEncoding().?); +} + +/// Returns whether the XML declaration declares the document to be standalone. +/// Asserts that the current node is `Node.xml_version`. +pub fn xmlDeclarationStandalone(reader: Reader) ?bool { + assert(reader.node == .xml_declaration); + const n = reader.attributes.get("standalone") orelse return null; + return std.mem.eql(u8, reader.attributeValueUnchecked(n), "yes"); +} + +test xmlDeclarationStandalone { + var doc = StaticDocument.init( + \\ + \\ + ); + var reader = doc.reader(std.testing.allocator, .{}); + defer reader.deinit(); + try expectEqual(.xml_declaration, try reader.read()); + try expectEqual(true, reader.xmlDeclarationStandalone()); +} + +/// Returns the name of the element. +/// Asserts that the current node is `Node.element_start` or `Node.element_end`. +pub fn elementName(reader: Reader) []const u8 { + assert(reader.node == .element_start or reader.node == .element_end); + return reader.elementNameUnchecked(); +} + +test elementName { + var doc = StaticDocument.init( + \\ + ); + var reader = doc.reader(std.testing.allocator, .{}); + defer reader.deinit(); + try expectEqual(.element_start, try reader.read()); + try expectEqualStrings("root", reader.elementName()); + try expectEqual(.element_end, try reader.read()); + try expectEqualStrings("root", reader.elementName()); +} + +/// Returns the name of the element as a `PrefixedQName`. 
+/// Asserts that the current node is `Node.element_start` or `Node.element_end` and that `reader` is namespace-aware. +pub fn elementNameNs(reader: Reader) PrefixedQName { + assert(reader.options.namespace_aware); + return reader.parseQName(reader.elementName()); +} + +test elementNameNs { + var doc = StaticDocument.init( + \\ + \\ + \\ + ); + var reader = doc.reader(std.testing.allocator, .{}); + defer reader.deinit(); + + try expectEqual(.element_start, try reader.read()); + try expectEqualStrings("", reader.elementNameNs().prefix); + try expectEqualStrings("https://example.com/ns", reader.elementNameNs().ns); + try expectEqualStrings("root", reader.elementNameNs().local); + + try expectEqual(.text, try reader.read()); + + try expectEqual(.element_start, try reader.read()); + try expectEqualStrings("a", reader.elementNameNs().prefix); + try expectEqualStrings("https://example.com/ns2", reader.elementNameNs().ns); + try expectEqualStrings("a", reader.elementNameNs().local); + + try expectEqual(.element_end, try reader.read()); + try expectEqualStrings("a", reader.elementNameNs().prefix); + try expectEqualStrings("https://example.com/ns2", reader.elementNameNs().ns); + try expectEqualStrings("a", reader.elementNameNs().local); + + try expectEqual(.text, try reader.read()); + + try expectEqual(.element_end, try reader.read()); + try expectEqualStrings("", reader.elementNameNs().prefix); + try expectEqualStrings("https://example.com/ns", reader.elementNameNs().ns); + try expectEqualStrings("root", reader.elementNameNs().local); +} + +fn elementNameUnchecked(reader: Reader) []const u8 { + return reader.bufSlice(reader.spans.items[0]); +} + +fn elementNamePos(reader: Reader) usize { + return reader.spans.items[0].start; +} + +/// Returns the number of attributes of the element. +/// Asserts that the current node is `Node.element_start`. 
+pub fn attributeCount(reader: Reader) usize { + assert(reader.node == .element_start); + return reader.attributeCountUnchecked(); +} + +test attributeCount { + var doc = StaticDocument.init( + \\ + ); + var reader = doc.reader(std.testing.allocator, .{}); + defer reader.deinit(); + try expectEqual(.element_start, try reader.read()); + try expectEqual(3, reader.attributeCount()); +} + +fn attributeCountUnchecked(reader: Reader) usize { + return @divExact(reader.spans.items.len - 1, 2); +} + +/// Returns the name of the `n`th attribute of the element. +/// Asserts that the current node is `Node.element_start` and `n` is less than `reader.nAttributes()`. +pub fn attributeName(reader: Reader, n: usize) []const u8 { + assert(reader.node == .element_start and n < reader.attributeCount()); + return reader.attributeNameUnchecked(n); +} + +test attributeName { + var doc = StaticDocument.init( + \\ + ); + var reader = doc.reader(std.testing.allocator, .{}); + defer reader.deinit(); + try expectEqual(.element_start, try reader.read()); + try expectEqualStrings("a", reader.attributeName(0)); + try expectEqualStrings("b", reader.attributeName(1)); + try expectEqualStrings("c", reader.attributeName(2)); +} + +/// Returns the name of the `n`th attribute of the element as a `PrefixedQName`. +/// If the reader is not namespace-aware, only the `local` part will be non-empty. +/// Asserts that the current node is `Node.element_start` and `n` is less than `reader.nAttributes()`. 
+pub fn attributeNameNs(reader: Reader, n: usize) PrefixedQName { + const name = reader.attributeName(n); + return if (reader.options.namespace_aware) reader.parseQName(name) else .{ + .prefix = "", + .ns = "", + .local = name, + }; +} + +test attributeNameNs { + var doc = StaticDocument.init( + \\ + ); + var reader = doc.reader(std.testing.allocator, .{}); + defer reader.deinit(); + try expectEqual(.element_start, try reader.read()); + + try expectEqualStrings("xmlns", reader.attributeNameNs(0).prefix); + try expectEqualStrings("http://www.w3.org/2000/xmlns/", reader.attributeNameNs(0).ns); + try expectEqualStrings("pre", reader.attributeNameNs(0).local); + + try expectEqualStrings("", reader.attributeNameNs(1).prefix); + try expectEqualStrings("", reader.attributeNameNs(1).ns); + try expectEqualStrings("a", reader.attributeNameNs(1).local); + + try expectEqualStrings("pre", reader.attributeNameNs(2).prefix); + try expectEqualStrings("https://example.com/ns", reader.attributeNameNs(2).ns); + try expectEqualStrings("b", reader.attributeNameNs(2).local); +} + +fn attributeNameUnchecked(reader: Reader, n: usize) []const u8 { + return reader.bufSlice(reader.spans.items[n * 2 + 1]); +} + +fn attributeNamePos(reader: Reader, n: usize) usize { + return reader.spans.items[n * 2 + 1].start; +} + +/// Returns the value of the `n`th attribute of the element. +/// This function may incur allocations if the attribute value contains entity or character +/// references, or CR, LF, or TAB characters which must be normalized according to the spec. +/// The returned value is owned by `reader` and is only valid until the next call to another +/// function on `reader`. +/// Asserts that the current node is `Node.element_start` and `n` is less than `reader.nAttributes()`. 
+pub fn attributeValue(reader: *Reader, n: usize) Allocator.Error![]const u8 { + const raw = reader.attributeValueRaw(n); + if (std.mem.indexOfAny(u8, raw, "&\t\r\n") == null) return raw; + reader.scratch.clearRetainingCapacity(); + const writer = reader.scratch.writer(reader.gpa); + reader.attributeValueWrite(n, writer.any()) catch |err| switch (err) { + error.OutOfMemory => return error.OutOfMemory, + else => unreachable, + }; + return reader.scratch.items; +} + +test attributeValue { + var doc = StaticDocument.init( + \\ + ); + var reader = doc.reader(std.testing.allocator, .{}); + defer reader.deinit(); + try expectEqual(.element_start, try reader.read()); + try expectEqualStrings("1", try reader.attributeValue(0)); + try expectEqualStrings("2", try reader.attributeValue(1)); + try expectEqualStrings("1 & 2", try reader.attributeValue(2)); +} + +/// Returns the value of the `n`th attribute of the element. +/// Asserts that the current node is `Node.element_start` and `n` is less than `reader.nAttributes()`. 
+pub fn attributeValueAlloc(reader: Reader, gpa: Allocator, n: usize) Allocator.Error![]u8 { + var buf = std.ArrayList(u8).init(gpa); + defer buf.deinit(); + const buf_writer = buf.writer(); + reader.attributeValueWrite(n, buf_writer.any()) catch |err| switch (err) { + error.OutOfMemory => return error.OutOfMemory, + else => unreachable, + }; + return buf.toOwnedSlice(); +} + +test attributeValueAlloc { + var doc = StaticDocument.init( + \\ + ); + var reader = doc.reader(std.testing.allocator, .{}); + defer reader.deinit(); + try expectEqual(.element_start, try reader.read()); + + const attr0 = try reader.attributeValueAlloc(std.testing.allocator, 0); + defer std.testing.allocator.free(attr0); + try expectEqualStrings("1", attr0); + const attr1 = try reader.attributeValueAlloc(std.testing.allocator, 1); + defer std.testing.allocator.free(attr1); + try expectEqualStrings("2", attr1); + const attr2 = try reader.attributeValueAlloc(std.testing.allocator, 2); + defer std.testing.allocator.free(attr2); + try expectEqualStrings("1 & 2", attr2); +} + +/// Writes the value of the `n`th attribute of the element to `writer`. +/// Asserts that the current node is `Node.element_start` and `n` is less than `reader.nAttributes()`. +pub fn attributeValueWrite(reader: Reader, n: usize, writer: std.io.AnyWriter) anyerror!void { + const raw = reader.attributeValueRaw(n); + var pos: usize = 0; + while (std.mem.indexOfAnyPos(u8, raw, pos, "&\t\r\n")) |split_pos| { + try writer.writeAll(raw[pos..split_pos]); + pos = split_pos; + switch (raw[pos]) { + '&' => { + const entity_end = std.mem.indexOfScalarPos(u8, raw, pos, ';') orelse unreachable; + if (raw[pos + "&".len] == '#') { + const c = if (raw[pos + "&#".len] == 'x') + std.fmt.parseInt(u21, raw[pos + "&#x".len .. entity_end], 16) catch unreachable + else + std.fmt.parseInt(u21, raw[pos + "&#".len .. 
entity_end], 10) catch unreachable; + var buf: [4]u8 = undefined; + const len = std.unicode.utf8Encode(c, &buf) catch unreachable; + try writer.writeAll(buf[0..len]); + } else { + try writer.writeAll(predefined_entities.get(raw[pos + "&".len .. entity_end]) orelse unreachable); + } + pos = entity_end + 1; + }, + '\t', '\n' => { + try writer.writeByte(' '); + pos += 1; + }, + '\r' => { + try writer.writeByte(' '); + if (pos + 1 < raw.len and raw[pos + 1] == '\n') { + pos += 2; + } else { + pos += 1; + } + }, + else => unreachable, + } + } + try writer.writeAll(raw[pos..]); +} + +test attributeValueWrite { + var doc = StaticDocument.init( + \\ + ); + var reader = doc.reader(std.testing.allocator, .{}); + defer reader.deinit(); + try expectEqual(.element_start, try reader.read()); + + var buf = std.ArrayList(u8).init(std.testing.allocator); + defer buf.deinit(); + + try reader.attributeValueWrite(0, buf.writer()); + try expectEqualStrings("1", buf.items); + + buf.clearRetainingCapacity(); + try reader.attributeValueWrite(1, buf.writer()); + try expectEqualStrings("2", buf.items); + + buf.clearRetainingCapacity(); + try reader.attributeValueWrite(2, buf.writer()); + try expectEqualStrings("1 & 2", buf.items); +} + +/// Returns the raw value of the `n`th attribute of the element, as it appears in the source. +/// Asserts that the current node is `Node.element_start` and `n` is less than `reader.nAttributes()`. 
+pub fn attributeValueRaw(reader: Reader, n: usize) []const u8 { + assert(reader.node == .element_start and n < reader.attributeCount()); + return reader.attributeValueUnchecked(n); +} + +test attributeValueRaw { + var doc = StaticDocument.init( + \\ + ); + var reader = doc.reader(std.testing.allocator, .{}); + defer reader.deinit(); + try expectEqual(.element_start, try reader.read()); + try expectEqualStrings("1", reader.attributeValueRaw(0)); + try expectEqualStrings("2", reader.attributeValueRaw(1)); + try expectEqualStrings("1 & 2", reader.attributeValueRaw(2)); +} + +fn attributeValueUnchecked(reader: Reader, n: usize) []const u8 { + return reader.bufSlice(reader.spans.items[n * 2 + 2]); +} + +fn attributeValuePos(reader: Reader, n: usize) usize { + return reader.spans.items[n * 2 + 2].start; +} + +fn attributeValueEndPos(reader: Reader, n: usize) usize { + return reader.spans.items[n * 2 + 2].end; +} + +/// Returns the location of the `n`th attribute of the element. +/// Asserts that the reader is location-aware, the current node is `Node.element_start`, and `n` is less than `reader.nAttributes()`. +pub fn attributeLocation(reader: Reader, n: usize) Location { + assert(reader.options.location_aware and reader.node == .element_start and n < reader.attributeCount()); + var loc = reader.loc; + loc.update(reader.buf[0..reader.attributeNamePos(n)]); + return loc; +} + +/// Returns the index of the attribute named `name`. +/// Asserts that the current node is `Node.element_start`. 
+pub fn attributeIndex(reader: Reader, name: []const u8) ?usize { + assert(reader.node == .element_start); + return reader.attributes.get(name); +} + +test attributeIndex { + var doc = StaticDocument.init( + \\ + ); + var reader = doc.reader(std.testing.allocator, .{}); + defer reader.deinit(); + try expectEqual(.element_start, try reader.read()); + try expectEqual(0, reader.attributeIndex("one")); + try expectEqual(1, reader.attributeIndex("two")); + try expectEqual(2, reader.attributeIndex("three")); + try expectEqual(null, reader.attributeIndex("four")); +} + +/// Returns the index of the attribute with namespace `ns` and local name `local`. +/// Asserts that the current node is `Node.element_start` and `reader` is namespace-aware. +pub fn attributeIndexNs(reader: Reader, ns: []const u8, local: []const u8) ?usize { + assert(reader.node == .element_start and reader.options.namespace_aware); + return reader.q_attributes.get(.{ .ns = ns, .local = local }); +} + +test attributeIndexNs { + var doc = StaticDocument.init( + \\ + ); + var reader = doc.reader(std.testing.allocator, .{}); + defer reader.deinit(); + try expectEqual(.element_start, try reader.read()); + try expectEqual(0, reader.attributeIndexNs("", "xmlns")); + try expectEqual(1, reader.attributeIndexNs("http://www.w3.org/2000/xmlns/", "foo")); + try expectEqual(2, reader.attributeIndexNs("", "one")); + try expectEqual(3, reader.attributeIndexNs("http://example.com/foo", "two")); + try expectEqual(null, reader.attributeIndexNs("http://example.com", "one")); + try expectEqual(null, reader.attributeIndexNs("", "three")); +} + +/// Returns the text of the comment. +/// This function may incur allocations if the comment text contains CR +/// characters which must be normalized according to the spec. +/// The returned value is owned by `reader` and is only valid until the next call to another +/// function on `reader`. +/// Asserts that the current node is `Node.comment`. 
+pub fn comment(reader: *Reader) Allocator.Error![]const u8 { + return reader.newlineNormalizedScratch(reader.commentRaw()); +} + +test comment { + var doc = StaticDocument.init( + \\ + \\ + ); + var reader = doc.reader(std.testing.allocator, .{}); + defer reader.deinit(); + try expectEqual(.comment, try reader.read()); + try expectEqualStrings(" Hello, world! ", try reader.comment()); +} + +/// Writes the text of the comment to `writer`. +/// Asserts that the current node is `Node.comment`. +pub fn commentWrite(reader: Reader, writer: std.io.AnyWriter) anyerror!void { + try writeNewlineNormalized(reader.commentRaw(), writer); +} + +test commentWrite { + var doc = StaticDocument.init( + \\ + \\ + ); + var reader = doc.reader(std.testing.allocator, .{}); + defer reader.deinit(); + try expectEqual(.comment, try reader.read()); + + var buf = std.ArrayList(u8).init(std.testing.allocator); + defer buf.deinit(); + try reader.commentWrite(buf.writer()); + try expectEqualStrings(" Hello, world! ", buf.items); +} + +/// Returns the raw text of the comment, as it appears in the source. +/// Asserts that the current node is `Node.comment`. +pub fn commentRaw(reader: Reader) []const u8 { + assert(reader.node == .comment); + return reader.commentUnchecked(); +} + +test commentRaw { + var doc = StaticDocument.init( + \\ + \\ + ); + var reader = doc.reader(std.testing.allocator, .{}); + defer reader.deinit(); + try expectEqual(.comment, try reader.read()); + try expectEqualStrings(" Hello, world! ", reader.commentRaw()); +} + +fn commentUnchecked(reader: Reader) []const u8 { + return reader.bufSlice(reader.spans.items[0]); +} + +fn commentPos(reader: Reader) usize { + return reader.spans.items[0].start; +} + +/// Returns the target of the PI. +/// Asserts that the current node is `Node.pi`. 
+pub fn piTarget(reader: Reader) []const u8 { + assert(reader.node == .pi); + return reader.piTargetUnchecked(); +} + +test piTarget { + var doc = StaticDocument.init( + \\ + \\ + ); + var reader = doc.reader(std.testing.allocator, .{}); + defer reader.deinit(); + try expectEqual(.pi, try reader.read()); + try expectEqualStrings("pi-target", reader.piTarget()); +} + +fn piTargetUnchecked(reader: Reader) []const u8 { + return reader.bufSlice(reader.spans.items[0]); +} + +fn piTargetPos(reader: Reader) usize { + return reader.spans.items[0].start; +} + +fn piTargetEndPos(reader: Reader) usize { + return reader.spans.items[0].end; +} + +/// Returns the data of the PI. +/// This function may incur allocations if the PI data contains CR +/// characters which must be normalized according to the spec. +/// The returned value is owned by `reader` and is only valid until the next call to another +/// function on `reader`. +/// Asserts that the current node is `Node.pi`. +pub fn piData(reader: *Reader) Allocator.Error![]const u8 { + return reader.newlineNormalizedScratch(reader.piDataRaw()); +} + +test piData { + var doc = StaticDocument.init( + \\ + \\ + ); + var reader = doc.reader(std.testing.allocator, .{}); + defer reader.deinit(); + try expectEqual(.pi, try reader.read()); + try expectEqualStrings("pi-data", try reader.piData()); +} + +/// Writes the data of the PI to `writer`. +/// Asserts that the current node is `Node.pi`. 
+pub fn piDataWrite(reader: Reader, writer: std.io.AnyWriter) anyerror!void { + try writeNewlineNormalized(reader.piDataRaw(), writer); +} + +test piDataWrite { + var doc = StaticDocument.init( + \\ + \\ + ); + var reader = doc.reader(std.testing.allocator, .{}); + defer reader.deinit(); + try expectEqual(.pi, try reader.read()); + + var buf = std.ArrayList(u8).init(std.testing.allocator); + defer buf.deinit(); + try reader.piDataWrite(buf.writer()); + try expectEqualStrings("pi-data", buf.items); +} + +/// Returns the raw data of the PI, as it appears in the source. +/// Asserts that the current node is `Node.pi`. +pub fn piDataRaw(reader: Reader) []const u8 { + assert(reader.node == .pi); + return reader.piDataUnchecked(); +} + +test piDataRaw { + var doc = StaticDocument.init( + \\ + \\ + ); + var reader = doc.reader(std.testing.allocator, .{}); + defer reader.deinit(); + try expectEqual(.pi, try reader.read()); + try expectEqualStrings("pi-data", reader.piDataRaw()); +} + +fn piDataUnchecked(reader: Reader) []const u8 { + return reader.bufSlice(reader.spans.items[1]); +} + +fn piDataPos(reader: Reader) usize { + return reader.spans.items[1].start; +} + +fn piDataEndPos(reader: Reader) usize { + return reader.spans.items[1].end; +} + +/// Returns the text. +/// This function may incur allocations if the text contains CR +/// characters which must be normalized according to the spec. +/// The returned value is owned by `reader` and is only valid until the next call to another +/// function on `reader`. +/// Asserts that the current node is `Node.text`. +pub fn text(reader: *Reader) Allocator.Error![]const u8 { + return reader.newlineNormalizedScratch(reader.textRaw()); +} + +test text { + var doc = StaticDocument.init( + \\Hello, world! 
+ ); + var reader = doc.reader(std.testing.allocator, .{}); + defer reader.deinit(); + try expectEqual(.element_start, try reader.read()); + try expectEqual(.text, try reader.read()); + try expectEqualStrings("Hello, world!", try reader.text()); +} + +/// Writes the text to `writer`. +/// Asserts that the current node is `Node.text`. +pub fn textWrite(reader: Reader, writer: std.io.AnyWriter) anyerror!void { + try writeNewlineNormalized(reader.textRaw(), writer); +} + +test textWrite { + var doc = StaticDocument.init( + \\Hello, world! + ); + var reader = doc.reader(std.testing.allocator, .{}); + defer reader.deinit(); + try expectEqual(.element_start, try reader.read()); + try expectEqual(.text, try reader.read()); + + var buf = std.ArrayList(u8).init(std.testing.allocator); + defer buf.deinit(); + try reader.textWrite(buf.writer()); + try expectEqualStrings("Hello, world!", buf.items); +} + +/// Returns the raw text, as it appears in the source. +/// Asserts that the current node is `Node.text`. +pub fn textRaw(reader: Reader) []const u8 { + assert(reader.node == .text); + return reader.textUnchecked(); +} + +test textRaw { + var doc = StaticDocument.init( + \\Hello, world! + ); + var reader = doc.reader(std.testing.allocator, .{}); + defer reader.deinit(); + try expectEqual(.element_start, try reader.read()); + try expectEqual(.text, try reader.read()); + try expectEqualStrings("Hello, world!", reader.textRaw()); +} + +fn textUnchecked(reader: Reader) []const u8 { + return reader.buf[0..reader.pos]; +} + +fn textPos(reader: Reader) usize { + _ = reader; + return 0; +} + +/// Returns the text of the CDATA section. +/// This function may incur allocations if the text contains CR +/// characters which must be normalized according to the spec. +/// The returned value is owned by `reader` and is only valid until the next call to another +/// function on `reader`. +/// Asserts that the current node is `Node.cdata`. 
+pub fn cdata(reader: *Reader) Allocator.Error![]const u8 { + return reader.newlineNormalizedScratch(reader.cdataRaw()); +} + +test cdata { + var doc = StaticDocument.init( + \\ + ); + var reader = doc.reader(std.testing.allocator, .{}); + defer reader.deinit(); + try expectEqual(.element_start, try reader.read()); + try expectEqual(.cdata, try reader.read()); + try expectEqualStrings("Hello, world!", try reader.cdata()); +} + +/// Writes the text of the CDATA section to `writer`. +/// Asserts that the current node is `Node.cdata`. +pub fn cdataWrite(reader: Reader, writer: std.io.AnyWriter) anyerror!void { + try writeNewlineNormalized(reader.cdataRaw(), writer); +} + +test cdataWrite { + var doc = StaticDocument.init( + \\ + ); + var reader = doc.reader(std.testing.allocator, .{}); + defer reader.deinit(); + try expectEqual(.element_start, try reader.read()); + try expectEqual(.cdata, try reader.read()); + + var buf = std.ArrayList(u8).init(std.testing.allocator); + defer buf.deinit(); + try reader.cdataWrite(buf.writer()); + try expectEqualStrings("Hello, world!", buf.items); +} + +/// Returns the raw text of the CDATA section, as it appears in the source. +/// Asserts that the current node is `Node.cdata`. +pub fn cdataRaw(reader: Reader) []const u8 { + assert(reader.node == .cdata); + return reader.cdataUnchecked(); +} + +test cdataRaw { + var doc = StaticDocument.init( + \\ + ); + var reader = doc.reader(std.testing.allocator, .{}); + defer reader.deinit(); + try expectEqual(.element_start, try reader.read()); + try expectEqual(.cdata, try reader.read()); + try expectEqualStrings("Hello, world!", reader.cdataRaw()); +} + +fn cdataUnchecked(reader: Reader) []const u8 { + return reader.bufSlice(reader.spans.items[0]); +} + +fn cdataPos(reader: Reader) usize { + return reader.spans.items[0].start; +} + +/// Returns the name of the referenced entity. +/// Asserts that the current node is `Node.entity_reference`. 
+pub fn entityReferenceName(reader: Reader) []const u8 { + assert(reader.node == .entity_reference); + return reader.entityReferenceNameUnchecked(); +} + +test entityReferenceName { + var doc = StaticDocument.init( + \\& + ); + var reader = doc.reader(std.testing.allocator, .{}); + defer reader.deinit(); + try expectEqual(.element_start, try reader.read()); + try expectEqual(.entity_reference, try reader.read()); + try expectEqualStrings("amp", reader.entityReferenceName()); +} + +fn entityReferenceNameUnchecked(reader: Reader) []const u8 { + return reader.bufSlice(reader.spans.items[0]); +} + +fn entityReferenceNamePos(reader: Reader) usize { + return reader.spans.items[0].start; +} + +/// Returns the referenced character (Unicode codepoint). +/// Asserts that the current node is `Node.character_reference`. +pub fn characterReferenceChar(reader: Reader) u21 { + assert(reader.node == .character_reference); + return reader.character; +} + +test characterReferenceChar { + var doc = StaticDocument.init( + \\ + ); + var reader = doc.reader(std.testing.allocator, .{}); + defer reader.deinit(); + try expectEqual(.element_start, try reader.read()); + try expectEqual(.character_reference, try reader.read()); + try expectEqual(0x20, reader.characterReferenceChar()); +} + +/// Returns the "name" of the referenced character, as it appears in the source. +/// Asserts that the current node is `Node.character_reference`. 
pub fn characterReferenceName(reader: Reader) []const u8 {
    assert(reader.node == .character_reference);
    return reader.characterReferenceNameUnchecked();
}

test characterReferenceName {
    // NOTE(review): the test document was lost in extraction; reconstructed
    // from the asserted output ("x20") — confirm against the original source.
    var doc = StaticDocument.init(
        \\<root>&#x20;</root>
    );
    var reader = doc.reader(std.testing.allocator, .{});
    defer reader.deinit();
    try expectEqual(.element_start, try reader.read());
    try expectEqual(.character_reference, try reader.read());
    try expectEqualStrings("x20", reader.characterReferenceName());
}

// Span 0 holds the reference body (between '&#' and ';') for a
// `.character_reference` node.
fn characterReferenceNameUnchecked(reader: Reader) []const u8 {
    return reader.bufSlice(reader.spans.items[0]);
}

// Buffer position of the reference body (for error reporting).
fn characterReferenceNamePos(reader: Reader) usize {
    return reader.spans.items[0].start;
}

// Returns `raw` unchanged when it contains no CR; otherwise writes a
// newline-normalized copy into the reusable scratch buffer and returns that.
// The returned slice is invalidated by the next use of the scratch buffer.
fn newlineNormalizedScratch(reader: *Reader, raw: []const u8) Allocator.Error![]const u8 {
    if (std.mem.indexOfScalar(u8, raw, '\r') == null) return raw;
    reader.scratch.clearRetainingCapacity();
    const writer = reader.scratch.writer(reader.gpa);
    writeNewlineNormalized(raw, writer.any()) catch |err| switch (err) {
        // The scratch ArrayList writer can only fail with OutOfMemory.
        error.OutOfMemory => return error.OutOfMemory,
        else => unreachable,
    };
    return reader.scratch.items;
}

// Writes `raw` to `writer` with XML end-of-line handling applied:
// both CRLF and a lone CR become a single LF.
fn writeNewlineNormalized(raw: []const u8, writer: std.io.AnyWriter) anyerror!void {
    var pos: usize = 0;
    while (std.mem.indexOfScalarPos(u8, raw, pos, '\r')) |cr_pos| {
        try writer.writeAll(raw[pos..cr_pos]);
        try writer.writeByte('\n');
        if (cr_pos + 1 < raw.len and raw[cr_pos + 1] == '\n') {
            pos = cr_pos + "\r\n".len;
        } else {
            pos = cr_pos + "\r".len;
        }
    }
    try writer.writeAll(raw[pos..]);
}

/// Returns the namespace URI bound to `prefix`, or an empty string if none.
/// If the reader is not namespace-aware, always returns an empty string.
+pub fn namespaceUri(reader: Reader, prefix: []const u8) []const u8 { + if (!reader.options.namespace_aware) return ""; + if (predefined_namespace_uris.get(prefix)) |uri| return uri; + var i = reader.ns_prefixes.items.len; + const index = while (i > 0) { + i -= 1; + if (reader.ns_prefixes.items[i].getAdapted(prefix, StringIndexAdapter{ + .strings = reader.strings.items, + })) |uri| break uri; + } else return ""; + return reader.string(index); +} + +test namespaceUri { + var doc = StaticDocument.init( + \\ + \\ + \\ + ); + var reader = doc.reader(std.testing.allocator, .{}); + defer reader.deinit(); + + try expectEqual(.element_start, try reader.read()); + try expectEqualStrings("https://example.com/default", reader.namespaceUri("")); + try expectEqualStrings("https://example.com/other", reader.namespaceUri("other")); + try expectEqualStrings("", reader.namespaceUri("child")); + + try expectEqual(.text, try reader.read()); + try expectEqualStrings("https://example.com/default", reader.namespaceUri("")); + try expectEqualStrings("https://example.com/other", reader.namespaceUri("other")); + try expectEqualStrings("", reader.namespaceUri("child")); + + try expectEqual(.element_start, try reader.read()); + try expectEqualStrings("https://example.com/default", reader.namespaceUri("")); + try expectEqualStrings("https://example.com/other", reader.namespaceUri("other")); + try expectEqualStrings("https://example.com/child", reader.namespaceUri("child")); + + try expectEqual(.element_end, try reader.read()); + try expectEqualStrings("https://example.com/default", reader.namespaceUri("")); + try expectEqualStrings("https://example.com/other", reader.namespaceUri("other")); + try expectEqualStrings("https://example.com/child", reader.namespaceUri("child")); + + try expectEqual(.text, try reader.read()); + try expectEqualStrings("https://example.com/default", reader.namespaceUri("")); + try expectEqualStrings("https://example.com/other", reader.namespaceUri("other")); + try 
expectEqualStrings("", reader.namespaceUri("child")); + + try expectEqual(.element_end, try reader.read()); + try expectEqualStrings("https://example.com/default", reader.namespaceUri("")); + try expectEqualStrings("https://example.com/other", reader.namespaceUri("other")); + try expectEqualStrings("", reader.namespaceUri("child")); +} + +fn parseQName(reader: Reader, name: []const u8) PrefixedQName { + const prefix, const local = if (std.mem.indexOfScalar(u8, name, ':')) |colon_pos| + .{ name[0..colon_pos], name[colon_pos + 1 ..] } + else + .{ "", name }; + return .{ + .prefix = prefix, + .ns = reader.namespaceUri(prefix), + .local = local, + }; +} + +pub const ReadError = error{MalformedXml} || Allocator.Error; + +/// Reads and returns the next node in the document. +pub fn read(reader: *Reader) anyerror!Node { + errdefer reader.node = null; + const node: Node = while (true) { + switch (reader.state) { + .invalid => return error.MalformedXml, + .start => { + try reader.shift(); + try reader.skipBom(); + if (try reader.readMatch(" { + try reader.skipSpace(); + if (try reader.readMatch(" { + try reader.skipSpace(); + if (reader.pos == reader.buf.len) { + return reader.fatal(.unexpected_eof, reader.pos); + } else if (try reader.readMatch(" { + try reader.shift(); + if (reader.pos == reader.buf.len) { + return reader.fatal(.unexpected_eof, reader.pos); + } else if (try reader.readMatch("&#")) { + try reader.readCharacterReference(); + if (!try reader.readMatch(";")) return reader.fatal(.character_reference_unclosed, reader.pos); + try reader.checkCharacterReference(); + break .character_reference; + } else if (try reader.readMatch("&")) { + try reader.readName(); + if (!try reader.readMatch(";")) return reader.fatal(.entity_reference_unclosed, reader.pos); + try reader.checkEntityReference(); + break .entity_reference; + } else if (try reader.readMatch("")) return reader.fatal(.element_end_unclosed, reader.pos); + try reader.checkElementEnd(); + if 
(reader.element_names.items.len == 1) reader.state = .after_root; + break .element_end; + } else if (try reader.readMatch("<")) { + try reader.readName(); + if (try reader.readElementStartContent()) { + reader.state = .empty_element; + } + try reader.checkElementStart(); + break .element_start; + } else { + try reader.readText(); + try reader.checkText(); + break .text; + } + }, + .empty_element => { + reader.state = .in_root; + break .element_end; + }, + .empty_root => { + reader.state = .after_root; + break .element_end; + }, + .after_root => { + try reader.skipSpace(); + if (reader.pos == reader.buf.len) { + reader.state = .eof; + continue; + } else if (try reader.readMatch(" break .eof, + } + }; + reader.node = node; + return node; +} + +/// Reads and returns the text content of the element and its children. +/// The current node after returning is the end of the element. +/// The returned value is owned by `reader` and is only valid until the next call to another +/// function on `reader`. +/// Asserts that the current node is `Node.element_start`. +pub fn readElementText(reader: *Reader) anyerror![]const u8 { + reader.scratch.clearRetainingCapacity(); + const writer = reader.scratch.writer(reader.gpa); + try reader.readElementTextWrite(writer.any()); + return reader.scratch.items; +} + +test readElementText { + var doc = StaticDocument.init( + \\Hello, world! + ); + var reader = doc.reader(std.testing.allocator, .{}); + defer reader.deinit(); + + try expectEqual(.element_start, try reader.read()); + try expectEqualStrings("root", reader.elementName()); + try expectEqualStrings("Hello, world!", try reader.readElementText()); + try expectEqualStrings("root", reader.elementName()); + try expectEqual(.eof, try reader.read()); +} + +/// Reads and returns the text content of the element and its children. +/// The current node after returning is the end of the element. +/// Asserts that the current node is `Node.element_start`. 
pub fn readElementTextAlloc(reader: *Reader, gpa: Allocator) anyerror![]u8 {
    var buf = std.ArrayList(u8).init(gpa);
    defer buf.deinit();
    const buf_writer = buf.writer();
    // FIX(review): the previous `catch |err| switch` treated every error
    // except OutOfMemory as unreachable, but readElementTextWrite calls
    // reader.read(), which can fail with error.MalformedXml on bad input —
    // that would have hit `unreachable` (illegal behavior). Propagate all
    // errors instead; the return type is already `anyerror![]u8`.
    try reader.readElementTextWrite(buf_writer.any());
    return buf.toOwnedSlice();
}

test readElementTextAlloc {
    // NOTE(review): the test document was lost in extraction; reconstructed
    // from the asserted output below — confirm against the original source.
    var doc = StaticDocument.init(
        \\<root>Hello, world!</root>
    );
    var reader = doc.reader(std.testing.allocator, .{});
    defer reader.deinit();

    try expectEqual(.element_start, try reader.read());
    try expectEqualStrings("root", reader.elementName());
    const element_text = try reader.readElementTextAlloc(std.testing.allocator);
    defer std.testing.allocator.free(element_text);
    try expectEqualStrings("Hello, world!", element_text);
    try expectEqualStrings("root", reader.elementName());
    try expectEqual(.eof, try reader.read());
}

/// Reads the text content of the element and its children and writes it to
/// `writer`.
/// The current node after returning is the end of the element.
/// Asserts that the current node is `Node.element_start`.
pub fn readElementTextWrite(reader: *Reader, writer: std.io.AnyWriter) anyerror!void {
    assert(reader.node == .element_start);
    // Depth of the element we entered; its matching end tag restores this.
    const depth = reader.element_names.items.len;
    while (true) {
        switch (try reader.read()) {
            // Neither can occur inside an element: the XML declaration only
            // precedes the root, and the matching element_end ends the loop
            // before EOF can be reached.
            .xml_declaration, .eof => unreachable,
            // Markup contributes no text content; descend/skip silently.
            .element_start, .comment, .pi => {},
            .element_end => if (reader.element_names.items.len == depth) return,
            .text => try reader.textWrite(writer),
            .cdata => try reader.cdataWrite(writer),
            .character_reference => {
                // checkCharacterReference validated the codepoint, so
                // UTF-8 encoding cannot fail.
                var buf: [4]u8 = undefined;
                const len = std.unicode.utf8Encode(reader.characterReferenceChar(), &buf) catch unreachable;
                try writer.writeAll(buf[0..len]);
            },
            .entity_reference => {
                // Only the five predefined entities are accepted by
                // checkEntityReference, so the lookup cannot miss.
                const expanded = predefined_entities.get(reader.entityReferenceName()) orelse unreachable;
                try writer.writeAll(expanded);
            },
        }
    }
}

/// Reads and discards all document content until the start of the root element,
/// which is the current node after this function returns successfully.
/// Asserts that the start of the root element has not yet been read.
pub fn skipProlog(reader: *Reader) anyerror!void {
    assert(reader.state == .start or reader.state == .after_xml_declaration or reader.state == .after_doctype);
    while (true) {
        if (try reader.read() == .element_start) return;
    }
}

test skipProlog {
    // NOTE(review): the test document was lost in extraction; reconstructed
    // to match the assertions below (prolog content followed by an empty
    // root element) — confirm against the original source.
    var doc = StaticDocument.init(
        \\<?xml version="1.0"?>
        \\<!-- some comment -->
        \\<?some-pi?>
        \\<!-- another comment -->
        \\<root/>
    );
    var reader = doc.reader(std.testing.allocator, .{});
    defer reader.deinit();

    try reader.skipProlog();
    try expectEqualStrings("root", reader.elementName());
    try expectEqual(.element_end, try reader.read());
    try expectEqualStrings("root", reader.elementName());
    try expectEqual(.eof, try reader.read());
}

/// Reads and discards all document content until the end of the containing
/// element, which is the current node after this function returns successfully.
/// Asserts that the reader is currently inside an element (not before or after
/// the root element).
+pub fn skipElement(reader: *Reader) anyerror!void { + assert(reader.state == .in_root or reader.state == .empty_element or reader.state == .empty_root); + const depth = reader.element_names.items.len; + while (true) { + if (try reader.read() == .element_end and reader.element_names.items.len == depth) return; + } +} + +test skipElement { + var doc = StaticDocument.init( + \\ + \\ Hello, world! + \\ + \\ + \\ + ); + var reader = doc.reader(std.testing.allocator, .{}); + defer reader.deinit(); + + try expectEqual(.element_start, try reader.read()); + try expectEqualStrings("root", reader.elementName()); + try reader.skipElement(); + try expectEqualStrings("root", reader.elementName()); + try expectEqual(.eof, try reader.read()); +} + +fn readXmlDeclarationContent(reader: *Reader) !void { + while (true) { + try reader.readSpace(); + if (try reader.readMatch("?>")) return; + try reader.readPair(); + } +} + +fn checkXmlDeclaration(reader: *Reader) !void { + try reader.checkAttributes(); + var state: enum { + start, + after_version, + after_encoding, + end, + } = .start; + for (0..reader.attributeCountUnchecked()) |i| { + const name = reader.attributeNameUnchecked(i); + const value = reader.attributeValueUnchecked(i); + switch (state) { + .start => if (std.mem.eql(u8, name, "version")) { + try reader.checkXmlVersion(value, i); + state = .after_version; + } else { + return reader.fatal(.xml_declaration_version_missing, 0); + }, + .after_version => if (std.mem.eql(u8, name, "encoding")) { + try reader.checkXmlEncoding(value, i); + state = .after_encoding; + } else if (std.mem.eql(u8, name, "standalone")) { + try reader.checkXmlStandalone(value, i); + state = .end; + } else { + return reader.fatal(.xml_declaration_attribute_unsupported, reader.attributeNamePos(i)); + }, + .after_encoding => if (std.mem.eql(u8, name, "standalone")) { + try reader.checkXmlStandalone(value, i); + state = .end; + } else { + return reader.fatal(.xml_declaration_attribute_unsupported, 
reader.attributeNamePos(i)); + }, + .end => return reader.fatal(.xml_declaration_attribute_unsupported, reader.attributeNamePos(i)), + } + } + if (state == .start) { + return reader.fatal(.xml_declaration_version_missing, 0); + } +} + +fn checkXmlVersion(reader: *Reader, version: []const u8, n_attr: usize) !void { + if (!std.mem.startsWith(u8, version, "1.")) { + return reader.fatal(.xml_declaration_version_unsupported, reader.attributeValuePos(n_attr)); + } + for (version["1.".len..]) |c| { + switch (c) { + '0'...'9' => {}, + else => return reader.fatal(.xml_declaration_version_unsupported, reader.attributeValuePos(n_attr)), + } + } +} + +fn checkXmlEncoding(reader: *Reader, encoding: []const u8, n_attr: usize) !void { + if (!std.ascii.eqlIgnoreCase(encoding, "utf-8")) { + return reader.fatal(.xml_declaration_encoding_unsupported, reader.attributeValuePos(n_attr)); + } +} + +fn checkXmlStandalone(reader: *Reader, standalone: []const u8, n_attr: usize) !void { + if (!std.mem.eql(u8, standalone, "yes") and !std.mem.eql(u8, standalone, "no")) { + return reader.fatal(.xml_declaration_standalone_malformed, reader.attributeValuePos(n_attr)); + } +} + +fn readElementStartContent(reader: *Reader) !bool { + while (true) { + try reader.readSpace(); + if (try reader.readMatch("/>")) { + return true; + } else if (try reader.readMatch(">")) { + return false; + } else { + try reader.readPair(); + } + } +} + +fn checkElementStart(reader: *Reader) !void { + const element_name = reader.elementNameUnchecked(); + const element_name_pos = reader.elementNamePos(); + try reader.checkName(element_name, element_name_pos); + try reader.checkAttributes(); + + const element_name_index = try reader.addString(element_name); + try reader.element_names.append(reader.gpa, element_name_index); + + if (reader.options.namespace_aware) { + try reader.ns_prefixes.append(reader.gpa, .{}); + try reader.checkAttributesNs(); + if (std.mem.indexOfScalar(u8, element_name, ':')) |colon_pos| { + const prefix 
= element_name[0..colon_pos]; + if (std.mem.eql(u8, prefix, "xmlns")) return reader.fatal(.namespace_prefix_illegal, element_name_pos); + try reader.checkNcName(prefix, element_name_pos); + const local = element_name[colon_pos + 1 ..]; + try reader.checkNcName(local, element_name_pos); + if (reader.namespaceUri(prefix).len == 0) return reader.fatal(.namespace_prefix_unbound, element_name_pos); + } + } +} + +fn checkAttributes(reader: *Reader) !void { + const n_attributes = reader.attributeCountUnchecked(); + try reader.attributes.ensureUnusedCapacity(reader.gpa, n_attributes); + for (0..n_attributes) |i| { + const name_pos = reader.attributeNamePos(i); + if (i > 0 and name_pos == reader.attributeValueEndPos(i - 1) + 1) { + return reader.fatal(.attribute_missing_space, name_pos); + } + + const name = reader.attributeNameUnchecked(i); + try reader.checkName(name, name_pos); + + const gop = reader.attributes.getOrPutAssumeCapacity(name); + if (gop.found_existing) return reader.fatal(.attribute_duplicate, name_pos); + gop.value_ptr.* = i; + + try reader.checkAttributeValue(i); + } +} + +fn checkAttributeValue(reader: *Reader, n: usize) !void { + const s = reader.attributeValueUnchecked(n); + const pos = reader.attributeValuePos(n); + try reader.validateUtf8(s, pos); + var i: usize = 0; + while (i < s.len) : (i += 1) { + switch (s[i]) { + '\t', + '\n', + '\r', + 0x20...('&' - 1), + ('&' + 1)...('<' - 1), + ('<' + 1)...0xEE, + 0xF0...0xFF, + => {}, + 0xEF => { + // We already validated for correct UTF-8, so we know 2 bytes follow. 
+ // The Unicode codepoints U+FFFE and U+FFFF are not allowed as characters: + // U+FFFE: EF BF BE + // U+FFFF: EF BF BF + if (s[i + 1] == 0xBF and (s[i + 2] == 0xBE or s[i + 2] == 0xBF)) { + return reader.fatal(.illegal_character, pos + i); + } + }, + '<' => return reader.fatal(.attribute_illegal_character, pos + i), + '&' => { + if (std.mem.startsWith(u8, s[i + "&".len ..], "#")) { + const end = std.mem.indexOfScalarPos(u8, s, i, ';') orelse return reader.fatal(.character_reference_unclosed, pos + i); + const ref = s[i + "&#".len .. end]; + const c = if (std.mem.startsWith(u8, ref, "x")) + std.fmt.parseInt(u21, ref["x".len..], 16) catch return reader.fatal(.character_reference_malformed, pos + i) + else + std.fmt.parseInt(u21, ref, 10) catch return reader.fatal(.character_reference_malformed, pos + i); + if (!isChar(c)) return reader.fatal(.character_reference_malformed, pos + i); + } else { + const end = std.mem.indexOfScalarPos(u8, s, i, ';') orelse return reader.fatal(.entity_reference_unclosed, pos + i); + const ref = s[i + "&".len .. 
end]; + if (!predefined_entities.has(ref)) return reader.fatal(.entity_reference_undefined, pos + i); + i = end; + } + }, + else => return reader.fatal(.illegal_character, pos + i), + } + } +} + +fn checkAttributesNs(reader: *Reader) !void { + const n_attributes = reader.attributeCountUnchecked(); + try reader.q_attributes.ensureUnusedCapacity(reader.gpa, n_attributes); + const prefix_bindings = &reader.ns_prefixes.items[reader.ns_prefixes.items.len - 1]; + + for (0..n_attributes) |i| { + const name = reader.attributeNameUnchecked(i); + const pos = reader.attributeNamePos(i); + if (std.mem.eql(u8, name, "xmlns")) { + const value = reader.attributeValueUnchecked(i); + const uri_index = try reader.addAttributeValueString(value); + const uri = reader.string(uri_index); + if (std.mem.eql(u8, uri, ns_xml) or std.mem.eql(u8, uri, ns_xmlns)) { + return reader.fatal(.namespace_binding_illegal, pos); + } + try prefix_bindings.putNoClobber(reader.gpa, .empty, uri_index); + } else if (std.mem.startsWith(u8, name, "xmlns:")) { + const prefix = name["xmlns:".len..]; + if (std.mem.eql(u8, prefix, "xmlns")) return reader.fatal(.namespace_binding_illegal, pos); + try reader.checkNcName(prefix, pos); + const prefix_index = try reader.addString(prefix); + const value = reader.attributeValueUnchecked(i); + if (value.len == 0) return reader.fatal(.attribute_prefix_undeclared, pos); + const uri_index = try reader.addAttributeValueString(value); + const uri = reader.string(uri_index); + if (std.mem.eql(u8, uri, "xml") != std.mem.eql(u8, uri, ns_xml)) return reader.fatal(.namespace_binding_illegal, pos); + if (std.mem.eql(u8, uri, ns_xmlns)) return reader.fatal(.namespace_binding_illegal, pos); + try prefix_bindings.putNoClobber(reader.gpa, prefix_index, uri_index); + } + } + + for (0..n_attributes) |i| { + const name = reader.attributeNameUnchecked(i); + const pos = reader.attributeNamePos(i); + const colon_pos = std.mem.indexOfScalar(u8, name, ':') orelse { + 
reader.q_attributes.putAssumeCapacityNoClobber(.{ .ns = "", .local = name }, i); + continue; + }; + const prefix = name[0..colon_pos]; + try reader.checkNcName(prefix, pos); + const local = name[colon_pos + 1 ..]; + try reader.checkNcName(local, pos); + const uri = reader.namespaceUri(prefix); + if (uri.len == 0) return reader.fatal(.namespace_prefix_unbound, pos); + const gop = reader.q_attributes.getOrPutAssumeCapacity(.{ .ns = uri, .local = local }); + if (gop.found_existing) return reader.fatal(.attribute_duplicate, pos); + gop.value_ptr.* = i; + } +} + +fn addAttributeValueString(reader: *Reader, raw_value: []const u8) !StringIndex { + try reader.strings.append(reader.gpa, 0); + const start = reader.strings.items.len; + var i: usize = 0; + while (i < raw_value.len) : (i += 1) { + switch (raw_value[i]) { + '\t', '\n' => try reader.strings.append(reader.gpa, ' '), + '\r' => { + try reader.strings.append(reader.gpa, ' '); + if (i + 1 < raw_value.len and raw_value[i + 1] == '\n') i += 1; + }, + '&' => { + const entity_end = std.mem.indexOfScalarPos(u8, raw_value, i, ';') orelse unreachable; + if (raw_value[i + "&".len] == '#') { + const c = if (raw_value[i + "&#".len] == 'x') + std.fmt.parseInt(u21, raw_value[i + "&#x".len .. entity_end], 16) catch unreachable + else + std.fmt.parseInt(u21, raw_value[i + "&#".len .. entity_end], 10) catch unreachable; + try reader.strings.ensureUnusedCapacity(reader.gpa, 4); + reader.strings.items.len += std.unicode.utf8Encode(c, reader.strings.items) catch unreachable; + } else { + const expansion = predefined_entities.get(raw_value[i + "&".len .. 
entity_end]) orelse unreachable; + try reader.strings.appendSlice(reader.gpa, expansion); + } + i = entity_end; + }, + else => |b| try reader.strings.append(reader.gpa, b), + } + } + return @enumFromInt(start); +} + +fn checkElementEnd(reader: *Reader) !void { + const element_name = reader.string(reader.element_names.getLast()); + if (!std.mem.eql(u8, reader.elementNameUnchecked(), element_name)) { + return reader.fatal(.element_end_mismatched, reader.elementNamePos()); + } +} + +fn readCommentContent(reader: *Reader) !void { + const start = reader.pos; + while (true) { + reader.pos = std.mem.indexOfPos(u8, reader.buf, reader.pos, "--") orelse reader.buf.len; + if (reader.pos < reader.buf.len) { + if (!std.mem.startsWith(u8, reader.buf[reader.pos + "--".len ..], ">")) { + return reader.fatal(.comment_malformed, reader.pos); + } + try reader.spans.append(reader.gpa, .{ .start = start, .end = reader.pos }); + reader.pos += "-->".len; + return; + } + try reader.more(); + if (reader.pos == reader.buf.len) return reader.fatal(.comment_unclosed, reader.pos); + } +} + +fn checkComment(reader: *Reader) !void { + try reader.checkChars(reader.commentUnchecked(), reader.commentPos()); +} + +fn readPiContent(reader: *Reader) !void { + try reader.readSpace(); + const start = reader.pos; + while (true) { + reader.pos = std.mem.indexOfPos(u8, reader.buf, reader.pos, "?>") orelse reader.buf.len; + if (reader.pos < reader.buf.len) { + try reader.spans.append(reader.gpa, .{ .start = start, .end = reader.pos }); + reader.pos += "?>".len; + return; + } + try reader.more(); + if (reader.pos == reader.buf.len) return reader.fatal(.pi_unclosed, reader.pos); + } +} + +fn checkPi(reader: *Reader) !void { + const target = reader.piTargetUnchecked(); + if (std.ascii.eqlIgnoreCase(target, "xml")) { + return reader.fatal(.pi_target_disallowed, reader.piTargetPos()); + } + try reader.checkName(target, reader.piTargetPos()); + if (reader.options.namespace_aware and std.mem.indexOfScalar(u8, 
target, ':') != null) { + return reader.fatal(.name_malformed, reader.piTargetPos()); + } + if (reader.piTargetEndPos() == reader.piDataPos() and reader.piDataEndPos() > reader.piDataPos()) { + return reader.fatal(.pi_missing_space, reader.piDataPos()); + } + try reader.checkChars(reader.piDataUnchecked(), reader.piDataPos()); +} + +fn readText(reader: *Reader) !void { + while (reader.pos < reader.buf.len) { + const b = reader.buf[reader.pos]; + if (b == '&' or b == '<') return; + // We don't care about validating UTF-8 strictly here. + // We just don't want to end in the possible middle of a codepoint. + const nb: usize = if (b < 0x80) { + reader.pos += 1; + continue; + } else if (b < 0xE0) + 2 + else if (b < 0xF0) + 3 + else + 4; + if (reader.pos + nb > reader.buf.len) try reader.more(); + reader.pos = @min(reader.pos + nb, reader.buf.len); + } + // We don't want to end on a CR right before an LF, or CRLF normalization will not be possible. + if (reader.pos > 0 and reader.buf[reader.pos - 1] == '\r') { + try reader.more(); + if (reader.pos < reader.buf.len and reader.buf[reader.pos] == '\n') { + reader.pos += 1; + } + return; + } + // We also don't want to end in the middle of ']]>' which checkText needs to reject. + if (reader.pos > 0 and reader.buf[reader.pos - 1] == ']') { + try reader.more(); + if (std.mem.startsWith(u8, reader.buf[reader.pos..], "]>")) { + reader.pos += "]>".len; + } + return; + } +} + +fn checkText(reader: *Reader) !void { + const s = reader.textUnchecked(); + const pos = reader.textPos(); + try reader.validateUtf8(s, pos); + for (s, 0..) |c, i| { + switch (c) { + '\t', + '\n', + '\r', + 0x20...(']' - 1), + (']' + 1)...0xEE, + 0xF0...0xFF, + => {}, + ']' => { + if (std.mem.startsWith(u8, s[i + 1 ..], "]>")) { + return reader.fatal(.text_cdata_end_disallowed, pos + i); + } + }, + 0xEF => { + // We already validated for correct UTF-8, so we know 2 bytes follow. 
+ // The Unicode codepoints U+FFFE and U+FFFF are not allowed as characters: + // U+FFFE: EF BF BE + // U+FFFF: EF BF BF + if (s[i + 1] == 0xBF and (s[i + 2] == 0xBE or s[i + 2] == 0xBF)) { + return reader.fatal(.illegal_character, pos + i); + } + }, + else => return reader.fatal(.illegal_character, pos + i), + } + } +} + +fn readCdata(reader: *Reader) !void { + const start = reader.pos; + while (true) { + reader.pos = std.mem.indexOfPos(u8, reader.buf, reader.pos, "]]>") orelse reader.buf.len; + if (reader.pos < reader.buf.len) { + try reader.spans.append(reader.gpa, .{ .start = start, .end = reader.pos }); + reader.pos += "]]>".len; + return; + } + try reader.more(); + if (reader.pos == reader.buf.len) return reader.fatal(.cdata_unclosed, reader.pos); + } +} + +fn checkCdata(reader: *Reader) !void { + try reader.checkChars(reader.cdataUnchecked(), reader.cdataPos()); +} + +fn checkEntityReference(reader: *Reader) !void { + if (!predefined_entities.has(reader.entityReferenceNameUnchecked())) { + return reader.fatal(.entity_reference_undefined, reader.entityReferenceNamePos()); + } +} + +fn readCharacterReference(reader: *Reader) !void { + const start = reader.pos; + while (true) { + while (reader.pos < reader.buf.len) { + switch (reader.buf[reader.pos]) { + '0'...'9', 'A'...'Z', 'a'...'z' => reader.pos += 1, + else => { + try reader.spans.append(reader.gpa, .{ .start = start, .end = reader.pos }); + return; + }, + } + } + try reader.more(); + if (reader.pos == reader.buf.len) { + try reader.spans.append(reader.gpa, .{ .start = start, .end = reader.pos }); + return; + } + } +} + +fn checkCharacterReference(reader: *Reader) !void { + const ref = reader.characterReferenceNameUnchecked(); + const pos = reader.characterReferenceNamePos(); + const c = if (std.mem.startsWith(u8, ref, "x")) + std.fmt.parseInt(u21, ref["x".len..], 16) catch return reader.fatal(.character_reference_malformed, pos) + else + std.fmt.parseInt(u21, ref, 10) catch return 
reader.fatal(.character_reference_malformed, pos); + if (!isChar(c)) return reader.fatal(.character_reference_malformed, pos); + reader.character = c; +} + +fn readName(reader: *Reader) !void { + const start = reader.pos; + while (true) { + while (reader.pos < reader.buf.len) { + switch (reader.buf[reader.pos]) { + 'A'...'Z', 'a'...'z', '0'...'9', ':', '_', '-', '.', 0x80...0xFF => reader.pos += 1, + else => { + try reader.spans.append(reader.gpa, .{ .start = start, .end = reader.pos }); + return; + }, + } + } + try reader.more(); + if (reader.pos == reader.buf.len) { + try reader.spans.append(reader.gpa, .{ .start = start, .end = reader.pos }); + return; + } + } +} + +fn readPair(reader: *Reader) !void { + try reader.readName(); + try reader.readSpace(); + if (!try reader.readMatch("=")) return reader.fatal(.expected_equals, reader.pos); + try reader.readSpace(); + try reader.readQuotedValue(); +} + +fn readQuotedValue(reader: *Reader) !void { + const quote = quote: { + if (reader.pos == reader.buf.len) { + try reader.more(); + if (reader.pos == reader.buf.len) return reader.fatal(.expected_quote, reader.pos); + } + break :quote switch (reader.buf[reader.pos]) { + '"', '\'' => |c| c, + else => return reader.fatal(.expected_quote, reader.pos), + }; + }; + reader.pos += 1; + const start = reader.pos; + while (true) { + reader.pos = std.mem.indexOfScalarPos(u8, reader.buf, reader.pos, quote) orelse reader.buf.len; + if (reader.pos < reader.buf.len) { + try reader.spans.append(reader.gpa, .{ .start = start, .end = reader.pos }); + reader.pos += 1; + return; + } + try reader.more(); + if (reader.pos == reader.buf.len) return reader.fatal(.missing_end_quote, reader.pos); + } +} + +fn readMatch(reader: *Reader, needle: []const u8) !bool { + if (reader.pos + needle.len > reader.buf.len) { + try reader.more(); + if (reader.pos + needle.len > reader.buf.len) return false; + } + if (std.mem.eql(u8, reader.buf[reader.pos..][0..needle.len], needle)) { + reader.pos += 
needle.len; + return true; + } + return false; +} + +fn readSpace(reader: *Reader) !void { + while (true) { + while (reader.pos < reader.buf.len) { + switch (reader.buf[reader.pos]) { + ' ', '\t', '\r', '\n' => reader.pos += 1, + else => return, + } + } + try reader.more(); + if (reader.pos == reader.buf.len) return; + } +} + +fn checkName(reader: *Reader, s: []const u8, pos: usize) !void { + const view = try reader.viewUtf8(s, pos); + var iter = view.iterator(); + if (!isNameStartChar(iter.nextCodepoint() orelse return reader.fatal(.name_malformed, pos))) { + return reader.fatal(.name_malformed, pos); + } + while (iter.nextCodepoint()) |c| { + if (!isNameChar(c)) return reader.fatal(.name_malformed, pos); + } +} + +fn checkNcName(reader: *Reader, s: []const u8, pos: usize) !void { + if (s.len == 0 or !isNameStartChar(s[0]) or std.mem.indexOfScalar(u8, s, ':') != null) { + return reader.fatal(.name_malformed, pos); + } +} + +fn isNameStartChar(c: u21) bool { + return switch (c) { + ':', + 'A'...'Z', + '_', + 'a'...'z', + 0xC0...0xD6, + 0xD8...0xF6, + 0xF8...0x2FF, + 0x370...0x37D, + 0x37F...0x1FFF, + 0x200C...0x200D, + 0x2070...0x218F, + 0x2C00...0x2FEF, + 0x3001...0xD7FF, + 0xF900...0xFDCF, + 0xFDF0...0xFFFD, + 0x10000...0xEFFFF, + => true, + else => false, + }; +} + +fn isNameChar(c: u21) bool { + return isNameStartChar(c) or switch (c) { + '-', + '.', + '0'...'9', + 0xB7, + 0x0300...0x036F, + 0x203F...0x2040, + => true, + else => false, + }; +} + +fn checkChars(reader: *Reader, s: []const u8, pos: usize) !void { + try reader.validateUtf8(s, pos); + for (s, 0..) |c, i| { + switch (c) { + '\t', '\n', '\r', 0x20...0xEE, 0xF0...0xFF => {}, + 0xEF => { + // We already validated for correct UTF-8, so we know 2 bytes follow. 
+ // The Unicode codepoints U+FFFE and U+FFFF are not allowed as characters: + // U+FFFE: EF BF BE + // U+FFFF: EF BF BF + if (s[i + 1] == 0xBF and (s[i + 2] == 0xBE or s[i + 2] == 0xBF)) { + return reader.fatal(.illegal_character, pos + i); + } + }, + else => return reader.fatal(.illegal_character, pos + i), + } + } +} + +fn isChar(c: u21) bool { + return switch (c) { + 0x9, + 0xA, + 0xD, + 0x20...0xD7FF, + 0xE000...0xFFFD, + 0x10000...0x10FFFF, + => true, + else => false, + }; +} + +fn skipBom(reader: *Reader) !void { + const bom = "\u{FEFF}"; + if (std.mem.startsWith(u8, reader.buf[reader.pos..], bom)) { + reader.pos += bom.len; + try reader.shift(); + } +} + +fn skipSpace(reader: *Reader) !void { + while (true) { + while (reader.pos < reader.buf.len) { + switch (reader.buf[reader.pos]) { + ' ', '\t', '\r', '\n' => reader.pos += 1, + else => { + try reader.shift(); + return; + }, + } + } + try reader.shift(); + if (reader.pos == reader.buf.len) return; + } +} + +fn validateUtf8(reader: *Reader, s: []const u8, pos: usize) !void { + if (reader.options.assume_valid_utf8) return; + if (!std.unicode.utf8ValidateSlice(s)) return reader.fatalInvalidUtf8(s, pos); +} + +fn viewUtf8(reader: *Reader, s: []const u8, pos: usize) !std.unicode.Utf8View { + if (reader.options.assume_valid_utf8) return std.unicode.Utf8View.initUnchecked(s); + return std.unicode.Utf8View.init(s) catch reader.fatalInvalidUtf8(s, pos); +} + +fn fatalInvalidUtf8(reader: *Reader, s: []const u8, pos: usize) error{MalformedXml} { + // We need to backtrack and redo the UTF-8 validation to set the correct + // error location; the standard "validate UTF-8" function doesn't provide + // an index for the invalid data. 
+ var invalid_pos: usize = 0; + while (true) { + const cp_len = std.unicode.utf8ByteSequenceLength(s[invalid_pos]) catch break; + if (invalid_pos + cp_len > s.len) break; + if (!std.unicode.utf8ValidateSlice(s[invalid_pos..][0..cp_len])) break; + invalid_pos += cp_len; + } + return reader.fatal(.invalid_utf8, pos + invalid_pos); +} + +const base_read_size = 4096; + +fn shift(reader: *Reader) !void { + if (reader.options.location_aware) { + reader.loc.update(reader.buf[0..reader.pos]); + } + + reader.buf = try reader.source.move(reader.pos, base_read_size); + reader.pos = 0; + reader.spans.clearRetainingCapacity(); + reader.attributes.clearRetainingCapacity(); + reader.q_attributes.clearRetainingCapacity(); + + if (reader.node == .element_end) { + if (reader.options.namespace_aware) { + var prefix_bindings = reader.ns_prefixes.pop(); + prefix_bindings.deinit(reader.gpa); + } + const element_name_start = reader.element_names.pop(); + reader.strings.shrinkRetainingCapacity(@intFromEnum(element_name_start)); + } +} + +fn more(reader: *Reader) !void { + reader.buf = try reader.source.move(0, reader.buf.len * 2); +} + +fn fatal(reader: *Reader, error_code: ErrorCode, error_pos: usize) error{MalformedXml} { + reader.state = .invalid; + reader.error_code = error_code; + reader.error_pos = error_pos; + return error.MalformedXml; +} + +const QNameContext = struct { + pub fn hash(ctx: @This(), qname: QName) u32 { + _ = ctx; + var w = std.hash.Wyhash.init(0); + w.update(qname.ns); + w.update(qname.local); + return @truncate(w.final()); + } + + pub fn eql(ctx: @This(), a: QName, b: QName, b_index: usize) bool { + _ = ctx; + _ = b_index; + return std.mem.eql(u8, a.ns, b.ns) and std.mem.eql(u8, a.local, b.local); + } +}; + +const BufSpan = struct { + start: usize, + end: usize, +}; + +fn bufSlice(reader: Reader, span: BufSpan) []const u8 { + return reader.buf[span.start..span.end]; +} + +const StringIndex = enum(usize) { empty = 0, _ }; + +const StringIndexAdapter = struct { + 
strings: []const u8, + + pub fn hash(ctx: @This(), key: []const u8) u32 { + _ = ctx; + return @truncate(std.hash.Wyhash.hash(0, key)); + } + + pub fn eql(ctx: @This(), a: []const u8, b: StringIndex, b_index: usize) bool { + _ = b_index; + const b_val = std.mem.sliceTo(ctx.strings[@intFromEnum(b)..], 0); + return std.mem.eql(u8, a, b_val); + } +}; + +fn addString(reader: *Reader, s: []const u8) !StringIndex { + try reader.strings.ensureUnusedCapacity(reader.gpa, s.len + 1); + reader.strings.appendAssumeCapacity(0); + const start = reader.strings.items.len; + reader.strings.appendSliceAssumeCapacity(s); + return @enumFromInt(start); +} + +fn string(reader: Reader, index: StringIndex) []const u8 { + return std.mem.sliceTo(reader.strings.items[@intFromEnum(index)..], 0); +} diff --git a/src/Scanner.zig b/src/Scanner.zig deleted file mode 100644 index 3939ea9..0000000 --- a/src/Scanner.zig +++ /dev/null @@ -1,2045 +0,0 @@ -//! A simple, low-level streaming XML parser. -//! -//! The design of the parser is strongly inspired by -//! [Yxml](https://dev.yorhel.nl/yxml). Codepoints are fed to the parser one by one -//! using the `next` function, then the `endInput` function should be used to -//! check that the parser is in a valid state for the end of input (e.g. not in -//! the middle of parsing an element). The tokens returned by the parser -//! reference the input data using `pos` ranges (the meaning of `pos` depends -//! on the meaning of the `len` passed to `next`). -//! -//! A higher-level parser which wants to do anything useful with the returned -//! tokens will need to store the input text fed to the `next` function in some -//! sort of buffer. If the document is stored entirely in memory, this buffer -//! could be the document content itself. If the document is being read in a -//! streaming manner, however, then an auxiliary buffer will be needed. To -//! avoid requiring such higher-level APIs to maintain an unbounded input -//! 
buffer, the `resetPos` function exists to reset `pos` to 0, if possible. -//! The approach taken by `TokenReader` is to call `resetPos` after every -//! token, and after reaching a state where space for a further codepoint is -//! not guaranteed. With this approach, the length of the buffer bounds the -//! maximum size of "unsplittable" content, such as element and attribute -//! names, but not "splittable" content such as element text content and -//! attribute values. -//! -//! Intentional (permanent) limitations (which can be addressed by -//! higher-level APIs, such as `Reader`): -//! -//! - Does not validate that corresponding open and close tags match. -//! - Does not validate that attribute names are not duplicated. -//! - Does not do any special handling of namespaces. -//! - Does not perform any sort of processing on text content or attribute -//! values (including normalization, expansion of entities, etc.). -//! - However, note that entity and character references in text content and -//! attribute values _are_ validated for correct syntax, although their -//! content is not (they may reference non-existent entities). -//! - Does not process DTDs in any way besides parsing them (TODO: see below). -//! -//! Unintentional (temporary) limitations (which will be removed over time): -//! -//! - Does not support `DOCTYPE` at all (using one will result in an error). -//! - Not extensively tested/fuzzed. - -/// The data for the most recently returned token. -token_data: Token.Data = undefined, -/// The current state of the scanner. -state: State = .start, -/// Data associated with the current state of the scanner. -state_data: State.Data = undefined, -/// The current position in the input. -/// -/// The meaning of this position is determined by the meaning of the `len` -/// value passed to `next`, which is determined by the user. 
For example, a -/// user with a byte slice or reader would probably want to pass `len` as the -/// number of bytes making up the codepoint, which would make `pos` a byte -/// offset. -pos: usize = 0, -/// The current element nesting depth. -depth: usize = 0, -/// Whether the root element has been seen already. -seen_root_element: bool = false, - -const std = @import("std"); -const testing = std.testing; -const unicode = std.unicode; -const syntax = @import("syntax.zig"); - -const Scanner = @This(); - -/// A range of byte positions in the input. -pub const Range = struct { - /// The start of the range (inclusive). - start: usize, - /// The end of the range (exclusive). - end: usize, - - pub fn isEmpty(self: Range) bool { - return self.start == self.end; - } - - pub fn format(self: Range, _: []const u8, _: std.fmt.FormatOptions, writer: anytype) !void { - try writer.print("{}..{}", .{ self.start, self.end }); - } -}; - -/// A single XML token. -/// -/// The choice of tokens is designed to allow the buffer position to be reset as -/// often as reasonably possible ("forgetting" any range information before the -/// reset), supported by the following design decisions: -/// -/// - Tokens contain only the immediately necessary context: for example, the -/// `attribute_content` token does not store any information about the -/// attribute name, since it may have been processed many resets ago (if the -/// attribute content is very long). -/// - Multiple `content` tokens may be returned for a single enclosing context -/// (e.g. element or attribute) if the buffer is reset in the middle of -/// content or there are other necessary intervening factors, such as CDATA -/// in the middle of normal (non-CDATA) element content. -/// -/// For efficiency (avoiding copying when passing around tokens), this is -/// merely an enum specifying the token type. The actual token data is available -/// in `Token.Data`, in the scanner's `token_data` field. 
The `fullToken` -/// function can be used to get a `Token.Full`, which is a tagged union type and -/// may be easier to consume in certain circumstances. -pub const Token = enum { - /// Continue processing: no new token to report yet. - ok, - /// XML declaration. - xml_declaration, - /// Element start tag. - element_start, - /// Element content. - element_content, - /// Element end tag. - element_end, - /// End of an empty element. - element_end_empty, - /// Attribute start. - attribute_start, - /// Attribute value content. - attribute_content, - /// Comment start. - comment_start, - /// Comment content. - comment_content, - /// Processing instruction (PI) start. - pi_start, - /// PI content. - pi_content, - - /// The data associated with a token. - /// - /// Even token types which have no associated data are represented here, to - /// provide some additional safety in safe build modes (where it can be - /// checked whether the caller is referencing the correct data field). - pub const Data = union { - ok: void, - xml_declaration: XmlDeclaration, - element_start: ElementStart, - element_content: ElementContent, - element_end: ElementEnd, - element_end_empty: void, - attribute_start: AttributeStart, - attribute_content: AttributeContent, - comment_start: void, - comment_content: CommentContent, - pi_start: PiStart, - pi_content: PiContent, - }; - - /// A token type plus data represented as a tagged union. 
- pub const Full = union(Token) { - ok, - xml_declaration: XmlDeclaration, - element_start: ElementStart, - element_content: ElementContent, - element_end: ElementEnd, - element_end_empty, - attribute_start: AttributeStart, - attribute_content: AttributeContent, - comment_start, - comment_content: CommentContent, - pi_start: PiStart, - pi_content: PiContent, - }; - - pub const XmlDeclaration = struct { - version: Range, - encoding: ?Range = null, - standalone: ?bool = null, - }; - - pub const ElementStart = struct { - name: Range, - }; - - pub const ElementContent = struct { - content: Content, - }; - - pub const ElementEnd = struct { - name: Range, - }; - - pub const AttributeStart = struct { - name: Range, - }; - - pub const AttributeContent = struct { - content: Content, - final: bool = false, - }; - - pub const CommentContent = struct { - content: Range, - final: bool = false, - }; - - pub const PiStart = struct { - target: Range, - }; - - pub const PiContent = struct { - content: Range, - final: bool = false, - }; - - /// A bit of content of an element or attribute. - pub const Content = union(enum) { - /// Raw text content (does not contain any entities). - text: Range, - /// A Unicode codepoint. - codepoint: u21, - /// An entity reference, such as `&`. The range covers the name (`amp`). - entity: Range, - }; -}; - -/// Returns the full token (including data) from the most recent call to `next` -/// or `resetPos`. `token` must be the token returned from the last call to one -/// of those functions. -/// -/// --- -/// -/// API note: the use of `self: *const Scanner` rather than `self: Scanner` is -/// important to elimiate a potential footgun with the following code: -/// -/// ``` -/// const full_token = scanner.fullToken(try scanner.next(c, len)); -/// ``` -/// -/// If `self: Scanner` is used, then Zig will evaluate `scanner` in its current -/// state (for the expression `scanner.fullToken`) before calling -/// `scanner.next`. 
This leads to the result being incorrect, since the `scanner` -/// used for the `fullToken` call will have the old token data. -pub fn fullToken(self: *const Scanner, token: Token) Token.Full { - return switch (token) { - inline else => |tag| @unionInit(Token.Full, @tagName(tag), @field(self.token_data, @tagName(tag))), - }; -} - -/// The possible states of the parser. -/// -/// The parser is designed as a state machine. A state may need to hold -/// associated data to allow the necessary information to be included in a -/// future token. One shortcut used to avoid creating many unnecessary -/// additional states is to store a `left` byte slice tracking expected bytes -/// remaining in a state (the slice is always pointing to static strings, so -/// there are no lifetime considerations): for example, the word "version" in -/// an XML declaration is parsed in the xml_decl_version_name state, and -/// successive bytes are validated using the `left` slice (e.g. after parsing -/// "v", left is "ersion", so that when we handle the next character, we can -/// fail parsing if it is not "e", and then set `left` to "rsion", and so on). -pub const State = enum { - /// Start of document. - start, - /// Start of document after BOM. - start_after_bom, - - /// Same as unknown_start, but also allows the XML declaration. 
- start_unknown_start, - /// Start of a PI or XML declaration after ' if (c == 0xFEFF) { - self.state = .start_after_bom; - return .ok; - } else if (c == '<') { - self.state = .start_unknown_start; - return .ok; - } else if (syntax.isSpace(c)) { - self.state = .start_after_xml_decl; - return .ok; - }, - - .start_after_bom => if (c == '<') { - self.state = .start_unknown_start; - return .ok; - } else if (syntax.isSpace(c)) { - self.state = .start_after_xml_decl; - return .ok; - }, - - .start_unknown_start => if (syntax.isNameStartChar(c)) { - self.state = .element_start_name; - self.state_data.start = self.pos; - return .ok; - } else if (c == '?') { - self.state = .pi_or_xml_decl_start; - self.state_data.start = self.pos + len; - self.state_data.left = "xml"; - return .ok; - } else if (c == '!') { - self.state = .unknown_start_bang; - return .ok; - }, - - .pi_or_xml_decl_start => if (c == self.state_data.left[0]) { - if (self.state_data.left.len == 1) { - self.state = .pi_or_xml_decl_start_after_xml; - // self.state_data.start = self.state_data.start; - } else { - self.state_data.left = self.state_data.left[1..]; - } - return .ok; - } else if (syntax.isNameStartChar(c) or (syntax.isNameChar(c) and self.pos > self.state_data.start)) { - self.state = .pi_target; - // self.state_data.start = self.state_data.start; - return .ok; - } else if (syntax.isSpace(c) and self.pos > self.state_data.start) { - const target = Range{ .start = self.state_data.start, .end = self.pos }; - self.state = .pi_after_target; - self.token_data = .{ .pi_start = .{ .target = target } }; - return .pi_start; - } else if (c == '?' 
and self.pos > self.state_data.start) { - const target = Range{ .start = self.state_data.start, .end = self.pos }; - self.state = .pi_maybe_end; - self.state_data.start = self.pos; - self.state_data.end = self.pos; - self.token_data = .{ .pi_start = .{ .target = target } }; - return .pi_start; - }, - - .pi_or_xml_decl_start_after_xml => if (syntax.isSpace(c)) { - self.state = .xml_decl; - return .ok; - } else if (syntax.isNameChar(c)) { - self.state = .pi_target; - // self.state_data.start = self.state_data.start; - return .ok; - }, - - .xml_decl => if (syntax.isSpace(c)) { - return .ok; - } else if (c == 'v') { - self.state = .xml_decl_version_name; - self.state_data.left = "ersion"; - return .ok; - }, - - .xml_decl_version_name => if (c == self.state_data.left[0]) { - if (self.state_data.left.len == 1) { - self.state = .xml_decl_after_version_name; - } else { - self.state_data.left = self.state_data.left[1..]; - } - return .ok; - }, - - .xml_decl_after_version_name => if (syntax.isSpace(c)) { - return .ok; - } else if (c == '=') { - self.state = .xml_decl_after_version_equals; - return .ok; - }, - - .xml_decl_after_version_equals => if (syntax.isSpace(c)) { - return .ok; - } else if (c == '"' or c == '\'') { - self.state = .xml_decl_version_value_start; - self.state_data.start = self.pos + len; - self.state_data.quote = @intCast(c); - self.state_data.left = "1."; - return .ok; - }, - - .xml_decl_version_value_start => if (c == self.state_data.left[0]) { - if (self.state_data.left.len == 1) { - self.state = .xml_decl_version_value; - // self.state_data.start = self.state_data.start; - // self.state_data.quote = self.state_data.quote; - } else { - self.state_data.left = self.state_data.left[1..]; - } - return .ok; - }, - - .xml_decl_version_value => if (c == self.state_data.quote and self.pos > self.state_data.start + "1.".len) { - self.state = .xml_decl_after_version_value; - self.state_data.version = .{ .start = self.state_data.start, .end = self.pos }; - return 
.ok; - } else if (syntax.isDigit(c)) { - return .ok; - }, - - .xml_decl_after_version_value => if (syntax.isSpace(c)) { - self.state = .xml_decl_after_version; - // self.state_data.version = self.state_data.version; - return .ok; - } else if (c == '?') { - const version = self.state_data.version; - self.state = .xml_decl_end; - self.token_data = .{ .xml_declaration = .{ .version = version, .encoding = null, .standalone = null } }; - return .xml_declaration; - }, - - .xml_decl_after_version => if (syntax.isSpace(c)) { - return .ok; - } else if (c == 'e') { - self.state = .xml_decl_encoding_name; - // self.state_data.version = self.state_data.version; - self.state_data.left = "ncoding"; - return .ok; - } else if (c == 's') { - self.state = .xml_decl_standalone_name; - // self.state_data.version = self.state_data.version; - self.state_data.encoding = null; - self.state_data.left = "tandalone"; - return .ok; - } else if (c == '?') { - const version = self.state_data.version; - self.state = .xml_decl_end; - self.token_data = .{ .xml_declaration = .{ .version = version, .encoding = null, .standalone = null } }; - return .xml_declaration; - }, - - .xml_decl_encoding_name => if (c == self.state_data.left[0]) { - if (self.state_data.left.len == 1) { - self.state = .xml_decl_after_encoding_name; - // self.state_data.version = self.state_data.version; - } else { - self.state_data.left = self.state_data.left[1..]; - } - return .ok; - }, - - .xml_decl_after_encoding_name => if (syntax.isSpace(c)) { - return .ok; - } else if (c == '=') { - self.state = .xml_decl_after_encoding_equals; - // self.state_data.version = self.state_data.version; - return .ok; - }, - - .xml_decl_after_encoding_equals => if (syntax.isSpace(c)) { - return .ok; - } else if (c == '"' or c == '\'') { - self.state = .xml_decl_encoding_value_start; - // self.state_data.version = self.state_data.version; - self.state_data.start = self.pos + len; - self.state_data.quote = @as(u8, @intCast(c)); - return .ok; - 
}, - - .xml_decl_encoding_value_start => if (syntax.isEncodingStartChar(c)) { - self.state = .xml_decl_encoding_value; - // self.state_data.version = self.state_data.version; - // self.state_data.start = self.state_data.start; - // self.state_data.quote = self.state_data.quote; - return .ok; - }, - - .xml_decl_encoding_value => if (c == self.state_data.quote) { - self.state = .xml_decl_after_encoding_value; - // self.state_data.version = self.state_data.version; - self.state_data.encoding = .{ .start = self.state_data.start, .end = self.pos }; - return .ok; - } else if (syntax.isEncodingChar(c)) { - return .ok; - }, - - .xml_decl_after_encoding_value => if (syntax.isSpace(c)) { - self.state = .xml_decl_after_encoding; - // self.state_data.version = self.state_data.version; - // self.state_data.encoding = self.state_data.encoding; - return .ok; - } else if (c == '?') { - const version = self.state_data.version; - const encoding = self.state_data.encoding; - self.state = .xml_decl_end; - self.token_data = .{ .xml_declaration = .{ .version = version, .encoding = encoding, .standalone = null } }; - return .xml_declaration; - }, - - .xml_decl_after_encoding => if (syntax.isSpace(c)) { - return .ok; - } else if (c == 's') { - self.state = .xml_decl_standalone_name; - // self.state_data.version = self.state_data.version; - // self.state_data.encoding = self.state_data.encoding; - self.state_data.left = "tandalone"; - return .ok; - } else if (c == '?') { - const version = self.state_data.version; - const encoding = self.state_data.encoding; - self.state = .xml_decl_end; - self.token_data = .{ .xml_declaration = .{ .version = version, .encoding = encoding, .standalone = null } }; - return .xml_declaration; - }, - - .xml_decl_standalone_name => if (c == self.state_data.left[0]) { - if (self.state_data.left.len == 1) { - self.state = .xml_decl_after_standalone_name; - // self.state_data.version = self.state_data.version; - // self.state_data.encoding = 
self.state_data.encoding; - } else { - self.state_data.left = self.state_data.left[1..]; - } - return .ok; - }, - - .xml_decl_after_standalone_name => if (syntax.isSpace(c)) { - return .ok; - } else if (c == '=') { - self.state = .xml_decl_after_standalone_equals; - // self.state_data.version = self.state_data.version; - // self.state_data.encoding = self.state_data.encoding; - return .ok; - }, - - .xml_decl_after_standalone_equals => if (syntax.isSpace(c)) { - return .ok; - } else if (c == '"' or c == '\'') { - self.state = .xml_decl_standalone_value_start; - // self.state_data.version = self.state_data.version; - // self.state_data.encoding = self.state_data.encoding; - self.state_data.quote = @as(u8, @intCast(c)); - return .ok; - }, - - .xml_decl_standalone_value_start => if (c == 'y') { - const version = self.state_data.version; - const encoding = self.state_data.encoding; - self.state = .xml_decl_standalone_value; - // self.state_data.quote = self.state_data.quote; - self.state_data.left = "es"; - self.token_data = .{ .xml_declaration = .{ .version = version, .encoding = encoding, .standalone = true } }; - return .xml_declaration; - } else if (c == 'n') { - const version = self.state_data.version; - const encoding = self.state_data.encoding; - self.state = .xml_decl_standalone_value; - // self.state_data.quote = self.state_data.quote; - self.state_data.left = "o"; - self.token_data = .{ .xml_declaration = .{ .version = version, .encoding = encoding, .standalone = false } }; - return .xml_declaration; - }, - - .xml_decl_standalone_value => if (c == self.state_data.left[0]) { - if (self.state_data.left.len == 1) { - self.state = .xml_decl_standalone_value_end; - // self.state_data.quote = self.state_data.quote; - } else { - self.state_data.left = self.state_data.left[1..]; - } - return .ok; - }, - - .xml_decl_standalone_value_end => if (c == self.state_data.quote) { - self.state = .xml_decl_after_standalone; - return .ok; - }, - - .xml_decl_after_standalone => 
if (syntax.isSpace(c)) { - return .ok; - } else if (c == '?') { - self.state = .xml_decl_end; - return .ok; - }, - - .xml_decl_end => if (c == '>') { - self.state = .start_after_xml_decl; - return .ok; - }, - - .start_after_xml_decl => if (syntax.isSpace(c)) { - return .ok; - } else if (c == '<') { - self.state = .unknown_start; - return .ok; - }, - - .doctype_start => if (c == self.state_data.left[0]) { - if (self.state_data.left.len == 1) { - return error.DoctypeNotSupported; - } else { - self.state_data.left = self.state_data.left[1..]; - return .ok; - } - }, - - .document_content => if (syntax.isSpace(c)) { - return .ok; - } else if (c == '<') { - self.state = .unknown_start; - return .ok; - }, - - .unknown_start => if (syntax.isNameStartChar(c) and !self.seen_root_element) { - self.state = .element_start_name; - self.state_data.start = self.pos; - return .ok; - } else if (c == '/' and self.depth > 0) { - self.state = .element_end; - return .ok; - } else if (c == '!') { - self.state = .unknown_start_bang; - return .ok; - } else if (c == '?') { - self.state = .pi; - return .ok; - }, - - .unknown_start_bang => if (c == '-') { - self.state = .comment_before_start; - return .ok; - } else if (self.depth > 0 and c == '[') { - // Textual content is not allowed outside the root element. 
- self.state = .cdata_before_start; - self.state_data.left = "CDATA["; - return .ok; - } else if (self.depth == 0 and !self.seen_root_element and c == 'D') { - self.state = .doctype_start; - self.state_data.left = "OCTYPE "; - return .ok; - }, - - .comment_before_start => if (c == '-') { - self.state = .comment; - self.state_data.start = self.pos + len; - self.token_data = .{ .comment_start = {} }; - return .comment_start; - }, - - .comment => if (c == '-') { - self.state = .comment_maybe_before_end; - // self.state_data.start = self.state_data.start; - self.state_data.end = self.pos; - return .ok; - } else if (syntax.isChar(c)) { - return .ok; - }, - - .comment_maybe_before_end => if (c == '-') { - const content = Range{ .start = self.state_data.start, .end = self.state_data.end }; - self.state = .comment_before_end; - self.token_data = .{ .comment_content = .{ .content = content, .final = true } }; - return .comment_content; - } else if (syntax.isChar(c)) { - self.state = .comment; - // self.state_data.start = self.state_data.start; - return .ok; - }, - - .comment_before_end => if (c == '>') { - if (self.depth == 0) { - self.state = .document_content; - } else { - self.state = .content; - self.state_data.start = self.pos + len; - } - return .ok; - }, - - .pi => if (syntax.isNameStartChar(c)) { - self.state = .pi_target; - self.state_data.start = self.pos; - return .ok; - }, - - .pi_target => if (syntax.isNameChar(c)) { - return .ok; - } else if (syntax.isSpace(c)) { - const target = Range{ .start = self.state_data.start, .end = self.pos }; - self.state = .pi_after_target; - self.token_data = .{ .pi_start = .{ .target = target } }; - return .pi_start; - } else if (c == '?') { - const target = Range{ .start = self.state_data.start, .end = self.pos }; - self.state = .pi_maybe_end; - self.state_data.start = self.pos; - self.state_data.end = self.pos; - self.token_data = .{ .pi_start = .{ .target = target } }; - return .pi_start; - }, - - .pi_after_target => if 
(syntax.isSpace(c)) { - return .ok; - } else if (syntax.isChar(c)) { - self.state = .pi_content; - self.state_data.start = self.pos; - return .ok; - } else if (c == '?') { - self.state = .pi_maybe_end; - self.state_data.start = self.pos; - self.state_data.end = self.pos; - return .ok; - }, - - .pi_content => if (c == '?') { - self.state = .pi_maybe_end; - // self.state_data.start = self.state_data.start; - self.state_data.end = self.pos; - return .ok; - } else if (syntax.isChar(c)) { - return .ok; - }, - - .pi_maybe_end => if (c == '>') { - const content = Range{ .start = self.state_data.start, .end = self.state_data.end }; - if (self.depth == 0) { - self.state = .document_content; - } else { - self.state = .content; - self.state_data.start = self.pos + len; - } - self.token_data = .{ .pi_content = .{ .content = content, .final = true } }; - return .pi_content; - } else if (syntax.isChar(c)) { - self.state = .pi_content; - // self.state_data.start = self.state_data.start; - return .ok; - }, - - .cdata_before_start => if (c == self.state_data.left[0]) { - if (self.state_data.left.len == 1) { - self.state = .cdata; - self.state_data.start = self.pos + len; - } else { - self.state_data.left = self.state_data.left[1..]; - } - return .ok; - }, - - .cdata => if (c == ']') { - self.state = .cdata_maybe_before_end; - // self.state_data.start = self.state_data.start; - self.state_data.end = self.pos; - return .ok; - } else if (syntax.isChar(c)) { - return .ok; - }, - - .cdata_maybe_before_end => if (c == ']') { - self.state = .cdata_maybe_end; - // self.state_data.start = self.state_data.start; - // self.state_data.end = self.state_data.end; - return .ok; - } else if (syntax.isChar(c)) { - self.state = .cdata; - // self.state_data.start = self.state_data.start; - return .ok; - }, - - .cdata_maybe_end => if (c == ']') { - // For every ']' after two have been encountered, the end - // position is incremented so only the final ']]>' marks the end of - // CDATA. 
- self.state_data.end += 1; - return .ok; - } else if (c == '>') { - const text = Range{ .start = self.state_data.start, .end = self.state_data.end }; - self.state = .content; - self.state_data.start = self.pos + len; - self.token_data = .{ .element_content = .{ .content = .{ .text = text } } }; - return .element_content; - } else if (syntax.isChar(c)) { - self.state = .cdata; - // self.state_data.start = self.state_data.start; - return .ok; - }, - - .element_start_name => if (syntax.isNameChar(c)) { - return .ok; - } else if (syntax.isSpace(c)) { - self.depth += 1; - const name = Range{ .start = self.state_data.start, .end = self.pos }; - self.state = .element_start_after_name; - self.token_data = .{ .element_start = .{ .name = name } }; - return .element_start; - } else if (c == '/') { - self.depth += 1; - const name = Range{ .start = self.state_data.start, .end = self.pos }; - self.state = .element_start_empty; - self.token_data = .{ .element_start = .{ .name = name } }; - return .element_start; - } else if (c == '>') { - self.depth += 1; - const name = Range{ .start = self.state_data.start, .end = self.pos }; - self.state = .content; - self.state_data.start = self.pos + len; - self.token_data = .{ .element_start = .{ .name = name } }; - return .element_start; - }, - - .element_start_after_name => if (syntax.isSpace(c)) { - return .ok; - } else if (syntax.isNameStartChar(c)) { - self.state = .attribute_name; - self.state_data.start = self.pos; - return .ok; - } else if (c == '/') { - self.state = .element_start_empty; - return .ok; - } else if (c == '>') { - self.state = .content; - self.state_data.start = self.pos + len; - return .ok; - }, - - .element_start_empty => if (c == '>') { - self.depth -= 1; - if (self.depth == 0) { - self.seen_root_element = true; - } - if (self.depth == 0) { - self.state = .document_content; - } else { - self.state = .content; - self.state_data.start = self.pos + len; - } - self.token_data = .{ .element_end_empty = {} }; - return 
.element_end_empty; - }, - - .attribute_name => if (syntax.isNameChar(c)) { - return .ok; - } else if (syntax.isSpace(c)) { - const name = Range{ .start = self.state_data.start, .end = self.pos }; - self.state = .attribute_after_name; - self.token_data = .{ .attribute_start = .{ .name = name } }; - return .attribute_start; - } else if (c == '=') { - const name = Range{ .start = self.state_data.start, .end = self.pos }; - self.state = .attribute_after_equals; - self.token_data = .{ .attribute_start = .{ .name = name } }; - return .attribute_start; - }, - - .attribute_after_name => if (syntax.isSpace(c)) { - return .ok; - } else if (c == '=') { - self.state = .attribute_after_equals; - return .ok; - }, - - .attribute_after_equals => if (syntax.isSpace(c)) { - return .ok; - } else if (c == '"' or c == '\'') { - self.state = .attribute_content; - self.state_data.start = self.pos + len; - self.state_data.quote = @as(u8, @intCast(c)); - return .ok; - }, - - .attribute_content => if (c == self.state_data.quote) { - const text = Range{ .start = self.state_data.start, .end = self.pos }; - self.state = .attribute_after_content; - self.token_data = .{ .attribute_content = .{ .content = .{ .text = text }, .final = true } }; - return .attribute_content; - } else if (c == '&') { - const text = Range{ .start = self.state_data.start, .end = self.pos }; - self.state = .attribute_content_ref_start; - // self.state_data.quote = self.state_data.quote; - if (text.isEmpty()) { - // We do not want to emit an empty text content token between entities - return .ok; - } else { - self.token_data = .{ .attribute_content = .{ .content = .{ .text = text } } }; - return .attribute_content; - } - } else if (c != '<' and syntax.isChar(c)) { - return .ok; - }, - - .attribute_content_ref_start => if (syntax.isNameStartChar(c)) { - self.state = .attribute_content_entity_ref_name; - self.state_data.start = self.pos; - // self.state_data.quote = self.state_data.quote; - return .ok; - } else if (c == 
'#') { - self.state = .attribute_content_char_ref_start; - // self.state_data.quote = self.state_data.quote; - return .ok; - }, - - .attribute_content_entity_ref_name => if (syntax.isNameChar(c)) { - return .ok; - } else if (c == ';') { - const entity = Range{ .start = self.state_data.start, .end = self.pos }; - self.state = .attribute_content; - self.state_data.start = self.pos + len; - // self.state_data.quote = self.state_data.quote; - self.token_data = .{ .attribute_content = .{ .content = .{ .entity = entity } } }; - return .attribute_content; - }, - - .attribute_content_char_ref_start => if (syntax.isDigit(c)) { - self.state = .attribute_content_char_ref; - self.state_data.hex = false; - self.state_data.value = syntax.digitValue(c); - // self.state_data.quote = self.state_data.quote; - return .ok; - } else if (c == 'x') { - self.state = .attribute_content_char_ref; - self.state_data.hex = true; - self.state_data.value = 0; - // self.state_data.quote = self.state_data.quote; - return .ok; - }, - - .attribute_content_char_ref => if (!self.state_data.hex and syntax.isDigit(c)) { - const value = 10 * @as(u32, self.state_data.value) + syntax.digitValue(c); - if (value > std.math.maxInt(u21)) { - return error.InvalidCharacterReference; - } - self.state_data.value = @as(u21, @intCast(value)); - return .ok; - } else if (self.state_data.hex and syntax.isHexDigit(c)) { - const value = 16 * @as(u32, self.state_data.value) + syntax.hexDigitValue(c); - if (value > std.math.maxInt(u21)) { - return error.InvalidCharacterReference; - } - self.state_data.value = @as(u21, @intCast(value)); - return .ok; - } else if (c == ';') { - const codepoint = self.state_data.value; - if (!syntax.isChar(codepoint)) { - return error.InvalidCharacterReference; - } - self.state = .attribute_content; - self.state_data.start = self.pos + len; - // self.state_data.quote = self.state_data.quote; - self.token_data = .{ .attribute_content = .{ .content = .{ .codepoint = codepoint } } }; - return 
.attribute_content; - }, - - .attribute_after_content => if (syntax.isSpace(c)) { - self.state = .element_start_after_name; - return .ok; - } else if (c == '/') { - self.state = .element_start_empty; - return .ok; - } else if (c == '>') { - self.state = .content; - self.state_data.start = self.pos + len; - return .ok; - }, - - .element_end => if (syntax.isNameStartChar(c)) { - self.state = .element_end_name; - self.state_data.start = self.pos; - return .ok; - }, - - .element_end_name => if (syntax.isNameChar(c)) { - return .ok; - } else if (syntax.isSpace(c)) { - self.depth -= 1; - const name = Range{ .start = self.state_data.start, .end = self.pos }; - self.state = .element_end_after_name; - self.token_data = .{ .element_end = .{ .name = name } }; - return .element_end; - } else if (c == '>') { - self.depth -= 1; - if (self.depth == 0) { - self.seen_root_element = true; - } - const name = Range{ .start = self.state_data.start, .end = self.pos }; - if (self.depth == 0) { - self.state = .document_content; - } else { - self.state = .content; - self.state_data.start = self.pos + len; - } - self.token_data = .{ .element_end = .{ .name = name } }; - return .element_end; - }, - - .element_end_after_name => if (syntax.isSpace(c)) { - return .ok; - } else if (c == '>') { - if (self.depth == 0) { - self.seen_root_element = true; - } - if (self.depth == 0) { - self.state = .document_content; - } else { - self.state = .content; - self.state_data.start = self.pos + len; - } - return .ok; - }, - - inline .content, - .content_cdata_maybe_before_end, - .content_cdata_maybe_end, - => |state| if (c == ']') { - switch (state) { - .content => { - self.state = .content_cdata_maybe_before_end; - // self.state_data.start = self.state_data.start; - }, - .content_cdata_maybe_before_end => { - self.state = .content_cdata_maybe_end; - // self.state_data.start = self.state_data.start; - }, - else => {}, - } - return .ok; - } else if (state == .content_cdata_maybe_end and c == ']') { - return 
.ok; - } else if (state == .content_cdata_maybe_end and c == '>') { - return error.SyntaxError; - } else if (c == '<') { - const text = Range{ .start = self.state_data.start, .end = self.pos }; - self.state = .unknown_start; - if (text.isEmpty()) { - // Do not report empty text content between elements, e.g. - // (there is no text content between or - // within e1 and e2). - return .ok; - } else { - self.token_data = .{ .element_content = .{ .content = .{ .text = text } } }; - return .element_content; - } - } else if (c == '&') { - const text = Range{ .start = self.state_data.start, .end = self.pos }; - self.state = .content_ref_start; - if (text.isEmpty()) { - return .ok; - } else { - self.token_data = .{ .element_content = .{ .content = .{ .text = text } } }; - return .element_content; - } - } else if (syntax.isChar(c)) { - if (state != .content) { - self.state = .content; - // self.state_data.start = self.state_data.start; - } - return .ok; - }, - - .content_ref_start => if (syntax.isNameStartChar(c)) { - self.state = .content_entity_ref_name; - self.state_data.start = self.pos; - return .ok; - } else if (c == '#') { - self.state = .content_char_ref_start; - return .ok; - }, - - .content_entity_ref_name => if (syntax.isNameChar(c)) { - return .ok; - } else if (c == ';') { - const entity = Range{ .start = self.state_data.start, .end = self.pos }; - self.state = .content; - self.state_data.start = self.pos + len; - self.token_data = .{ .element_content = .{ .content = .{ .entity = entity } } }; - return .element_content; - }, - - .content_char_ref_start => if (syntax.isDigit(c)) { - self.state = .content_char_ref; - self.state_data.hex = false; - self.state_data.value = syntax.digitValue(c); - return .ok; - } else if (c == 'x') { - self.state = .content_char_ref; - self.state_data.hex = true; - self.state_data.value = 0; - return .ok; - }, - - .content_char_ref => if (!self.state_data.hex and syntax.isDigit(c)) { - const value = 10 * @as(u32, 
self.state_data.value) + syntax.digitValue(c); - if (value > std.math.maxInt(u21)) { - return error.InvalidCharacterReference; - } - self.state_data.value = @as(u21, @intCast(value)); - return .ok; - } else if (self.state_data.hex and syntax.isHexDigit(c)) { - const value = 16 * @as(u32, self.state_data.value) + syntax.hexDigitValue(c); - if (value > std.math.maxInt(u21)) { - return error.InvalidCharacterReference; - } - self.state_data.value = @as(u21, @intCast(value)); - return .ok; - } else if (c == ';') { - const codepoint = self.state_data.value; - if (!syntax.isChar(codepoint)) { - return error.InvalidCharacterReference; - } - self.state = .content; - self.state_data.start = self.pos + len; - self.token_data = .{ .element_content = .{ .content = .{ .codepoint = codepoint } } }; - return .element_content; - }, - - .@"error" => return error.SyntaxError, - } - - return error.SyntaxError; -} - -/// Signals that there is no further input to scan, and returns an error if -/// the scanner is not in a valid state to handle this (for example, if this -/// is called while in the middle of element content). -pub fn endInput(self: *Scanner) error{UnexpectedEndOfInput}!void { - if (self.state != .document_content or !self.seen_root_element) { - return error.UnexpectedEndOfInput; - } -} - -test Scanner { - try testValid( - \\ - \\ - \\ - \\ - \\ - \\

Hello,

- \\ - \\ - \\ Text content goes here. - \\

&

- \\
- \\ - \\ - \\ - \\ - \\ - , &.{ - .{ .xml_declaration = .{ .version = .{ .start = 15, .end = 18 } } }, - .{ .pi_start = .{ .target = .{ .start = 24, .end = 31 } } }, // some-pi - .{ .pi_content = .{ .content = .{ .start = 31, .end = 31 }, .final = true } }, - .comment_start, - .{ .comment_content = .{ .content = .{ .start = 38, .end = 85 }, .final = true } }, - .{ .pi_start = .{ .target = .{ .start = 91, .end = 111 } } }, // some-pi-with-content - .{ .pi_content = .{ .content = .{ .start = 112, .end = 119 }, .final = true } }, - .{ .element_start = .{ .name = .{ .start = 123, .end = 127 } } }, // root - .{ .element_content = .{ .content = .{ .text = .{ .start = 128, .end = 131 } } } }, - .{ .element_start = .{ .name = .{ .start = 132, .end = 133 } } }, // p - .{ .attribute_start = .{ .name = .{ .start = 134, .end = 139 } } }, - .{ .attribute_content = .{ .content = .{ .text = .{ .start = 141, .end = 145 } }, .final = true } }, - .{ .element_content = .{ .content = .{ .text = .{ .start = 147, .end = 154 } } } }, - .{ .element_content = .{ .content = .{ .text = .{ .start = 163, .end = 169 } } } }, - .{ .element_end = .{ .name = .{ .start = 174, .end = 175 } } }, // /p - .{ .element_content = .{ .content = .{ .text = .{ .start = 176, .end = 179 } } } }, - .{ .element_start = .{ .name = .{ .start = 180, .end = 184 } } }, // line - .element_end_empty, - .{ .element_content = .{ .content = .{ .text = .{ .start = 187, .end = 190 } } } }, - .{ .pi_start = .{ .target = .{ .start = 192, .end = 202 } } }, // another-pi - .{ .pi_content = .{ .content = .{ .start = 202, .end = 202 }, .final = true } }, - .{ .element_content = .{ .content = .{ .text = .{ .start = 204, .end = 233 } } } }, - .{ .element_start = .{ .name = .{ .start = 234, .end = 237 } } }, // div - .{ .element_start = .{ .name = .{ .start = 239, .end = 240 } } }, // p - .{ .element_content = .{ .content = .{ .entity = .{ .start = 242, .end = 245 } } } }, - .{ .element_end = .{ .name = .{ .start = 248, .end = 249 
} } }, // /p - .{ .element_end = .{ .name = .{ .start = 252, .end = 255 } } }, // /div - .{ .element_content = .{ .content = .{ .text = .{ .start = 256, .end = 257 } } } }, - .{ .element_end = .{ .name = .{ .start = 259, .end = 263 } } }, // /root - .comment_start, - .{ .comment_content = .{ .content = .{ .start = 269, .end = 325 }, .final = true } }, - .{ .pi_start = .{ .target = .{ .start = 332, .end = 339 } } }, // comment - .{ .pi_content = .{ .content = .{ .start = 340, .end = 351 }, .final = true } }, - }); -} - -test "BOM" { - try testValid("\u{FEFF}", &.{ - .{ .element_start = .{ .name = .{ .start = 4, .end = 11 } } }, - .element_end_empty, - }); -} - -test "empty root element" { - try testValid("", &.{ - .{ .element_start = .{ .name = .{ .start = 1, .end = 8 } } }, - .element_end_empty, - }); - try testValid("", &.{ - .{ .element_start = .{ .name = .{ .start = 1, .end = 8 } } }, - .element_end_empty, - }); -} - -test "root element with no content" { - try testValid("", &.{ - .{ .element_start = .{ .name = .{ .start = 1, .end = 8 } } }, - .{ .element_end = .{ .name = .{ .start = 11, .end = 18 } } }, - }); -} - -test "element content" { - try testValid("Hello, world!", &.{ - .{ .element_start = .{ .name = .{ .start = 1, .end = 8 } } }, - .{ .element_content = .{ .content = .{ .text = .{ .start = 9, .end = 22 } } } }, - .{ .element_end = .{ .name = .{ .start = 24, .end = 31 } } }, - }); -} - -test "element nesting" { - try testValid("", &.{ - .{ .element_start = .{ .name = .{ .start = 1, .end = 5 } } }, - .{ .element_start = .{ .name = .{ .start = 7, .end = 10 } } }, - .{ .element_start = .{ .name = .{ .start = 12, .end = 17 } } }, - .element_end_empty, - .{ .element_end = .{ .name = .{ .start = 21, .end = 24 } } }, - .{ .element_end = .{ .name = .{ .start = 27, .end = 31 } } }, - }); - try testValid("", &.{ - .{ .element_start = .{ .name = .{ .start = 1, .end = 5 } } }, - .{ .element_start = .{ .name = .{ .start = 10, .end = 13 } } }, - .{ .element_start = 
.{ .name = .{ .start = 16, .end = 21 } } }, - .element_end_empty, - .{ .element_end = .{ .name = .{ .start = 26, .end = 29 } } }, - .{ .element_end = .{ .name = .{ .start = 33, .end = 37 } } }, - }); - try testInvalid("", error.SyntaxError, 14); - try testInvalid("", error.SyntaxError, 16); - try testIncomplete(""); - try testIncomplete(""); -} - -test "XML declaration" { - try testValid( - \\ - \\ - , &.{ - .{ .xml_declaration = .{ .version = .{ .start = 15, .end = 18 } } }, - .{ .element_start = .{ .name = .{ .start = 23, .end = 27 } } }, - .element_end_empty, - }); - try testValid( - \\ - \\ - , &.{ - .{ .xml_declaration = .{ .version = .{ .start = 17, .end = 20 } } }, - .{ .element_start = .{ .name = .{ .start = 25, .end = 29 } } }, - .element_end_empty, - }); - try testValid( - \\ - \\ - , &.{ - .{ .xml_declaration = .{ .version = .{ .start = 15, .end = 18 } } }, - .{ .element_start = .{ .name = .{ .start = 23, .end = 27 } } }, - .element_end_empty, - }); - try testValid( - \\ - \\ - , &.{ - .{ .xml_declaration = .{ .version = .{ .start = 15, .end = 20 } } }, - .{ .element_start = .{ .name = .{ .start = 25, .end = 29 } } }, - .element_end_empty, - }); - try testValid( - \\ - \\ - , &.{ - .{ .xml_declaration = .{ .version = .{ .start = 15, .end = 18 }, .encoding = .{ .start = 30, .end = 35 } } }, - .{ .element_start = .{ .name = .{ .start = 40, .end = 44 } } }, - .element_end_empty, - }); - try testValid( - \\ - \\ - , &.{ - .{ .xml_declaration = .{ .version = .{ .start = 17, .end = 20 }, .encoding = .{ .start = 34, .end = 39 } } }, - .{ .element_start = .{ .name = .{ .start = 44, .end = 48 } } }, - .element_end_empty, - }); - try testValid( - \\ - \\ - , &.{ - .{ .xml_declaration = .{ .version = .{ .start = 15, .end = 18 }, .encoding = .{ .start = 30, .end = 35 } } }, - .{ .element_start = .{ .name = .{ .start = 40, .end = 44 } } }, - .element_end_empty, - }); - try testValid( - \\ - \\ - , &.{ - .{ .xml_declaration = .{ .version = .{ .start = 15, .end = 18 }, 
.encoding = .{ .start = 30, .end = 35 } } }, - .{ .element_start = .{ .name = .{ .start = 40, .end = 44 } } }, - .element_end_empty, - }); - try testValid( - \\ - \\ - , &.{ - .{ .xml_declaration = .{ .version = .{ .start = 15, .end = 18 }, .encoding = .{ .start = 30, .end = 35 } } }, - .{ .element_start = .{ .name = .{ .start = 40, .end = 44 } } }, - .element_end_empty, - }); - try testValid( - \\ - \\ - , &.{ - .{ .xml_declaration = .{ .version = .{ .start = 15, .end = 18 }, .standalone = true } }, - .{ .element_start = .{ .name = .{ .start = 40, .end = 44 } } }, - .element_end_empty, - }); - try testValid( - \\ - \\ - , &.{ - .{ .xml_declaration = .{ .version = .{ .start = 15, .end = 18 }, .standalone = false } }, - .{ .element_start = .{ .name = .{ .start = 39, .end = 43 } } }, - .element_end_empty, - }); - try testValid( - \\ - \\ - , &.{ - .{ .xml_declaration = .{ .version = .{ .start = 17, .end = 20 }, .standalone = true } }, - .{ .element_start = .{ .name = .{ .start = 44, .end = 48 } } }, - .element_end_empty, - }); - try testValid( - \\ - \\ - , &.{ - .{ .xml_declaration = .{ .version = .{ .start = 15, .end = 18 }, .encoding = .{ .start = 30, .end = 35 }, .standalone = true } }, - .{ .element_start = .{ .name = .{ .start = 57, .end = 61 } } }, - .element_end_empty, - }); - try testValid( - \\ - \\ - , &.{ - .{ .xml_declaration = .{ .version = .{ .start = 17, .end = 20 }, .encoding = .{ .start = 34, .end = 39 }, .standalone = true } }, - .{ .element_start = .{ .name = .{ .start = 63, .end = 67 } } }, - .element_end_empty, - }); - try testInvalid("", error.SyntaxError, 19); - try testInvalid("", error.SyntaxError, 36); -} - -test "doctype" { - try testInvalid("", error.DoctypeNotSupported, 9); - try testInvalid("", error.DoctypeNotSupported, 30); - try testInvalid("", error.SyntaxError, 10); - try testInvalid("", error.SyntaxError, 8); -} - -test "CDATA" { - try testValid("", &.{ - .{ .element_start = .{ .name = .{ .start = 1, .end = 8 } } }, - .{ 
.element_content = .{ .content = .{ .text = .{ .start = 18, .end = 20 } } } }, - .{ .element_end = .{ .name = .{ .start = 25, .end = 32 } } }, - }); - try testValid("", &.{ - .{ .element_start = .{ .name = .{ .start = 1, .end = 8 } } }, - .{ .element_content = .{ .content = .{ .text = .{ .start = 18, .end = 21 } } } }, - .{ .element_end = .{ .name = .{ .start = 26, .end = 33 } } }, - }); - try testValid("]]]]]]]>", &.{ - .{ .element_start = .{ .name = .{ .start = 1, .end = 8 } } }, - .{ .element_content = .{ .content = .{ .text = .{ .start = 18, .end = 27 } } } }, - .{ .element_end = .{ .name = .{ .start = 32, .end = 39 } } }, - }); -} - -test "references" { - try testValid( - \\<Hi!!> - , &.{ - .{ .element_start = .{ .name = .{ .start = 1, .end = 8 } } }, - .{ .attribute_start = .{ .name = .{ .start = 9, .end = 18 } } }, - .{ .attribute_content = .{ .content = .{ .text = .{ .start = 20, .end = 25 } } } }, - .{ .attribute_content = .{ .content = .{ .codepoint = 0x2C } } }, - .{ .attribute_content = .{ .content = .{ .codepoint = 32 } } }, - .{ .attribute_content = .{ .content = .{ .text = .{ .start = 36, .end = 42 } } } }, - .{ .attribute_content = .{ .content = .{ .entity = .{ .start = 43, .end = 46 } } } }, - .{ .attribute_content = .{ .content = .{ .text = .{ .start = 47, .end = 56 } }, .final = true } }, - .{ .element_content = .{ .content = .{ .entity = .{ .start = 59, .end = 61 } } } }, - .{ .element_content = .{ .content = .{ .text = .{ .start = 62, .end = 64 } } } }, - .{ .element_content = .{ .content = .{ .codepoint = 33 } } }, - .{ .element_content = .{ .content = .{ .codepoint = 0x21 } } }, - .{ .element_content = .{ .content = .{ .entity = .{ .start = 76, .end = 78 } } } }, - .{ .element_end = .{ .name = .{ .start = 81, .end = 88 } } }, - }); -} - -test "PI at document start" { - try testValid("", &.{ - .{ .pi_start = .{ .target = .{ .start = 2, .end = 9 } } }, - .{ .pi_content = .{ .content = .{ .start = 9, .end = 9 }, .final = true } }, - .{ 
.element_start = .{ .name = .{ .start = 12, .end = 16 } } }, - .element_end_empty, - }); - try testValid("", &.{ - .{ .pi_start = .{ .target = .{ .start = 2, .end = 4 } } }, - .{ .pi_content = .{ .content = .{ .start = 4, .end = 4 }, .final = true } }, - .{ .element_start = .{ .name = .{ .start = 7, .end = 11 } } }, - .element_end_empty, - }); - try testValid("", &.{ - .{ .pi_start = .{ .target = .{ .start = 2, .end = 6 } } }, - .{ .pi_content = .{ .content = .{ .start = 6, .end = 6 }, .final = true } }, - .{ .element_start = .{ .name = .{ .start = 9, .end = 13 } } }, - .element_end_empty, - }); -} - -test "invalid top-level text" { - try testInvalid("Hello, world!", error.SyntaxError, 0); - try testInvalid( - \\ - \\Hello, world! - , error.SyntaxError, 22); - try testInvalid( - \\ - \\Hello, world! - , error.SyntaxError, 9); -} - -test "invalid XML declaration" { - try testInvalid("", error.SyntaxError, 5); - try testInvalid("", error.SyntaxError, 2); - try testInvalid("", error.SyntaxError, 37); - try testInvalid("", error.SyntaxError, 15); - try testInvalid("", error.SyntaxError, 17); - try testInvalid("", error.SyntaxError, 16); - try testInvalid("", error.SyntaxError, 15); - try testInvalid("", error.SyntaxError, 30); - try testInvalid("", error.SyntaxError, 30); - try testInvalid("", error.SyntaxError, 34); - try testInvalid("", error.SyntaxError, 33); - try testInvalid("&", error.SyntaxError, 10); - try testInvalid("&", error.SyntaxError, 13); - try testInvalid("&#ABC;", error.SyntaxError, 11); - try testInvalid(" C;", error.SyntaxError, 13); - try testInvalid("&#xxx;", error.SyntaxError, 12); - try testInvalid("", error.InvalidCharacterReference, 12); - try testInvalid("", error.InvalidCharacterReference, 18); - try testInvalid("", error.InvalidCharacterReference, 16); - try testInvalid("", error.InvalidCharacterReference, 18); - try testInvalid("", error.SyntaxError, 16); - try testInvalid("", error.SyntaxError, 19); - try testInvalid("", 
error.SyntaxError, 17); - try testInvalid("", error.SyntaxError, 19); - try testInvalid("", error.SyntaxError, 18); - try testInvalid("", error.InvalidCharacterReference, 18); - try testInvalid("", error.InvalidCharacterReference, 24); - try testInvalid("", error.InvalidCharacterReference, 22); - try testInvalid("", error.InvalidCharacterReference, 24); -} - -test "invalid content" { - try testInvalid("Illegal: ]]>", error.SyntaxError, 20); - try testInvalid("Also illegal: ]]]>", error.SyntaxError, 26); - try testValid("]]>", &.{ - .{ .element_start = .{ .name = .{ .start = 1, .end = 8 } } }, - .{ .element_content = .{ .content = .{ .text = .{ .start = 9, .end = 11 } } } }, - .{ .element_content = .{ .content = .{ .entity = .{ .start = 12, .end = 14 } } } }, - .{ .element_end = .{ .name = .{ .start = 17, .end = 24 } } }, - }); - try testValid("[lol]
[lmao]
", &.{ - .{ .element_start = .{ .name = .{ .start = 1, .end = 8 } } }, - .{ .element_content = .{ .content = .{ .text = .{ .start = 9, .end = 14 } } } }, - .{ .element_start = .{ .name = .{ .start = 15, .end = 17 } } }, - .element_end_empty, - .{ .element_content = .{ .content = .{ .text = .{ .start = 19, .end = 25 } } } }, - .{ .element_end = .{ .name = .{ .start = 27, .end = 34 } } }, - }); -} - -test "attributes" { - try testValid("", &.{ - .{ .element_start = .{ .name = .{ .start = 1, .end = 8 } } }, - .{ .attribute_start = .{ .name = .{ .start = 9, .end = 14 } } }, - .{ .attribute_content = .{ .content = .{ .text = .{ .start = 16, .end = 17 } }, .final = true } }, - .{ .attribute_start = .{ .name = .{ .start = 19, .end = 24 } } }, - .{ .attribute_content = .{ .content = .{ .text = .{ .start = 26, .end = 27 } }, .final = true } }, - .element_end_empty, - }); - try testValid("", &.{ - .{ .element_start = .{ .name = .{ .start = 1, .end = 8 } } }, - .{ .attribute_start = .{ .name = .{ .start = 9, .end = 14 } } }, - .{ .attribute_content = .{ .content = .{ .text = .{ .start = 16, .end = 17 } }, .final = true } }, - .{ .attribute_start = .{ .name = .{ .start = 19, .end = 24 } } }, - .{ .attribute_content = .{ .content = .{ .text = .{ .start = 26, .end = 27 } }, .final = true } }, - .element_end_empty, - }); - try testInvalid("", error.SyntaxError, 18); - try testInvalid("", error.SyntaxError, 13); - - try testInvalid("", error.SyntaxError, 15); - try testValid("", &.{ - .{ .element_start = .{ .name = .{ .start = 1, .end = 8 } } }, - .{ .attribute_start = .{ .name = .{ .start = 9, .end = 13 } } }, - .{ .attribute_content = .{ .content = .{ .entity = .{ .start = 16, .end = 18 } } } }, - .{ .attribute_content = .{ .content = .{ .entity = .{ .start = 20, .end = 22 } } } }, - .{ .attribute_content = .{ .content = .{ .text = .{ .start = 23, .end = 23 } }, .final = true } }, - .element_end_empty, - }); -} - -test "missing root element" { - try testIncomplete(""); - try 
testIncomplete(""); -} - -test "incomplete document" { - try testIncomplete("<"); - try testIncomplete(""); - try testIncomplete(" .ok, - - // States which contain positional information but cannot immediately - // be emitted as a token cannot be reset - .pi_or_xml_decl_start, - .pi_or_xml_decl_start_after_xml, - - .xml_decl_version_value_start, - .xml_decl_version_value, - .xml_decl_after_version_value, - .xml_decl_after_version, - .xml_decl_encoding_name, - .xml_decl_after_encoding_name, - .xml_decl_after_encoding_equals, - .xml_decl_encoding_value_start, - .xml_decl_encoding_value, - .xml_decl_after_encoding_value, - .xml_decl_after_encoding, - .xml_decl_standalone_name, - .xml_decl_after_standalone_name, - .xml_decl_after_standalone_equals, - .xml_decl_standalone_value_start, - - // None of the "maybe_end" states can be reset because we don't know if - // the resulting content token should include the possible ending - // characters until we read further to unambiguously determine whether - // the state is ending. 
- .comment_maybe_before_end, - - .pi_target, - .pi_maybe_end, - - .cdata_maybe_before_end, - .cdata_maybe_end, - - .element_start_name, - - .attribute_name, - .attribute_content_entity_ref_name, - - .element_end_name, - - .content_entity_ref_name, - => return error.CannotReset, - - // Some states (specifically, content states) can be reset by emitting - // a token with the content seen so far - .comment => token: { - const range = Range{ .start = self.state_data.start, .end = self.pos }; - self.state_data.start = 0; - if (range.isEmpty()) { - break :token .ok; - } else { - self.token_data = .{ .comment_content = .{ .content = range } }; - break :token .comment_content; - } - }, - - .pi_content => token: { - const range = Range{ .start = self.state_data.start, .end = self.pos }; - self.state_data.start = 0; - if (range.isEmpty()) { - break :token .ok; - } else { - self.token_data = .{ .pi_content = .{ .content = range } }; - break :token .pi_content; - } - }, - - .cdata => token: { - const range = Range{ .start = self.state_data.start, .end = self.pos }; - self.state_data.start = 0; - if (range.isEmpty()) { - break :token .ok; - } else { - self.token_data = .{ .element_content = .{ .content = .{ .text = range } } }; - break :token .element_content; - } - }, - - .attribute_content => token: { - const range = Range{ .start = self.state_data.start, .end = self.pos }; - self.state_data.start = 0; - if (range.isEmpty()) { - break :token .ok; - } else { - self.token_data = .{ .attribute_content = .{ .content = .{ .text = range } } }; - break :token .attribute_content; - } - }, - - .content, .content_cdata_maybe_before_end, .content_cdata_maybe_end => token: { - const range = Range{ .start = self.state_data.start, .end = self.pos }; - self.state_data.start = 0; - if (range.isEmpty()) { - break :token .ok; - } else { - self.token_data = .{ .element_content = .{ .content = .{ .text = range } } }; - break :token .element_content; - } - }, - }; - self.pos = 0; - return token; 
-} - -test resetPos { - var scanner = Scanner{}; - var tokens = std.ArrayListUnmanaged(Token.Full){}; - defer tokens.deinit(testing.allocator); - - for ("Hello,") |c| { - switch (try scanner.next(c, 1)) { - .ok => {}, - else => |token| try tokens.append(testing.allocator, scanner.fullToken(token)), - } - } - try tokens.append(testing.allocator, scanner.fullToken(try scanner.resetPos())); - for (" world!") |c| { - switch (try scanner.next(c, 1)) { - .ok => {}, - else => |token| try tokens.append(testing.allocator, scanner.fullToken(token)), - } - } - - try testing.expectEqualSlices(Token.Full, &.{ - .{ .element_start = .{ .name = .{ .start = 1, .end = 8 } } }, - .{ .element_content = .{ .content = .{ .text = .{ .start = 9, .end = 15 } } } }, - .{ .element_content = .{ .content = .{ .text = .{ .start = 0, .end = 7 } } } }, - .{ .element_end = .{ .name = .{ .start = 9, .end = 16 } } }, - }, tokens.items); -} - -test "resetPos inside element reference name" { - var scanner = Scanner{}; - - for ("Hello, world &am") |c| { - _ = try scanner.next(c, 1); - } - try testing.expectError(error.CannotReset, scanner.resetPos()); -} diff --git a/src/Writer.zig b/src/Writer.zig new file mode 100644 index 0000000..45a64ad --- /dev/null +++ b/src/Writer.zig @@ -0,0 +1,198 @@ +const std = @import("std"); +const assert = std.debug.assert; + +options: Options, + +state: State, +indent_level: u32, + +sink: Sink, + +const Writer = @This(); + +pub const Options = struct { + indent: []const u8 = "", +}; + +pub const Sink = struct { + context: *const anyopaque, + writeFn: *const fn (context: *const anyopaque, data: []const u8) anyerror!void, + + pub fn write(sink: *Sink, data: []const u8) anyerror!void { + return sink.writeFn(sink.context, data); + } +}; + +const State = enum { + start, + after_bom, + after_xml_declaration, + element_start, + after_structure_end, + text, + end, +}; + +pub fn init(sink: Sink, options: Options) Writer { + return .{ + .options = options, + + .state = .start, + 
.indent_level = 0, + + .sink = sink, + }; +} + +pub const WriteError = error{}; + +pub fn bom(writer: *Writer) anyerror!void { + assert(writer.state == .start); + try writer.raw("\u{FEFF}"); + writer.state = .after_bom; +} + +pub fn xmlDeclaration(writer: *Writer, encoding: ?[]const u8, standalone: ?bool) anyerror!void { + assert(writer.state == .start or writer.state == .after_bom); + try writer.raw(""); + if (writer.options.indent.len > 0) try writer.newLineAndIndent(); + writer.state = .after_xml_declaration; +} + +pub fn elementStart(writer: *Writer, name: []const u8) anyerror!void { + switch (writer.state) { + .start, .after_bom, .after_xml_declaration, .text => {}, + .element_start => { + try writer.raw(">"); + try writer.newLineAndIndent(); + }, + .after_structure_end => { + try writer.newLineAndIndent(); + }, + .end => unreachable, + } + try writer.raw("<"); + try writer.raw(name); + writer.state = .element_start; + writer.indent_level += 1; +} + +pub fn elementEnd(writer: *Writer, name: []const u8) anyerror!void { + writer.indent_level -= 1; + switch (writer.state) { + .text => {}, + .element_start => { + try writer.raw(">"); + try writer.newLineAndIndent(); + }, + .after_structure_end => { + try writer.newLineAndIndent(); + }, + .start, .after_bom, .after_xml_declaration, .end => unreachable, + } + try writer.raw(""); + writer.state = if (writer.indent_level > 0) .after_structure_end else .end; +} + +pub fn elementEndEmpty(writer: *Writer) anyerror!void { + assert(writer.state == .element_start); + try writer.raw("/>"); + writer.state = .after_structure_end; + writer.indent_level -= 1; +} + +pub fn attribute(writer: *Writer, name: []const u8, value: []const u8) anyerror!void { + assert(writer.state == .element_start); + try writer.raw(" "); + try writer.raw(name); + try writer.raw("=\""); + try writer.attributeText(value); + try writer.raw("\""); +} + +fn attributeText(writer: *Writer, s: []const u8) anyerror!void { + var pos: usize = 0; + while 
(std.mem.indexOfAnyPos(u8, s, pos, "\r\n\t&<\"")) |esc_pos| { + try writer.raw(s[pos..esc_pos]); + try writer.raw(switch (s[esc_pos]) { + '\r' => " ", + '\n' => " ", + '\t' => " ", + '&' => "&", + '<' => "<", + '"' => """, + else => unreachable, + }); + pos = esc_pos + 1; + } + try writer.raw(s[pos..]); +} + +pub fn pi(writer: *Writer, target: []const u8, data: []const u8) anyerror!void { + switch (writer.state) { + .start, .after_bom, .after_xml_declaration, .text, .end => {}, + .element_start => { + try writer.raw(">"); + try writer.newLineAndIndent(); + }, + .after_structure_end => { + try writer.newLineAndIndent(); + }, + } + try writer.raw(""); + writer.state = .after_structure_end; +} + +pub fn text(writer: *Writer, s: []const u8) anyerror!void { + switch (writer.state) { + .after_structure_end, .text => {}, + .element_start => try writer.raw(">"), + .start, .after_bom, .after_xml_declaration, .end => unreachable, + } + var pos: usize = 0; + while (std.mem.indexOfAnyPos(u8, s, pos, "\r&<")) |esc_pos| { + try writer.raw(s[pos..esc_pos]); + try writer.raw(switch (s[esc_pos]) { + '\r' => " ", + '&' => "&", + '<' => "<", + else => unreachable, + }); + pos = esc_pos + 1; + } + try writer.raw(s[pos..]); + writer.state = .text; +} + +fn newLineAndIndent(writer: *Writer) anyerror!void { + if (writer.options.indent.len == 0) return; + + try writer.raw("\n"); + var n: usize = 0; + while (n < writer.indent_level) : (n += 1) { + try writer.raw(writer.options.indent); + } +} + +fn raw(writer: *Writer, s: []const u8) anyerror!void { + try writer.sink.write(s); +} diff --git a/src/compat.zig b/src/compat.zig deleted file mode 100644 index 79a65f0..0000000 --- a/src/compat.zig +++ /dev/null @@ -1,17 +0,0 @@ -//! Compatibility wrappers for APIs changed since Zig 0.12. 
- -const std = @import("std"); - -pub fn ComptimeStringMapType(comptime V: type) type { - return if (@hasDecl(std, "ComptimeStringMap")) - type - else - std.StaticStringMap(V); -} - -pub fn ComptimeStringMap(comptime V: type, comptime kvs_list: anytype) ComptimeStringMapType(V) { - return if (@hasDecl(std, "ComptimeStringMap")) - std.ComptimeStringMap(V, kvs_list) - else - std.StaticStringMap(V).initComptime(kvs_list); -} diff --git a/src/encoding.zig b/src/encoding.zig deleted file mode 100644 index df06d4e..0000000 --- a/src/encoding.zig +++ /dev/null @@ -1,451 +0,0 @@ -//! Various encoding-related utilities. -//! -//! The central "interface" of this file is `Decoder`, which decodes XML -//! content into Unicode codepoints for further processing. It consists -//! of an error type `Error` and several declarations: -//! -//! - `const max_encoded_codepoint_len` - the maximum number of bytes a -//! single Unicode codepoint may occupy in encoded form. -//! - `fn readCodepoint(self: *Decoder, reader: anytype, buf: []u8) (Error || @TypeOf(reader).Error))!ReadResult` - -//! reads a single codepoint from a `std.io.GenericReader` and writes its UTF-8 -//! encoding to `buf`. Should return `error.UnexpectedEndOfInput` if a full -//! codepoint cannot be read, `error.Overflow` if the UTF-8-encoded form cannot -//! be written to `buf`; other decoder-specific errors can also be used. -//! - `fn adaptTo(self: *Decoder, encoding: []const u8) error{InvalidEncoding}!void` - -//! accepts a UTF-8-encoded encoding name and returns an error if the desired -//! encoding cannot be handled by the decoder. This is intended to support -//! `Decoder` implementations which adapt to the encoding declared by an XML -//! document. 
- -const std = @import("std"); -const ascii = std.ascii; -const testing = std.testing; -const unicode = std.unicode; -const Allocator = std.mem.Allocator; -const ArrayListUnmanaged = std.ArrayListUnmanaged; -const BoundedArray = std.BoundedArray; - -/// The result of reading a single codepoint successfully. -pub const ReadResult = packed struct(u32) { - /// The codepoint read. - codepoint: u21, - /// The length of the codepoint encoded in UTF-8. - byte_length: u10, - /// If https://github.com/ziglang/zig/issues/104 is implemented, a much - /// better API would be to make `ReadResult` a `packed struct(u31)` instead - /// and use `?ReadResult` elsewhere. But, for now, this indicates whether - /// `codepoint` and `byte_length` are present, so that the whole thing fits - /// in a `u32` rather than unnecessarily taking up 8 bytes. - present: bool = true, - - pub const none: ReadResult = .{ - .codepoint = 0, - .byte_length = 0, - .present = false, - }; -}; - -/// A decoder which handles UTF-8 or UTF-16, using a BOM to detect UTF-16 -/// endianness. -/// -/// This is the bare minimum encoding support required of a standard-compliant -/// XML parser. -pub const DefaultDecoder = struct { - state: union(enum) { - start, - utf8: Utf8Decoder, - utf16_le: Utf16Decoder(.little), - utf16_be: Utf16Decoder(.big), - } = .start, - - pub const Error = Utf8Decoder.Error || Utf16Decoder(.little).Error || Utf16Decoder(.big).Error; - - pub const max_encoded_codepoint_len = 4; - const bom = 0xFEFF; - const bom_byte_length = unicode.utf8CodepointSequenceLength(bom) catch unreachable; - - pub fn readCodepoint(self: *DefaultDecoder, reader: anytype, buf: []u8) (Error || @TypeOf(reader).Error)!ReadResult { - switch (self.state) { - .start => {}, - inline else => |*inner| return inner.readCodepoint(reader, buf), - } - // If attempting to match the UTF-16 BOM fails for whatever reason, we - // will assume we are reading UTF-8. 
- self.state = .{ .utf8 = .{} }; - const b = reader.readByte() catch |e| switch (e) { - error.EndOfStream => return error.UnexpectedEndOfInput, - else => |other| return other, - }; - switch (b) { - 0xFE => { - const b2 = reader.readByte() catch |e| switch (e) { - error.EndOfStream => return error.InvalidUtf8, - else => |other| return other, - }; - if (b2 != 0xFF) return error.InvalidUtf8; - self.state = .{ .utf16_be = .{} }; - if (bom_byte_length > buf.len) return error.Overflow; - _ = unicode.utf8Encode(bom, buf) catch unreachable; - return .{ .codepoint = bom, .byte_length = bom_byte_length }; - }, - 0xFF => { - const b2 = reader.readByte() catch |e| switch (e) { - error.EndOfStream => return error.InvalidUtf8, - else => |other| return other, - }; - if (b2 != 0xFE) return error.InvalidUtf8; - self.state = .{ .utf16_le = .{} }; - if (bom_byte_length > buf.len) return error.Overflow; - _ = unicode.utf8Encode(bom, buf) catch unreachable; - return .{ .codepoint = bom, .byte_length = bom_byte_length }; - }, - else => { - // The rest of this branch is copied from Utf8Decoder - const byte_length = unicode.utf8ByteSequenceLength(b) catch return error.InvalidUtf8; - if (byte_length > buf.len) return error.Overflow; - buf[0] = b; - if (byte_length == 1) return .{ .codepoint = b, .byte_length = 1 }; - reader.readNoEof(buf[1..byte_length]) catch |e| switch (e) { - error.EndOfStream => return error.UnexpectedEndOfInput, - else => |other| return other, - }; - const codepoint = switch (byte_length) { - 2 => unicode.utf8Decode2(buf[0..2]), - 3 => unicode.utf8Decode3(buf[0..3]), - 4 => unicode.utf8Decode4(buf[0..4]), - else => unreachable, - } catch return error.InvalidUtf8; - return .{ .codepoint = codepoint, .byte_length = byte_length }; - }, - } - } - - pub fn adaptTo(self: *DefaultDecoder, encoding: []const u8) error{InvalidEncoding}!void { - switch (self.state) { - .start => {}, - inline else => |*decoder| try decoder.adaptTo(encoding), - } - } -}; - -test DefaultDecoder { - 
// UTF-8 no BOM - { - const input = "Hü日😀"; - var decoder = try testDecode(DefaultDecoder, input, &.{ - 'H', - 'ü', - '日', - '😀', - }); - try decoder.adaptTo("utf-8"); - try decoder.adaptTo("UTF-8"); - } - - // UTF-8 BOM - { - const input = "\u{FEFF}Hü日😀"; - var decoder = try testDecode(DefaultDecoder, input, &.{ - 0xFEFF, - 'H', - 'ü', - '日', - '😀', - }); - try decoder.adaptTo("utf-8"); - try decoder.adaptTo("UTF-8"); - } - - // Invalid UTF-8 BOM - { - const input = "\xEF\x00\x00H"; - var decoder = try testDecode(DefaultDecoder, input, &.{ - error.InvalidUtf8, - 'H', - }); - try decoder.adaptTo("utf-8"); - try decoder.adaptTo("UTF-8"); - } - - // UTF-16BE BOM - { - const input = "\xFE\xFF" ++ // U+FEFF - "\x00H" ++ - "\x00\xFC" ++ // ü - "\x65\xE5" ++ // 日 - "\xD8\x3D\xDE\x00"; // 😀 - var decoder = try testDecode(DefaultDecoder, input, &.{ - 0xFEFF, - 'H', - 'ü', - '日', - '😀', - }); - try decoder.adaptTo("utf-16"); - try decoder.adaptTo("UTF-16"); - try decoder.adaptTo("utf-16be"); - try decoder.adaptTo("UTF-16BE"); - } - - // Invalid UTF-16BE BOM - { - const input = "\xFE\x00H"; - var decoder = try testDecode(DefaultDecoder, input, &.{ - error.InvalidUtf8, - 'H', - }); - try decoder.adaptTo("utf-8"); - try decoder.adaptTo("UTF-8"); - } - - // UTF-16LE BOM - { - const input = "\xFF\xFE" ++ // U+FEFF - "H\x00" ++ - "\xFC\x00" ++ // ü - "\xE5\x65" ++ // 日 - "\x3D\xD8\x00\xDE"; // 😀 - var decoder = try testDecode(DefaultDecoder, input, &.{ - 0xFEFF, - 'H', - 'ü', - '日', - '😀', - }); - try decoder.adaptTo("utf-16"); - try decoder.adaptTo("UTF-16"); - try decoder.adaptTo("utf-16le"); - try decoder.adaptTo("UTF-16LE"); - } - - // Invalid UTF-16LE BOM - { - const input = "\xFF\xFFH"; - var decoder = try testDecode(DefaultDecoder, input, &.{ - error.InvalidUtf8, - 'H', - }); - try decoder.adaptTo("utf-8"); - try decoder.adaptTo("UTF-8"); - } -} - -/// A decoder which handles only UTF-8. 
-pub const Utf8Decoder = struct { - pub const max_encoded_codepoint_len = 4; - - pub const Error = error{ InvalidUtf8, Overflow, UnexpectedEndOfInput }; - - pub fn readCodepoint(_: *Utf8Decoder, reader: anytype, buf: []u8) (Error || @TypeOf(reader).Error)!ReadResult { - const b = reader.readByte() catch |e| switch (e) { - error.EndOfStream => return ReadResult.none, - else => |other| return other, - }; - const byte_length = unicode.utf8ByteSequenceLength(b) catch return error.InvalidUtf8; - if (byte_length > buf.len) return error.Overflow; - buf[0] = b; - if (byte_length == 1) return .{ .codepoint = b, .byte_length = 1 }; - reader.readNoEof(buf[1..byte_length]) catch |e| switch (e) { - error.EndOfStream => return error.UnexpectedEndOfInput, - else => |other| return other, - }; - const codepoint = switch (byte_length) { - 2 => unicode.utf8Decode2(buf[0..2]), - 3 => unicode.utf8Decode3(buf[0..3]), - 4 => unicode.utf8Decode4(buf[0..4]), - else => unreachable, - } catch return error.InvalidUtf8; - return .{ .codepoint = codepoint, .byte_length = byte_length }; - } - - pub fn adaptTo(_: *Utf8Decoder, encoding: []const u8) error{InvalidEncoding}!void { - if (!ascii.eqlIgnoreCase(encoding, "utf-8")) { - return error.InvalidEncoding; - } - } -}; - -test Utf8Decoder { - const input = - // 1-byte encodings - "\x00\x01 ABC abc 123" ++ - // 2-byte encodings - "éèçñåβΘ" ++ - // 3-byte encodings - "日本語AESTHETIC" ++ - // 4-byte encodings - "😳😂❤️👩‍👩‍👧‍👧" ++ - // Overlong encodings - "\xC0\x80\xE0\x80\x80\xF0\x80\x80\x80" ++ - // Out of bounds codepoint - "\xF7\xBF\xBF\xBF" ++ - // Surrogate halves - "\xED\xA0\x80\xED\xBF\xBF"; - _ = try testDecode(Utf8Decoder, input, &.{ - '\x00', - '\x01', - ' ', - 'A', - 'B', - 'C', - ' ', - 'a', - 'b', - 'c', - ' ', - '1', - '2', - '3', - 'é', - 'è', - 'ç', - 'ñ', - 'å', - 'β', - 'Θ', - '日', - '本', - '語', - 'A', - 'E', - 'S', - 'T', - 'H', - 'E', - 'T', - 'I', - 'C', - '😳', - '😂', - '❤', - '\u{FE0F}', // variation selector-16 - '👩', - 
'\u{200D}', // zero-width joiner - '👩', - '\u{200D}', // zero-width joiner - '👧', - '\u{200D}', // zero-width joiner - '👧', - error.InvalidUtf8, // 2-byte U+0000 - error.InvalidUtf8, // 3-byte U+0000 - error.InvalidUtf8, // 4-byte U+0000 - error.InvalidUtf8, // attempted U+1FFFFF - error.InvalidUtf8, // U+D800 - error.InvalidUtf8, // U+DFFF - }); -} - -/// A decoder which handles only UTF-16 of a given endianness. -pub fn Utf16Decoder(comptime endian: std.builtin.Endian) type { - return struct { - const Self = @This(); - - pub const Error = error{ InvalidUtf16, Overflow, UnexpectedEndOfInput }; - - pub const max_encoded_codepoint_len = 4; - - pub fn readCodepoint(_: *Self, reader: anytype, buf: []u8) (Error || @TypeOf(reader).Error)!ReadResult { - var u_buf: [2]u8 = undefined; - const u_len = try reader.readAll(&u_buf); - switch (u_len) { - 0 => return ReadResult.none, - 1 => return error.UnexpectedEndOfInput, - else => {}, - } - const u = std.mem.readInt(u16, &u_buf, endian); - const code_unit_length = unicode.utf16CodeUnitSequenceLength(u) catch return error.InvalidUtf16; - const codepoint = switch (code_unit_length) { - 1 => u, - 2 => codepoint: { - const low = reader.readInt(u16, endian) catch |e| switch (e) { - error.EndOfStream => return error.UnexpectedEndOfInput, - else => |other| return other, - }; - break :codepoint unicode.utf16DecodeSurrogatePair(&.{ u, low }) catch return error.InvalidUtf16; - }, - else => unreachable, - }; - const byte_length = unicode.utf8CodepointSequenceLength(codepoint) catch unreachable; - if (byte_length > buf.len) return error.Overflow; - _ = unicode.utf8Encode(codepoint, buf) catch unreachable; - return .{ .codepoint = codepoint, .byte_length = byte_length }; - } - - pub fn adaptTo(_: *Self, encoding: []const u8) error{InvalidEncoding}!void { - if (!(ascii.eqlIgnoreCase(encoding, "utf-16") or - (endian == .big and ascii.eqlIgnoreCase(encoding, "utf-16be")) or - (endian == .little and ascii.eqlIgnoreCase(encoding, 
"utf-16le")))) - { - return error.InvalidEncoding; - } - } - }; -} - -test Utf16Decoder { - // little-endian - { - const input = "\x00\x00" ++ // U+0000 - "A\x00" ++ // A - "b\x00" ++ // b - "5\x00" ++ // 5 - "\xE5\x65" ++ // 日 - "\x3D\xD8\x33\xDE" ++ // 😳 - "\x00\xD8\x00\x00" ++ // unpaired high surrogate followed by U+0000 - "\xFF\xDF" // unpaired low surrogate - ; - _ = try testDecode(Utf16Decoder(.little), input, &.{ - '\x00', - 'A', - 'b', - '5', - '日', - '😳', - error.InvalidUtf16, - error.InvalidUtf16, - }); - } - - // big-endian - { - const input = "\x00\x00" ++ // U+0000 - "\x00A" ++ // A - "\x00b" ++ // b - "\x005" ++ // 5 - "\x65\xE5" ++ // 日 - "\xD8\x3D\xDE\x33" ++ // 😳 - "\xD8\x00\x00\x00" ++ // unpaired high surrogate followed by U+0000 - "\xDF\xFF" // unpaired low surrogate - ; - _ = try testDecode(Utf16Decoder(.big), input, &.{ - '\x00', - 'A', - 'b', - '5', - '日', - '😳', - error.InvalidUtf16, - error.InvalidUtf16, - }); - } -} - -fn testDecode(comptime Decoder: type, input: []const u8, expected: []const (Decoder.Error!u21)) !Decoder { - var decoder: Decoder = .{}; - var decoded = ArrayListUnmanaged(Decoder.Error!u21){}; - defer decoded.deinit(testing.allocator); - var input_stream = std.io.fixedBufferStream(input); - var buf: [4]u8 = undefined; - while (true) { - if (decoder.readCodepoint(input_stream.reader(), &buf)) |c| { - if (!c.present) break; - try decoded.append(testing.allocator, c.codepoint); - } else |err| { - try decoded.append(testing.allocator, err); - } - } - - try testing.expectEqualDeep(expected, decoded.items); - - return decoder; -} diff --git a/src/node.zig b/src/node.zig deleted file mode 100644 index d27f61a..0000000 --- a/src/node.zig +++ /dev/null @@ -1,60 +0,0 @@ -const std = @import("std"); -const mem = std.mem; -const ArenaAllocator = std.heap.ArenaAllocator; -const QName = @import("reader.zig").QName; - -/// A node value along with an `ArenaAllocator` used to allocate all memory -/// backing it. 
-pub fn OwnedValue(comptime T: type) type { - return struct { - value: T, - arena: ArenaAllocator, - - const Self = @This(); - - pub fn deinit(self: *Self) void { - self.arena.deinit(); - self.* = undefined; - } - }; -} - -/// A node in an XML document. -pub const Node = union(enum) { - document: Document, - element: Element, - attribute: Attribute, - comment: Comment, - pi: Pi, - text: Text, - - pub const Document = struct { - version: []const u8 = "1.0", - encoding: ?[]const u8 = null, - standalone: ?bool = null, - children: []const Node, - }; - - pub const Element = struct { - name: QName, - children: []const Node = &.{}, - }; - - pub const Attribute = struct { - name: QName, - value: []const u8, - }; - - pub const Comment = struct { - content: []const u8, - }; - - pub const Pi = struct { - target: []const u8, - content: []const u8, - }; - - pub const Text = struct { - content: []const u8, - }; -}; diff --git a/src/reader.zig b/src/reader.zig deleted file mode 100644 index d01a137..0000000 --- a/src/reader.zig +++ /dev/null @@ -1,1149 +0,0 @@ -const std = @import("std"); -const fmt = std.fmt; -const mem = std.mem; -const testing = std.testing; -const unicode = std.unicode; -const Allocator = mem.Allocator; -const ArenaAllocator = std.heap.ArenaAllocator; -const ArrayListUnmanaged = std.ArrayListUnmanaged; -const ComptimeStringMap = @import("compat.zig").ComptimeStringMap; -const StringArrayHashMapUnmanaged = std.StringArrayHashMapUnmanaged; -const StringHashMapUnmanaged = std.StringHashMapUnmanaged; -const encoding = @import("encoding.zig"); -const syntax = @import("syntax.zig"); -const Node = @import("node.zig").Node; -const OwnedValue = @import("node.zig").OwnedValue; -const Scanner = @import("Scanner.zig"); -const Token = @import("token_reader.zig").Token; -const TokenReader = @import("token_reader.zig").TokenReader; - -const max_encoded_codepoint_len = 4; - -/// A qualified name. 
-pub const QName = struct { - prefix: ?[]const u8 = null, - ns: ?[]const u8 = null, - local: []const u8, - - /// Returns whether this name has the given namespace and local name. - pub fn is(self: QName, ns: ?[]const u8, local: []const u8) bool { - if (self.ns) |self_ns| { - if (!mem.eql(u8, self_ns, ns orelse return false)) { - return false; - } - } else if (ns != null) { - return false; - } - return mem.eql(u8, self.local, local); - } - - test is { - try testing.expect((QName{ .local = "abc" }).is(null, "abc")); - try testing.expect((QName{ .ns = "http://example.com/ns/", .local = "abc" }).is("http://example.com/ns/", "abc")); - try testing.expect(!(QName{ .local = "abc" }).is(null, "def")); - try testing.expect(!(QName{ .local = "abc" }).is("http://example.com/ns/", "abc")); - try testing.expect(!(QName{ .ns = "http://example.com/ns/", .local = "abc" }).is(null, "abc")); - try testing.expect(!(QName{ .ns = "http://example.com/ns/", .local = "abc" }).is("http://example.com/ns2/", "abc")); - try testing.expect(!(QName{ .ns = "http://example.com/ns/", .local = "abc" }).is("http://example.com/ns/", "def")); - try testing.expect(!(QName{ .ns = "http://example.com/ns/", .local = "abc" }).is("http://example.com/ns2/", "def")); - } - - fn clone(self: QName, allocator: Allocator) !QName { - const prefix = if (self.prefix) |prefix| try allocator.dupe(u8, prefix) else null; - errdefer if (prefix) |p| allocator.free(p); - const ns = if (self.ns) |ns| try allocator.dupe(u8, ns) else null; - errdefer if (ns) |n| allocator.free(n); - const local = try allocator.dupe(u8, self.local); - return .{ .prefix = prefix, .ns = ns, .local = local }; - } - - /// Duplicates the `ns` value, if any. - /// - /// This is to allow the `QName` to outlive the closure of its containing - /// scope. 
- inline fn dupNs(self: *QName, allocator: Allocator) !void { - if (self.ns) |*ns| { - ns.* = try allocator.dupe(u8, ns.*); - } - } -}; - -/// A hash map `Context` which compares namespace URIs and local names (that is, -/// name identity according to the XML namespaces spec, since the prefix does -/// not contribute to the identity of a QName). -const QNameContext = struct { - const Self = @This(); - - pub fn hash(_: Self, name: QName) u64 { - var h = std.hash.Wyhash.init(0); - if (name.ns) |ns| { - h.update(ns); - } - h.update(name.local); - return h.final(); - } - - pub fn eql(_: Self, name1: QName, name2: QName) bool { - return name1.is(name2.ns, name2.local); - } -}; - -const QNameSet = std.HashMapUnmanaged(QName, void, QNameContext, std.hash_map.default_max_load_percentage); - -/// An event emitted by a reader. -pub const Event = union(enum) { - xml_declaration: XmlDeclaration, - element_start: ElementStart, - element_content: ElementContent, - element_end: ElementEnd, - comment: Comment, - pi: Pi, - - pub const XmlDeclaration = struct { - version: []const u8, - encoding: ?[]const u8 = null, - standalone: ?bool = null, - }; - - pub const ElementStart = struct { - name: QName, - attributes: []const Attribute = &.{}, - }; - - pub const Attribute = struct { - name: QName, - value: []const u8, - }; - - pub const ElementContent = struct { - content: []const u8, - }; - - pub const ElementEnd = struct { - name: QName, - }; - - pub const Comment = struct { - content: []const u8, - }; - - pub const Pi = struct { - target: []const u8, - content: []const u8, - }; -}; - -/// A map of predefined XML entities to their replacement text. -/// -/// Until DTDs are understood and parsed, these are the only named entities -/// supported by this parser. 
-const entities = ComptimeStringMap([]const u8, .{ - .{ "amp", "&" }, - .{ "lt", "<" }, - .{ "gt", ">" }, - .{ "apos", "'" }, - .{ "quot", "\"" }, -}); - -const xml_ns = "http://www.w3.org/XML/1998/namespace"; -const xmlns_ns = "http://www.w3.org/2000/xmlns/"; - -const predefined_ns_prefixes = ComptimeStringMap([]const u8, .{ - .{ "xml", xml_ns }, - .{ "xmlns", xmlns_ns }, -}); - -/// A context for namespace information in a document. -/// -/// The context maintains a hierarchy of namespace scopes. Initially, there is -/// no active scope (corresponding to the beginning of a document, before the -/// start of the root element). -pub const NamespaceContext = struct { - scopes: ArrayListUnmanaged(StringHashMapUnmanaged([]const u8)) = .{}, - - pub const Error = error{ - CannotUndeclareNsPrefix, - InvalidNsBinding, - InvalidQName, - UndeclaredNsPrefix, - QNameNotAllowed, - }; - - pub fn deinit(self: *NamespaceContext, allocator: Allocator) void { - while (self.scopes.items.len > 0) { - self.endScope(allocator); - } - self.scopes.deinit(allocator); - self.* = undefined; - } - - /// Starts a new scope. - pub fn startScope(self: *NamespaceContext, allocator: Allocator) !void { - try self.scopes.append(allocator, .{}); - } - - /// Ends the current scope. - /// - /// Only valid if there is a current scope. - pub fn endScope(self: *NamespaceContext, allocator: Allocator) void { - var bindings = self.scopes.pop(); - var iter = bindings.iterator(); - while (iter.next()) |entry| { - allocator.free(entry.key_ptr.*); - allocator.free(entry.value_ptr.*); - } - bindings.deinit(allocator); - } - - /// Binds the default namespace in the current scope. - /// - /// Only valid if there is a current scope. - pub fn bindDefault(self: *NamespaceContext, allocator: Allocator, uri: []const u8) !void { - if (mem.eql(u8, uri, xml_ns) or mem.eql(u8, uri, xmlns_ns)) { - return error.InvalidNsBinding; - } - try self.bindInner(allocator, "", uri); - } - - /// Binds a prefix in the current scope. 
- /// - /// Only valid if there is a current scope. - pub fn bindPrefix(self: *NamespaceContext, allocator: Allocator, prefix: []const u8, uri: []const u8) !void { - if (!syntax.isNcName(prefix)) { - return error.InvalidQName; - } - if (mem.eql(u8, prefix, "xml") and !mem.eql(u8, uri, xml_ns)) { - return error.InvalidNsBinding; - } - if (mem.eql(u8, uri, xml_ns) and !mem.eql(u8, prefix, "xml")) { - return error.InvalidNsBinding; - } - if (mem.eql(u8, prefix, "xmlns")) { - return error.InvalidNsBinding; - } - if (mem.eql(u8, uri, xmlns_ns) and !mem.eql(u8, prefix, "xmlns")) { - return error.InvalidNsBinding; - } - try self.bindInner(allocator, prefix, uri); - } - - fn bindInner(self: *NamespaceContext, allocator: Allocator, prefix: []const u8, uri: []const u8) !void { - // TODO: validate that uri is a valid URI reference - if (prefix.len != 0 and uri.len == 0) { - return error.CannotUndeclareNsPrefix; - } - var bindings = &self.scopes.items[self.scopes.items.len - 1]; - const key = try allocator.dupe(u8, prefix); - errdefer allocator.free(key); - const value = try allocator.dupe(u8, uri); - errdefer allocator.free(value); - // We cannot clobber an existing prefix in this scope because that - // would imply a duplicate attribute name, which is validated earlier. - try bindings.putNoClobber(allocator, key, value); - } - - /// Returns the URI, if any, bound to the given prefix. - pub fn getUri(self: NamespaceContext, prefix: []const u8) ?[]const u8 { - if (predefined_ns_prefixes.get(prefix)) |uri| { - return uri; - } - return for (0..self.scopes.items.len) |i| { - if (self.scopes.items[self.scopes.items.len - i - 1].get(prefix)) |uri| { - break if (uri.len > 0) uri else null; - } - } else null; - } - - /// Parses a possibly prefixed name and returns the corresponding `QName`. - /// - /// `use_default_ns` specifies if the default namespace (if any) should be - /// implied for the given name if it is unprefixed. 
This is appropriate for - /// element names but not attribute names, per the namespaces spec. - pub fn parseName(self: NamespaceContext, name: []const u8, use_default_ns: bool) !QName { - if (mem.indexOfScalar(u8, name, ':')) |sep_pos| { - const prefix = name[0..sep_pos]; - const local = name[sep_pos + 1 ..]; - if (!syntax.isNcName(prefix) or !syntax.isNcName(local)) { - return error.InvalidQName; - } - const ns = self.getUri(prefix) orelse return error.UndeclaredNsPrefix; - return .{ .prefix = prefix, .ns = ns, .local = local }; - } else if (use_default_ns) { - return .{ .ns = self.getUri(""), .local = name }; - } else { - return .{ .local = name }; - } - } -}; - -/// A drop-in replacement for `NamespaceContext` which doesn't actually do any -/// namespace processing. -pub const NoOpNamespaceContext = struct { - pub const Error = error{}; - - pub inline fn deinit(_: *NoOpNamespaceContext, _: Allocator) void {} - - pub inline fn startScope(_: *NoOpNamespaceContext, _: Allocator) !void {} - - pub inline fn endScope(_: *NoOpNamespaceContext, _: Allocator) void {} - - pub inline fn bindDefault(_: *NoOpNamespaceContext, _: Allocator, _: []const u8) !void {} - - pub inline fn bindPrefix(_: *NoOpNamespaceContext, _: Allocator, _: []const u8, _: []const u8) !void {} - - pub inline fn getUri(_: NoOpNamespaceContext, _: []const u8) ?[]const u8 { - return null; - } - - pub inline fn parseName(_: NoOpNamespaceContext, name: []const u8, _: bool) !QName { - return .{ .local = name }; - } -}; - -/// Returns a `Reader` wrapping a `std.io.Reader`. -pub fn reader( - allocator: Allocator, - r: anytype, - comptime options: ReaderOptions, -) Reader(@TypeOf(r), options) { - return Reader(@TypeOf(r), options).init(allocator, r, .{}); -} - -/// Reads a full XML document from a `std.io.Reader`. 
-pub fn readDocument( - allocator: Allocator, - r: anytype, - comptime options: ReaderOptions, -) !OwnedValue(Node.Document) { - var arena = ArenaAllocator.init(allocator); - errdefer arena.deinit(); - const node_allocator = arena.allocator(); - - var decl_version: []const u8 = "1.0"; - var decl_encoding: ?[]const u8 = null; - var decl_standalone: ?bool = null; - var children = ArrayListUnmanaged(Node){}; - - var xml_reader = reader(allocator, r, options); - defer xml_reader.deinit(); - while (try xml_reader.next()) |event| { - switch (event) { - .xml_declaration => |xml_declaration| { - decl_version = try node_allocator.dupe(u8, xml_declaration.version); - if (xml_declaration.encoding) |e| { - decl_encoding = try node_allocator.dupe(u8, e); - } - decl_standalone = xml_declaration.standalone; - }, - .element_start => |element_start| try children.append(node_allocator, .{ - .element = try xml_reader.nextElementNode(node_allocator, element_start), - }), - .comment => |comment| try children.append(node_allocator, .{ .comment = .{ - .content = try node_allocator.dupe(u8, comment.content), - } }), - .pi => |pi| try children.append(node_allocator, .{ .pi = .{ - .target = try node_allocator.dupe(u8, pi.target), - .content = try node_allocator.dupe(u8, pi.content), - } }), - else => unreachable, - } - } - - return .{ - .value = .{ - .version = decl_version, - .encoding = decl_encoding, - .standalone = decl_standalone, - .children = children.items, - }, - .arena = arena, - }; -} - -/// Options for a `Reader`. -pub const ReaderOptions = struct { - /// The type of decoder to use. - DecoderType: type = encoding.DefaultDecoder, - /// The size of the internal buffer. - /// - /// This limits the byte length of "non-splittable" content, such as - /// element and attribute names. Longer such content will result in - /// `error.Overflow`. - buffer_size: usize = 4096, - /// Whether to normalize line endings and attribute values according to the - /// XML specification. 
- /// - /// If this is set to false, no normalization will be done: for example, - /// the line ending sequence `\r\n` will appear as-is in returned events - /// rather than the normalized `\n`. - enable_normalization: bool = true, - /// Whether namespace information should be processed. - /// - /// If this is false, then `QName`s in the returned events will have only - /// their `local` field populated, containing the full name of the element - /// or attribute. - namespace_aware: bool = true, - /// Whether to keep track of the current location in the document. - track_location: bool = false, -}; - -/// A streaming, pull-based XML parser wrapping a `std.io.Reader`. -/// -/// This parser behaves similarly to Go's `encoding/xml` package. It is a -/// higher-level abstraction over a `TokenReader` which uses an internal -/// allocator to keep track of additional context. It performs additional -/// well-formedness checks which the lower-level parsers cannot perform due to -/// their design, such as ensuring element start and end tags match and -/// attribute names are not duplicated. It is also able to process namespace -/// information. -/// -/// Since this parser wraps a `TokenReader`, the caveats on the `buffer_size` -/// bounding the length of "non-splittable" content which are outlined in its -/// documentation apply here as well. -pub fn Reader(comptime ReaderType: type, comptime options: ReaderOptions) type { - return struct { - token_reader: TokenReaderType, - /// A stack of element names enclosing the current context. - element_names: ArrayListUnmanaged([]u8) = .{}, - /// The namespace context of the reader. - namespace_context: NamespaceContextType = .{}, - /// A pending token which has been read but has not yet been handled as - /// part of an event. - pending_token: ?Token = null, - /// A buffer for storing encoded Unicode codepoint data. 
- codepoint_buf: [max_encoded_codepoint_len]u8 = undefined, - /// A "buffer" for handling the contents of the next pending event. - pending_event: union(enum) { - none, - element_start: struct { - name: []const u8, - attributes: StringArrayHashMapUnmanaged(ArrayListUnmanaged(u8)) = .{}, - }, - comment: struct { content: ArrayListUnmanaged(u8) = .{} }, - pi: struct { target: []const u8, content: ArrayListUnmanaged(u8) = .{} }, - } = .none, - /// An arena to store memory for `pending_event` (and the event after - /// it's returned). - event_arena: ArenaAllocator, - allocator: Allocator, - - const Self = @This(); - const TokenReaderType = TokenReader(ReaderType, .{ - .DecoderType = options.DecoderType, - .buffer_size = options.buffer_size, - .enable_normalization = options.enable_normalization, - .track_location = options.track_location, - }); - const NamespaceContextType = if (options.namespace_aware) NamespaceContext else NoOpNamespaceContext; - - pub const Error = error{ - DuplicateAttribute, - MismatchedEndTag, - UndeclaredEntityReference, - } || Allocator.Error || TokenReaderType.Error || NamespaceContextType.Error; - - pub fn init(allocator: Allocator, r: ReaderType, decoder: options.DecoderType) Self { - return .{ - .token_reader = TokenReaderType.init(r, decoder), - .event_arena = ArenaAllocator.init(allocator), - .allocator = allocator, - }; - } - - pub fn deinit(self: *Self) void { - for (self.element_names.items) |name| { - self.allocator.free(name); - } - self.element_names.deinit(self.allocator); - self.namespace_context.deinit(self.allocator); - self.event_arena.deinit(); - self.* = undefined; - } - - /// Returns the next event from the input. - /// - /// The returned event is only valid until the next reader operation. 
- pub fn next(self: *Self) Error!?Event { - _ = self.event_arena.reset(.retain_capacity); - const event_allocator = self.event_arena.allocator(); - while (true) { - switch (try self.nextToken()) { - .eof => return null, - .xml_declaration => return .{ .xml_declaration = .{ - .version = self.token_reader.token_data.xml_declaration.version, - .encoding = self.token_reader.token_data.xml_declaration.encoding, - .standalone = self.token_reader.token_data.xml_declaration.standalone, - } }, - .element_start => { - if (try self.finalizePendingEvent()) |event| { - self.pending_token = .element_start; - return event; - } - const name = try self.allocator.dupe(u8, self.token_reader.token_data.element_start.name); - errdefer self.allocator.free(name); - try self.element_names.append(self.allocator, name); - errdefer _ = self.element_names.pop(); - try self.namespace_context.startScope(self.allocator); - self.pending_event = .{ .element_start = .{ .name = name } }; - }, - .element_content => { - if (try self.finalizePendingEvent()) |event| { - self.pending_token = .element_content; - return event; - } - return .{ .element_content = .{ - .content = try self.contentText(self.token_reader.token_data.element_content.content), - } }; - }, - .element_end => { - if (try self.finalizePendingEvent()) |event| { - self.pending_token = .element_end; - return event; - } - const expected_name = self.element_names.pop(); - defer self.allocator.free(expected_name); - if (!mem.eql(u8, expected_name, self.token_reader.token_data.element_end.name)) { - return error.MismatchedEndTag; - } - var qname = try self.namespace_context.parseName(self.token_reader.token_data.element_end.name, true); - try qname.dupNs(event_allocator); - self.namespace_context.endScope(self.allocator); - return .{ .element_end = .{ .name = qname } }; - }, - .element_end_empty => { - if (try self.finalizePendingEvent()) |event| { - self.pending_token = .element_end_empty; - return event; - } - const name = 
self.element_names.pop(); - defer self.allocator.free(name); - const dup_name = try event_allocator.dupe(u8, name); - var qname = try self.namespace_context.parseName(dup_name, true); - try qname.dupNs(event_allocator); - self.namespace_context.endScope(self.allocator); - return .{ .element_end = .{ .name = qname } }; - }, - .attribute_start => { - const attr_entry = try self.pending_event.element_start.attributes.getOrPut( - event_allocator, - self.token_reader.token_data.attribute_start.name, - ); - if (attr_entry.found_existing) { - return error.DuplicateAttribute; - } - // The attribute name will be invalidated after we get - // the next token, so we have to duplicate it here. - // This doesn't change the hash of the key, so it's - // safe to do this. - attr_entry.key_ptr.* = try event_allocator.dupe(u8, self.token_reader.token_data.attribute_start.name); - attr_entry.value_ptr.* = .{}; - }, - .attribute_content => { - const attributes = self.pending_event.element_start.attributes.values(); - try attributes[attributes.len - 1].appendSlice(event_allocator, try self.contentText(self.token_reader.token_data.attribute_content.content)); - }, - .comment_start => { - if (try self.finalizePendingEvent()) |event| { - self.pending_token = .comment_start; - return event; - } - self.pending_event = .{ .comment = .{} }; - }, - .comment_content => { - try self.pending_event.comment.content.appendSlice(event_allocator, self.token_reader.token_data.comment_content.content); - if (self.token_reader.token_data.comment_content.final) { - const event = Event{ .comment = .{ .content = self.pending_event.comment.content.items } }; - self.pending_event = .none; - return event; - } - }, - .pi_start => { - if (try self.finalizePendingEvent()) |event| { - self.pending_token = .pi_start; - return event; - } - if (options.namespace_aware and mem.indexOfScalar(u8, self.token_reader.token_data.pi_start.target, ':') != null) { - return error.QNameNotAllowed; - } - self.pending_event = .{ 
.pi = .{ - .target = try event_allocator.dupe(u8, self.token_reader.token_data.pi_start.target), - } }; - }, - .pi_content => { - try self.pending_event.pi.content.appendSlice(event_allocator, self.token_reader.token_data.pi_content.content); - if (self.token_reader.token_data.pi_content.final) { - const event = Event{ .pi = .{ - .target = self.pending_event.pi.target, - .content = self.pending_event.pi.content.items, - } }; - self.pending_event = .none; - return event; - } - }, - } - } - } - - fn nextToken(self: *Self) !Token { - if (self.pending_token) |token| { - self.pending_token = null; - return token; - } - return try self.token_reader.next(); - } - - fn finalizePendingEvent(self: *Self) !?Event { - const event_allocator = self.event_arena.allocator(); - switch (self.pending_event) { - .none => return null, - .element_start => |element_start| { - // Bind all xmlns declarations in the current element - for (element_start.attributes.keys(), element_start.attributes.values()) |attr_name, attr_value| { - if (mem.eql(u8, attr_name, "xmlns")) { - try self.namespace_context.bindDefault(self.allocator, attr_value.items); - } else if (mem.startsWith(u8, attr_name, "xmlns:")) { - try self.namespace_context.bindPrefix(self.allocator, attr_name["xmlns:".len..], attr_value.items); - } - } - - // Convert the element and attribute names to QNames - const qname = try self.namespace_context.parseName(element_start.name, true); - var attributes = ArrayListUnmanaged(Event.Attribute){}; - try attributes.ensureTotalCapacity(event_allocator, element_start.attributes.count()); - // When namespaces are enabled, we need to check uniqueness - // of attribute QNames according to the namespaces spec - var attr_qnames = if (options.namespace_aware) QNameSet{}; - if (options.namespace_aware) { - try attr_qnames.ensureTotalCapacity(event_allocator, @intCast(element_start.attributes.count())); - } - for (element_start.attributes.keys(), element_start.attributes.values()) |attr_name, 
attr_value| { - const attr_qname = try self.namespace_context.parseName(attr_name, false); - attributes.appendAssumeCapacity(.{ .name = attr_qname, .value = attr_value.items }); - if (options.namespace_aware) { - const entry = attr_qnames.getOrPutAssumeCapacity(attr_qname); - if (entry.found_existing) { - return error.DuplicateAttribute; - } - } - } - - self.pending_event = .none; - return .{ .element_start = .{ .name = qname, .attributes = attributes.items } }; - }, - // Other pending events will have already been handled by - // looking at the 'final' content event - else => unreachable, - } - } - - fn contentText(self: *Self, content: Token.Content) ![]const u8 { - return switch (content) { - .text => |text| text, - .codepoint => |codepoint| text: { - const len = unicode.utf8Encode(codepoint, &self.codepoint_buf) catch unreachable; - break :text self.codepoint_buf[0..len]; - }, - .entity => |entity| entities.get(entity) orelse return error.UndeclaredEntityReference, - }; - } - - pub fn nextNode(self: *Self, allocator: Allocator, element_start: Event.ElementStart) Error!OwnedValue(Node.Element) { - var arena = ArenaAllocator.init(allocator); - errdefer arena.deinit(); - return .{ - .value = try self.nextElementNode(arena.allocator(), element_start), - .arena = arena, - }; - } - - fn nextElementNode(self: *Self, allocator: Allocator, element_start: Event.ElementStart) Error!Node.Element { - const name = try element_start.name.clone(allocator); - var element_children = ArrayListUnmanaged(Node){}; - try element_children.ensureTotalCapacity(allocator, element_start.attributes.len); - for (element_start.attributes) |attr| { - element_children.appendAssumeCapacity(.{ .attribute = .{ - .name = try attr.name.clone(allocator), - .value = try allocator.dupe(u8, attr.value), - } }); - } - var current_content = ArrayListUnmanaged(u8){}; - while (try self.next()) |event| { - if (event != .element_content and current_content.items.len > 0) { - try 
element_children.append(allocator, .{ .text = .{ .content = current_content.items } }); - current_content = .{}; - } - switch (event) { - .xml_declaration => unreachable, - .element_start => |sub_element_start| try element_children.append(allocator, .{ - .element = try self.nextElementNode(allocator, sub_element_start), - }), - .element_content => |element_content| try current_content.appendSlice(allocator, element_content.content), - .element_end => return .{ .name = name, .children = element_children.items }, - .comment => |comment| try element_children.append(allocator, .{ .comment = .{ - .content = try allocator.dupe(u8, comment.content), - } }), - .pi => |pi| try element_children.append(allocator, .{ .pi = .{ - .target = try allocator.dupe(u8, pi.target), - .content = try allocator.dupe(u8, pi.content), - } }), - } - } - unreachable; - } - - /// Returns an iterator over the remaining children of the current - /// element. - /// - /// Note that, since the returned iterator's `next` function calls the - /// `next` function of this reader internally, such calls will - /// invalidate any event returned prior to calling this function. - pub fn children(self: *Self) Children(Self) { - return .{ .reader = self, .start_depth = self.element_names.items.len }; - } - }; -} - -fn Children(comptime ReaderType: type) type { - return struct { - reader: *ReaderType, - start_depth: usize, - - const Self = @This(); - - /// Returns the next event. - /// - /// This function must not be called after it initially returns null. - pub fn next(self: Self) ReaderType.Error!?Event { - return switch (try self.reader.next() orelse return null) { - .element_end => |element_end| if (self.reader.element_names.items.len >= self.start_depth) .{ .element_end = element_end } else null, - else => |event| event, - }; - } - - /// Returns an iterator over the remaining children of the current - /// element. - /// - /// This may not be used after `next` returns null. 
- pub fn children(self: Self) Self { - return self.reader.children(); - } - - /// Skips the remaining children. - /// - /// `next` and `children` must not be used after this. - pub fn skip(self: Self) ReaderType.Error!void { - while (try self.next()) |_| {} - } - }; -} - -test Reader { - try testValid(.{}, - \\ - \\ - \\ - \\ - \\ - \\

Hello,

- \\ - \\ - \\ Text content goes here. - \\

&

- \\
- \\ - \\ - \\ - \\ - \\ - , &.{ - .{ .xml_declaration = .{ .version = "1.0" } }, - .{ .pi = .{ .target = "some-pi", .content = "" } }, - .{ .comment = .{ .content = " A processing instruction with content follows " } }, - .{ .pi = .{ .target = "some-pi-with-content", .content = "content" } }, - .{ .element_start = .{ .name = .{ .local = "root" } } }, - .{ .element_content = .{ .content = "\n " } }, - .{ .element_start = .{ .name = .{ .local = "p" }, .attributes = &.{ - .{ .name = .{ .local = "class" }, .value = "test" }, - } } }, - .{ .element_content = .{ .content = "Hello, " } }, - .{ .element_content = .{ .content = "world!" } }, - .{ .element_end = .{ .name = .{ .local = "p" } } }, - .{ .element_content = .{ .content = "\n " } }, - .{ .element_start = .{ .name = .{ .local = "line" } } }, - .{ .element_end = .{ .name = .{ .local = "line" } } }, - .{ .element_content = .{ .content = "\n " } }, - .{ .pi = .{ .target = "another-pi", .content = "" } }, - .{ .element_content = .{ .content = "\n Text content goes here.\n " } }, - .{ .element_start = .{ .name = .{ .local = "div" } } }, - .{ .element_start = .{ .name = .{ .local = "p" } } }, - .{ .element_content = .{ .content = "&" } }, - .{ .element_end = .{ .name = .{ .local = "p" } } }, - .{ .element_end = .{ .name = .{ .local = "div" } } }, - .{ .element_content = .{ .content = "\n" } }, - .{ .element_end = .{ .name = .{ .local = "root" } } }, - .{ .comment = .{ .content = " Comments are allowed after the end of the root element " } }, - .{ .pi = .{ .target = "comment", .content = "So are PIs " } }, - }); -} - -test "tag name matching" { - try testInvalid(.{}, "", error.MismatchedEndTag); - try testInvalid(.{}, "", error.MismatchedEndTag); - try testInvalid(.{}, "Some contentMore content", error.MismatchedEndTag); -} - -test "namespace handling" { - try testValid(.{}, - \\ - \\ - \\ - \\ - \\ - \\ - \\ - , &.{ - .{ .element_start = .{ .name = .{ .prefix = "a", .ns = "urn:1", .local = "root" }, .attributes = &.{ - 
.{ .name = .{ .prefix = "xmlns", .ns = xmlns_ns, .local = "a" }, .value = "urn:1" }, - } } }, - .{ .element_content = .{ .content = "\n " } }, - .{ .element_start = .{ .name = .{ .ns = "urn:2", .local = "child" }, .attributes = &.{ - .{ .name = .{ .local = "xmlns" }, .value = "urn:2" }, - .{ .name = .{ .prefix = "xmlns", .ns = xmlns_ns, .local = "b" }, .value = "urn:3" }, - .{ .name = .{ .local = "attr" }, .value = "value" }, - } } }, - .{ .element_content = .{ .content = "\n " } }, - .{ .element_start = .{ .name = .{ .prefix = "b", .ns = "urn:3", .local = "child" }, .attributes = &.{ - .{ .name = .{ .prefix = "xmlns", .ns = xmlns_ns, .local = "a" }, .value = "urn:4" }, - .{ .name = .{ .prefix = "b", .ns = "urn:3", .local = "attr" }, .value = "value" }, - } } }, - .{ .element_content = .{ .content = "\n " } }, - .{ .element_start = .{ .name = .{ .prefix = "a", .ns = "urn:4", .local = "child" } } }, - .{ .element_end = .{ .name = .{ .prefix = "a", .ns = "urn:4", .local = "child" } } }, - .{ .element_content = .{ .content = "\n " } }, - .{ .element_end = .{ .name = .{ .prefix = "b", .ns = "urn:3", .local = "child" } } }, - .{ .element_content = .{ .content = "\n " } }, - .{ .element_end = .{ .name = .{ .ns = "urn:2", .local = "child" } } }, - .{ .element_content = .{ .content = "\n" } }, - .{ .element_end = .{ .name = .{ .prefix = "a", .ns = "urn:1", .local = "root" } } }, - }); - try testInvalid(.{}, "", error.UndeclaredNsPrefix); - try testInvalid(.{}, "<: />", error.InvalidQName); - try testInvalid(.{}, "", error.InvalidQName); - try testInvalid(.{}, "<:a />", error.InvalidQName); - try testInvalid(.{}, "", error.InvalidQName); - try testInvalid(.{}, "", error.InvalidQName); - try testInvalid(.{}, "", error.InvalidQName); - try testInvalid(.{}, "", error.DuplicateAttribute); - try testInvalid(.{}, "", error.DuplicateAttribute); - try testInvalid(.{}, "
", error.DuplicateAttribute); - try testInvalid(.{}, "", error.InvalidNsBinding); - try testInvalid(.{}, "", error.InvalidNsBinding); - try testValid(.{}, "", &.{ - .{ .element_start = .{ .name = .{ .local = "root" }, .attributes = &.{ - .{ .name = .{ .prefix = "xmlns", .ns = xmlns_ns, .local = "xml" }, .value = "http://www.w3.org/XML/1998/namespace" }, - } } }, - .{ .element_end = .{ .name = .{ .local = "root" } } }, - }); - try testInvalid(.{}, "", error.InvalidNsBinding); - try testInvalid(.{}, "", error.InvalidNsBinding); - try testInvalid(.{}, "", error.InvalidNsBinding); - try testInvalid(.{}, "", error.InvalidNsBinding); - try testInvalid(.{}, "", error.InvalidNsBinding); - try testInvalid(.{}, "", error.QNameNotAllowed); - - try testValid(.{ .namespace_aware = false }, - \\ - \\ - \\ - \\ - \\ - \\ - \\ - , &.{ - .{ .element_start = .{ .name = .{ .local = "a:root" }, .attributes = &.{ - .{ .name = .{ .local = "xmlns:a" }, .value = "urn:1" }, - } } }, - .{ .element_content = .{ .content = "\n " } }, - .{ .element_start = .{ .name = .{ .local = "child" }, .attributes = &.{ - .{ .name = .{ .local = "xmlns" }, .value = "urn:2" }, - .{ .name = .{ .local = "xmlns:b" }, .value = "urn:3" }, - .{ .name = .{ .local = "attr" }, .value = "value" }, - } } }, - .{ .element_content = .{ .content = "\n " } }, - .{ .element_start = .{ .name = .{ .local = "b:child" }, .attributes = &.{ - .{ .name = .{ .local = "xmlns:a" }, .value = "urn:4" }, - .{ .name = .{ .local = "b:attr" }, .value = "value" }, - } } }, - .{ .element_content = .{ .content = "\n " } }, - .{ .element_start = .{ .name = .{ .local = "a:child" } } }, - .{ .element_end = .{ .name = .{ .local = "a:child" } } }, - .{ .element_content = .{ .content = "\n " } }, - .{ .element_end = .{ .name = .{ .local = "b:child" } } }, - .{ .element_content = .{ .content = "\n " } }, - .{ .element_end = .{ .name = .{ .local = "child" } } }, - .{ .element_content = .{ .content = "\n" } }, - .{ .element_end = .{ .name = .{ .local 
= "a:root" } } }, - }); - try testValid(.{ .namespace_aware = false }, "", &.{ - .{ .element_start = .{ .name = .{ .local = "a:root" } } }, - .{ .element_end = .{ .name = .{ .local = "a:root" } } }, - }); - try testValid(.{ .namespace_aware = false }, "<: />", &.{ - .{ .element_start = .{ .name = .{ .local = ":" } } }, - .{ .element_end = .{ .name = .{ .local = ":" } } }, - }); - try testValid(.{ .namespace_aware = false }, "", &.{ - .{ .element_start = .{ .name = .{ .local = "a:" } } }, - .{ .element_end = .{ .name = .{ .local = "a:" } } }, - }); - try testValid(.{ .namespace_aware = false }, "<:a />", &.{ - .{ .element_start = .{ .name = .{ .local = ":a" } } }, - .{ .element_end = .{ .name = .{ .local = ":a" } } }, - }); - try testValid(.{ .namespace_aware = false }, "", &.{ - .{ .element_start = .{ .name = .{ .local = "root" }, .attributes = &.{ - .{ .name = .{ .local = "xmlns:" }, .value = "urn:1" }, - } } }, - .{ .element_end = .{ .name = .{ .local = "root" } } }, - }); - try testValid(.{ .namespace_aware = false }, "", &.{ - .{ .element_start = .{ .name = .{ .local = "root" }, .attributes = &.{ - .{ .name = .{ .local = "xmlns::" }, .value = "urn:1" }, - } } }, - .{ .element_end = .{ .name = .{ .local = "root" } } }, - }); - try testValid(.{ .namespace_aware = false }, "", &.{ - .{ .element_start = .{ .name = .{ .local = "root" }, .attributes = &.{ - .{ .name = .{ .local = "xmlns:a:b" }, .value = "urn:1" }, - } } }, - .{ .element_end = .{ .name = .{ .local = "root" } } }, - }); - try testInvalid(.{ .namespace_aware = false }, "", error.DuplicateAttribute); - try testInvalid(.{ .namespace_aware = false }, "", error.DuplicateAttribute); - try testValid(.{ .namespace_aware = false }, "", &.{ - .{ .element_start = .{ .name = .{ .local = "root" } } }, - .{ .pi = .{ .target = "ns:pi", .content = "" } }, - .{ .element_end = .{ .name = .{ .local = "root" } } }, - }); -} - -fn testValid(comptime options: ReaderOptions, input: []const u8, expected_events: []const Event) 
!void { - var input_stream = std.io.fixedBufferStream(input); - var input_reader = reader(testing.allocator, input_stream.reader(), options); - defer input_reader.deinit(); - var i: usize = 0; - while (try input_reader.next()) |event| : (i += 1) { - if (i >= expected_events.len) { - std.debug.print("Unexpected event after end: {}\n", .{event}); - return error.TestFailed; - } - testing.expectEqualDeep(expected_events[i], event) catch |e| { - std.debug.print("(at index {})\n", .{i}); - return e; - }; - } - if (i != expected_events.len) { - std.debug.print("Expected {} events, found {}\n", .{ expected_events.len, i }); - return error.TestFailed; - } -} - -fn testInvalid(comptime options: ReaderOptions, input: []const u8, expected_error: anyerror) !void { - var input_stream = std.io.fixedBufferStream(input); - var input_reader = reader(testing.allocator, input_stream.reader(), options); - defer input_reader.deinit(); - while (input_reader.next()) |_| {} else |err| { - try testing.expectEqual(expected_error, err); - } -} - -test "nextNode" { - var input_stream = std.io.fixedBufferStream( - \\ - \\ - \\ - \\ - \\ - \\

Hello,

- \\ - \\ - \\ Text content goes here. - \\

&

- \\
- \\ - \\ - \\ - \\ - \\ - ); - var input_reader = reader(testing.allocator, input_stream.reader(), .{}); - defer input_reader.deinit(); - - try testing.expectEqualDeep(@as(?Event, .{ .xml_declaration = .{ .version = "1.0" } }), try input_reader.next()); - try testing.expectEqualDeep(@as(?Event, .{ .pi = .{ .target = "some-pi", .content = "" } }), try input_reader.next()); - try testing.expectEqualDeep(@as(?Event, .{ .comment = .{ .content = " A processing instruction with content follows " } }), try input_reader.next()); - try testing.expectEqualDeep(@as(?Event, .{ .pi = .{ .target = "some-pi-with-content", .content = "content" } }), try input_reader.next()); - - const root_start = try input_reader.next(); - try testing.expect(root_start != null and root_start.? == .element_start); - var root_node = try input_reader.nextNode(testing.allocator, root_start.?.element_start); - defer root_node.deinit(); - try testing.expectEqualDeep(Node.Element{ .name = .{ .local = "root" }, .children = &.{ - .{ .text = .{ .content = "\n " } }, - .{ .element = .{ .name = .{ .local = "p" }, .children = &.{ - .{ .attribute = .{ .name = .{ .local = "class" }, .value = "test" } }, - .{ .text = .{ .content = "Hello, world!" 
} }, - } } }, - .{ .text = .{ .content = "\n " } }, - .{ .element = .{ .name = .{ .local = "line" }, .children = &.{} } }, - .{ .text = .{ .content = "\n " } }, - .{ .pi = .{ .target = "another-pi", .content = "" } }, - .{ .text = .{ .content = "\n Text content goes here.\n " } }, - .{ .element = .{ .name = .{ .local = "div" }, .children = &.{ - .{ .element = .{ .name = .{ .local = "p" }, .children = &.{ - .{ .text = .{ .content = "&" } }, - } } }, - } } }, - .{ .text = .{ .content = "\n" } }, - } }, root_node.value); - - try testing.expectEqualDeep(@as(?Event, .{ .comment = .{ .content = " Comments are allowed after the end of the root element " } }), try input_reader.next()); - try testing.expectEqualDeep(@as(?Event, .{ .pi = .{ .target = "comment", .content = "So are PIs " } }), try input_reader.next()); - try testing.expect(try input_reader.next() == null); -} - -test "nextNode namespace handling" { - var input_stream = std.io.fixedBufferStream( - \\ - \\ - \\ - \\ - \\ - \\ - \\ - ); - var input_reader = reader(testing.allocator, input_stream.reader(), .{}); - defer input_reader.deinit(); - - const root_start = try input_reader.next(); - try testing.expect(root_start != null and root_start.? 
== .element_start); - var root_node = try input_reader.nextNode(testing.allocator, root_start.?.element_start); - defer root_node.deinit(); - try testing.expectEqualDeep(Node.Element{ .name = .{ .prefix = "a", .ns = "urn:1", .local = "root" }, .children = &.{ - .{ .attribute = .{ .name = .{ .prefix = "xmlns", .ns = xmlns_ns, .local = "a" }, .value = "urn:1" } }, - .{ .text = .{ .content = "\n " } }, - .{ .element = .{ .name = .{ .ns = "urn:2", .local = "child" }, .children = &.{ - .{ .attribute = .{ .name = .{ .local = "xmlns" }, .value = "urn:2" } }, - .{ .attribute = .{ .name = .{ .prefix = "xmlns", .ns = xmlns_ns, .local = "b" }, .value = "urn:3" } }, - .{ .attribute = .{ .name = .{ .local = "attr" }, .value = "value" } }, - .{ .text = .{ .content = "\n " } }, - .{ .element = .{ .name = .{ .prefix = "b", .ns = "urn:3", .local = "child" }, .children = &.{ - .{ .attribute = .{ .name = .{ .prefix = "xmlns", .ns = xmlns_ns, .local = "a" }, .value = "urn:4" } }, - .{ .attribute = .{ .name = .{ .prefix = "b", .ns = "urn:3", .local = "attr" }, .value = "value" } }, - .{ .text = .{ .content = "\n " } }, - .{ .element = .{ .name = .{ .prefix = "a", .ns = "urn:4", .local = "child" } } }, - .{ .text = .{ .content = "\n " } }, - } } }, - .{ .text = .{ .content = "\n " } }, - } } }, - .{ .text = .{ .content = "\n" } }, - } }, root_node.value); -} - -test readDocument { - var input_stream = std.io.fixedBufferStream( - \\ - \\ - \\ - \\ - \\ - \\

Hello,

- \\ - \\ - \\ Text content goes here. - \\

&

- \\
- \\ - \\ - \\ - \\ - \\ - ); - var document_node = try readDocument(testing.allocator, input_stream.reader(), .{}); - defer document_node.deinit(); - - try testing.expectEqualDeep(Node.Document{ .version = "1.0", .children = &.{ - .{ .pi = .{ .target = "some-pi", .content = "" } }, - .{ .comment = .{ .content = " A processing instruction with content follows " } }, - .{ .pi = .{ .target = "some-pi-with-content", .content = "content" } }, - .{ .element = .{ .name = .{ .local = "root" }, .children = &.{ - .{ .text = .{ .content = "\n " } }, - .{ .element = .{ .name = .{ .local = "p" }, .children = &.{ - .{ .attribute = .{ .name = .{ .local = "class" }, .value = "test" } }, - .{ .text = .{ .content = "Hello, world!" } }, - } } }, - .{ .text = .{ .content = "\n " } }, - .{ .element = .{ .name = .{ .local = "line" }, .children = &.{} } }, - .{ .text = .{ .content = "\n " } }, - .{ .pi = .{ .target = "another-pi", .content = "" } }, - .{ .text = .{ .content = "\n Text content goes here.\n " } }, - .{ .element = .{ .name = .{ .local = "div" }, .children = &.{ - .{ .element = .{ .name = .{ .local = "p" }, .children = &.{ - .{ .text = .{ .content = "&" } }, - } } }, - } } }, - .{ .text = .{ .content = "\n" } }, - } } }, - .{ .comment = .{ .content = " Comments are allowed after the end of the root element " } }, - .{ .pi = .{ .target = "comment", .content = "So are PIs " } }, - } }, document_node.value); -} - -test Children { - var input_stream = std.io.fixedBufferStream( - \\ - \\ Hello, world! - \\ Some content. 
- \\ - \\ - ); - var input_reader = reader(testing.allocator, input_stream.reader(), .{}); - defer input_reader.deinit(); - - try testing.expectEqualDeep(@as(?Event, .{ .element_start = .{ .name = .{ .local = "root" } } }), try input_reader.next()); - const root_children = input_reader.children(); - try testing.expectEqualDeep(@as(?Event, .{ .element_content = .{ .content = "\n Hello, world!\n " } }), try root_children.next()); - try testing.expectEqualDeep(@as(?Event, .{ .element_start = .{ .name = .{ .local = "child1" }, .attributes = &.{ - .{ .name = .{ .local = "attr" }, .value = "value" }, - } } }), try root_children.next()); - const child1_children = root_children.children(); - try testing.expectEqualDeep(@as(?Event, .{ .element_content = .{ .content = "Some content." } }), try child1_children.next()); - try testing.expectEqual(@as(?Event, null), try child1_children.next()); - try testing.expectEqualDeep(@as(?Event, .{ .element_content = .{ .content = "\n " } }), try root_children.next()); - try testing.expectEqualDeep(@as(?Event, .{ .element_start = .{ .name = .{ .local = "child2" } } }), try root_children.next()); - const child2_children = root_children.children(); - try testing.expectEqualDeep(@as(?Event, .{ .comment = .{ .content = " Comment " } }), try child2_children.next()); - try testing.expectEqualDeep(@as(?Event, .{ .element_start = .{ .name = .{ .local = "child3" } } }), try child2_children.next()); - const child3_children = child2_children.children(); - try testing.expectEqual(@as(?Event, null), try child3_children.next()); - try testing.expectEqual(@as(?Event, null), try child2_children.next()); - try testing.expectEqualDeep(@as(?Event, .{ .element_content = .{ .content = "\n" } }), try root_children.next()); - try testing.expectEqual(@as(?Event, null), try root_children.next()); -} - -test "skip children" { - var input_stream = std.io.fixedBufferStream( - \\ - \\ Hello, world! - \\ Some content. 
- \\ - \\ - ); - var input_reader = reader(testing.allocator, input_stream.reader(), .{}); - defer input_reader.deinit(); - - try testing.expectEqualDeep(@as(?Event, .{ .element_start = .{ .name = .{ .local = "root" } } }), try input_reader.next()); - const root_children = input_reader.children(); - try root_children.skip(); - try testing.expectEqual(@as(?Event, null), try input_reader.next()); -} diff --git a/src/syntax.zig b/src/syntax.zig deleted file mode 100644 index 9f16250..0000000 --- a/src/syntax.zig +++ /dev/null @@ -1,106 +0,0 @@ -const std = @import("std"); -const unicode = std.unicode; - -pub inline fn isChar(c: u21) bool { - return switch (c) { - '\t', '\r', '\n', ' '...0xD7FF, 0xE000...0xFFFD, 0x10000...0x10FFFF => true, - else => false, - }; -} - -pub inline fn isSpace(c: u21) bool { - return switch (c) { - ' ', '\t', '\r', '\n' => true, - else => false, - }; -} - -pub inline fn isDigit(c: u21) bool { - return switch (c) { - '0'...'9' => true, - else => false, - }; -} - -/// Note: only valid if `isDigit` returns true. -pub inline fn digitValue(c: u21) u4 { - return @intCast(c - '0'); -} - -pub inline fn isHexDigit(c: u21) bool { - return switch (c) { - '0'...'9', 'a'...'f', 'A'...'F' => true, - else => false, - }; -} - -/// Note: only valid if `isHexDigit` returns true. -pub inline fn hexDigitValue(c: u21) u4 { - return switch (c) { - 'a'...'f' => @intCast(c - 'a' + 10), - 'A'...'F' => @intCast(c - 'A' + 10), - else => @intCast(c - '0'), - }; -} - -/// Checks if `s` matches `NCName` from the namespaces spec. -/// -/// Note: only valid if `s` is valid UTF-8. 
-pub fn isNcName(s: []const u8) bool { - var view = unicode.Utf8View.initUnchecked(s); - var iter = view.iterator(); - const first_c = iter.nextCodepoint() orelse return false; - if (first_c == ':' or !isNameStartChar(first_c)) { - return false; - } - while (iter.nextCodepoint()) |c| { - if (c == ':' or !isNameChar(c)) { - return false; - } - } - return true; -} - -pub inline fn isNameStartChar(c: u21) bool { - return switch (c) { - ':', - 'A'...'Z', - '_', - 'a'...'z', - 0xC0...0xD6, - 0xD8...0xF6, - 0xF8...0x2FF, - 0x370...0x37D, - 0x37F...0x1FFF, - 0x200C...0x200D, - 0x2070...0x218F, - 0x2C00...0x2FEF, - 0x3001...0xD7FF, - 0xF900...0xFDCF, - 0xFDF0...0xFFFD, - 0x10000...0xEFFFF, - => true, - else => false, - }; -} - -pub inline fn isNameChar(c: u21) bool { - return if (isNameStartChar(c)) true else switch (c) { - '-', '.', '0'...'9', 0xB7, 0x0300...0x036F, 0x203F...0x2040 => true, - else => false, - }; -} - -pub inline fn isEncodingStartChar(c: u21) bool { - return switch (c) { - 'A'...'Z', 'a'...'z' => true, - else => false, - }; -} - -pub inline fn isEncodingChar(c: u21) bool { - return switch (c) { - 'A'...'Z', 'a'...'z', '0'...'9', '.', '_', '-' => true, - else => false, - }; -} diff --git a/src/token_reader.zig b/src/token_reader.zig deleted file mode 100644 index bcf7964..0000000 --- a/src/token_reader.zig +++ /dev/null @@ -1,621 +0,0 @@ -const std = @import("std"); -const mem = std.mem; -const testing = std.testing; -const unicode = std.unicode; -const encoding = @import("encoding.zig"); -const Scanner = @import("Scanner.zig"); - -/// A single XML token. -/// -/// For efficiency, this is merely an enum specifying the token type. The actual -/// token data is available in `Token.Data`, in the token reader's `token_data` -/// field. The `fullToken` function can be used to get a `Token.Full`, which is -/// a tagged union type and may be easier to consume in certain circumstances. -pub const Token = enum { - /// End of file. - eof, - /// XML declaration. 
- xml_declaration, - /// Element start tag. - element_start, - /// Element content. - element_content, - /// Element end tag. - element_end, - /// End of an empty element. - element_end_empty, - /// Attribute start. - attribute_start, - /// Attribute value content. - attribute_content, - /// Comment start. - comment_start, - /// Comment content. - comment_content, - /// Processing instruction (PI) start. - pi_start, - /// PI content. - pi_content, - - /// The data associated with a token. - /// - /// Even token types which have no associated data are represented here, to - /// provide some additional safety in safe build modes (where it can be - /// checked whether the caller is referencing the correct data field). - pub const Data = union { - eof: void, - xml_declaration: XmlDeclaration, - element_start: ElementStart, - element_content: ElementContent, - element_end: ElementEnd, - element_end_empty: void, - attribute_start: AttributeStart, - attribute_content: AttributeContent, - comment_start: void, - comment_content: CommentContent, - pi_start: PiStart, - pi_content: PiContent, - }; - - /// A token type plus data represented as a tagged union. 
- pub const Full = union(Token) { - eof, - xml_declaration: XmlDeclaration, - element_start: ElementStart, - element_content: ElementContent, - element_end: ElementEnd, - element_end_empty, - attribute_start: AttributeStart, - attribute_content: AttributeContent, - comment_start, - comment_content: CommentContent, - pi_start: PiStart, - pi_content: PiContent, - }; - - pub const XmlDeclaration = struct { - version: []const u8, - encoding: ?[]const u8 = null, - standalone: ?bool = null, - }; - - pub const ElementStart = struct { - name: []const u8, - }; - - pub const ElementContent = struct { - content: Content, - }; - - pub const ElementEnd = struct { - name: []const u8, - }; - - pub const AttributeStart = struct { - name: []const u8, - }; - - pub const AttributeContent = struct { - content: Content, - final: bool = false, - }; - - pub const CommentContent = struct { - content: []const u8, - final: bool = false, - }; - - pub const PiStart = struct { - target: []const u8, - }; - - pub const PiContent = struct { - content: []const u8, - final: bool = false, - }; - - /// A bit of content of an element or attribute. - pub const Content = union(enum) { - /// Raw text content (does not contain any entities). - text: []const u8, - /// A Unicode codepoint. - codepoint: u21, - /// An entity reference, such as `&`. The range covers the name (`amp`). - entity: []const u8, - }; -}; - -/// A location in a file. -pub const Location = struct { - /// The line number, starting at 1. - line: usize = 1, - /// The column number, starting at 1. Columns are counted using Unicode - /// codepoints. - column: usize = 1, - /// Whether the last character seen was a `\r`. - after_cr: bool = false, - - /// Advances the location by a single codepoint. 
- pub fn advance(self: *Location, c: u21) void { - if (c == '\n') { - self.line += 1; - self.column = 1; - self.after_cr = false; - } else if (c == '\r') { - if (self.after_cr) { - self.line += 1; - self.column = 1; - } - self.column += 1; - self.after_cr = true; - } else if (self.after_cr) { - self.line += 1; - // Plain CR line endings cannot be detected as new lines - // immediately, since they could be followed by LF. The following - // character is what completes the line ending interpretation. - self.column = 2; - self.after_cr = false; - } else { - self.column += 1; - } - } -}; - -test Location { - var loc = Location{}; - try expectLocation(loc, 1, 1); - loc.advance('A'); - try expectLocation(loc, 1, 2); - loc.advance('よ'); - try expectLocation(loc, 1, 3); - loc.advance('🥰'); - try expectLocation(loc, 1, 4); - loc.advance('\n'); - try expectLocation(loc, 2, 1); - loc.advance('\r'); - loc.advance('\n'); - try expectLocation(loc, 3, 1); - loc.advance('\r'); - loc.advance('A'); - try expectLocation(loc, 4, 2); - loc.advance('\r'); - loc.advance('\r'); - loc.advance('A'); - try expectLocation(loc, 6, 2); -} - -fn expectLocation(loc: Location, line: usize, column: usize) !void { - if (loc.line != line or loc.column != column) { - std.debug.print("expected {}:{}, found {}:{}", .{ line, column, loc.line, loc.column }); - return error.TestExpectedEqual; - } -} - -/// A drop-in replacement for `Location` which does not actually store location -/// information. -pub const NoOpLocation = struct { - pub inline fn advance(_: *NoOpLocation, _: u21) void {} -}; - -/// Wraps a `std.io.Reader` in a `TokenReader` with the default buffer size -/// (4096). -pub fn tokenReader( - reader: anytype, - comptime options: TokenReaderOptions, -) TokenReader(@TypeOf(reader), options) { - return TokenReader(@TypeOf(reader), options).init(reader, .{}); -} - -/// Options for a `TokenReader`. -pub const TokenReaderOptions = struct { - /// The type of decoder to use. 
- DecoderType: type = encoding.DefaultDecoder, - /// The size of the internal buffer. - /// - /// This limits the byte length of "non-splittable" content, such as - /// element and attribute names. Longer such content will result in - /// `error.Overflow`. - buffer_size: usize = 4096, - /// Whether to normalize line endings and attribute values according to the - /// XML specification. - /// - /// If this is set to false, no normalization will be done: for example, - /// the line ending sequence `\r\n` will appear as-is in returned tokens - /// rather than the normalized `\n`. - enable_normalization: bool = true, - /// Whether to keep track of the current location in the document. - track_location: bool = false, -}; - -/// An XML parser which wraps a `std.io.Reader` and returns low-level tokens. -/// -/// An internal buffer of size `buffer_size` is used to store data read from -/// the reader, which is referenced by the returned tokens. -/// -/// This parser offers several advantages over `Scanner` for higher-level -/// use-cases: -/// -/// - The returned `Token`s use byte slices rather than positional ranges. -/// - The `next` function can be used in the typical Zig iterator pattern. -/// There is no `ok` token which must be ignored, and there is no need to -/// directly signal the end of input (the `Reader` provides this indication). -/// - The line ending and attribute value normalization steps required by the -/// XML specification (minus further attribute value normalization which -/// depends on DTD information) are performed. -/// -/// However, due to its use of an internal buffer and transcoding all input to -/// UTF-8, it is not as efficient as a `Scanner` where these considerations are -/// important. Additionally, `buffer_size` limits the maximum byte length of -/// "unsplittable" content, such as element and attribute names (but not -/// "splittable" content, such as element text content and attribute values). 
-pub fn TokenReader(comptime ReaderType: type, comptime options: TokenReaderOptions) type { - return struct { - scanner: Scanner, - reader: ReaderType, - decoder: options.DecoderType, - /// The data for the most recently returned token. - token_data: Token.Data = undefined, - /// The current location in the file (if enabled). - location: if (options.track_location) Location else NoOpLocation = .{}, - /// Buffered content read by the reader for the current token. - /// - /// Events may reference this buffer via slices. The contents of the - /// buffer (up until `scanner.pos`) are always valid UTF-8. - buffer: [options.buffer_size]u8 = undefined, - /// Whether the last codepoint read was a carriage return (`\r`). - /// - /// This is relevant for line break normalization. - after_cr: if (options.enable_normalization) bool else void = if (options.enable_normalization) false, - - const Self = @This(); - - pub const Error = error{ - InvalidEncoding, - InvalidPiTarget, - Overflow, - UnexpectedEndOfInput, - } || ReaderType.Error || options.DecoderType.Error || Scanner.Error; - - const max_encoded_codepoint_len = @max(options.DecoderType.max_encoded_codepoint_len, 4); - - pub fn init(reader: ReaderType, decoder: options.DecoderType) Self { - return .{ - .scanner = Scanner{}, - .reader = reader, - .decoder = decoder, - }; - } - - /// Returns the full token (including data) from the most recent call to - /// `next`. `token` must be the token returned from the last call to - /// `next`. - pub fn fullToken(self: *const Self, token: Token) Token.Full { - return switch (token) { - inline else => |tag| @unionInit(Token.Full, @tagName(tag), @field(self.token_data, @tagName(tag))), - }; - } - - /// Returns the next token from the input. - /// - /// The slices in the `token_data` stored during this call are only - /// valid until the next call to `next`. 
- pub fn next(self: *Self) Error!Token { - if (self.scanner.pos > 0) { - // If the scanner position is > 0, that means we emitted an event - // on the last call to next, and should try to reset the - // position again in an effort to not run out of buffer space - // (ideally, the scanner should be resettable after every token, - // but we do not depend on this). - if (self.scanner.resetPos()) |token| { - if (token != .ok) { - return try self.bufToken(token); - } - } else |_| { - // Failure to reset isn't fatal (yet); we can still try to - // complete the token below - } - } - - while (true) { - if (self.scanner.pos + max_encoded_codepoint_len >= self.buffer.len) { - if (self.scanner.resetPos()) |token| { - if (token != .ok) { - return try self.bufToken(token); - } - } else |_| { - // Failure to reset here still isn't fatal, since we - // may end up getting shorter codepoints which manage - // to complete the current token. - } - } - - const c = try self.nextCodepoint(); - if (!c.present) { - try self.scanner.endInput(); - self.token_data = .{ .eof = {} }; - return .eof; - } - const token = try self.scanner.next(c.codepoint, c.byte_length); - if (token != .ok) { - return try self.bufToken(token); - } - } - } - - const nextCodepoint = if (options.enable_normalization) nextCodepointNormalized else nextCodepointRaw; - - fn nextCodepointNormalized(self: *Self) !encoding.ReadResult { - var c = try self.nextCodepointRaw(); - if (!c.present) return c; - if (self.after_cr) { - self.after_cr = false; - if (c.codepoint == '\n') { - // \n after \r is ignored because \r was already processed - // as a line ending - c = try self.nextCodepointRaw(); - if (!c.present) return c; - } - } - if (c.codepoint == '\r') { - self.after_cr = true; - c.codepoint = '\n'; - self.buffer[self.scanner.pos] = '\n'; - } - if (self.scanner.state == .attribute_content and - (c.codepoint == '\t' or c.codepoint == '\r' or c.codepoint == '\n')) - { - c.codepoint = ' '; - self.buffer[self.scanner.pos] = 
' '; - } - return c; - } - - fn nextCodepointRaw(self: *Self) !encoding.ReadResult { - const c = try self.decoder.readCodepoint(self.reader, self.buffer[self.scanner.pos..]); - if (c.present) self.location.advance(c.codepoint); - return c; - } - - fn bufToken(self: *Self, token: Scanner.Token) !Token { - switch (token) { - .ok => unreachable, - .xml_declaration => { - self.token_data = .{ .xml_declaration = .{ - .version = self.bufRange(self.scanner.token_data.xml_declaration.version), - .encoding = if (self.scanner.token_data.xml_declaration.encoding) |enc| self.bufRange(enc) else null, - .standalone = self.scanner.token_data.xml_declaration.standalone, - } }; - if (self.token_data.xml_declaration.encoding) |declared_encoding| { - try self.decoder.adaptTo(declared_encoding); - } - return .xml_declaration; - }, - .element_start => { - self.token_data = .{ .element_start = .{ - .name = self.bufRange(self.scanner.token_data.element_start.name), - } }; - return .element_start; - }, - .element_content => { - self.token_data = .{ .element_content = .{ - .content = self.bufContent(self.scanner.token_data.element_content.content), - } }; - return .element_content; - }, - .element_end => { - self.token_data = .{ .element_end = .{ - .name = self.bufRange(self.scanner.token_data.element_end.name), - } }; - return .element_end; - }, - .element_end_empty => { - self.token_data = .{ .element_end_empty = {} }; - return .element_end_empty; - }, - .attribute_start => { - self.token_data = .{ .attribute_start = .{ - .name = self.bufRange(self.scanner.token_data.attribute_start.name), - } }; - return .attribute_start; - }, - .attribute_content => { - self.token_data = .{ .attribute_content = .{ - .content = self.bufContent(self.scanner.token_data.attribute_content.content), - .final = self.scanner.token_data.attribute_content.final, - } }; - return .attribute_content; - }, - .comment_start => { - self.token_data = .{ .comment_start = {} }; - return .comment_start; - }, - 
.comment_content => { - self.token_data = .{ .comment_content = .{ - .content = self.bufRange(self.scanner.token_data.comment_content.content), - .final = self.scanner.token_data.comment_content.final, - } }; - return .comment_content; - }, - .pi_start => { - const target = self.bufRange(self.scanner.token_data.pi_start.target); - if (std.ascii.eqlIgnoreCase(target, "xml")) { - return error.InvalidPiTarget; - } - self.token_data = .{ .pi_start = .{ - .target = target, - } }; - return .pi_start; - }, - .pi_content => { - self.token_data = .{ .pi_content = .{ - .content = self.bufRange(self.scanner.token_data.pi_content.content), - .final = self.scanner.token_data.pi_content.final, - } }; - return .pi_content; - }, - } - } - - inline fn bufContent(self: *const Self, content: Scanner.Token.Content) Token.Content { - return switch (content) { - .text => |text| .{ .text = self.bufRange(text) }, - .codepoint => |codepoint| .{ .codepoint = codepoint }, - .entity => |entity| .{ .entity = self.bufRange(entity) }, - }; - } - - inline fn bufRange(self: *const Self, range: Scanner.Range) []const u8 { - return self.buffer[range.start..range.end]; - } - }; -} - -test TokenReader { - try testValid(.{}, - \\ - \\ - \\ - \\ - \\ - \\

Hello,

- \\ - \\ - \\ Text content goes here. - \\

&

- \\
- \\ - \\ - \\ - \\ - \\ - , &.{ - .{ .xml_declaration = .{ .version = "1.0" } }, - .{ .pi_start = .{ .target = "some-pi" } }, - .{ .pi_content = .{ .content = "", .final = true } }, - .comment_start, - .{ .comment_content = .{ .content = " A processing instruction with content follows ", .final = true } }, - .{ .pi_start = .{ .target = "some-pi-with-content" } }, - .{ .pi_content = .{ .content = "content", .final = true } }, - .{ .element_start = .{ .name = "root" } }, - .{ .element_content = .{ .content = .{ .text = "\n " } } }, - .{ .element_start = .{ .name = "p" } }, - .{ .attribute_start = .{ .name = "class" } }, - .{ .attribute_content = .{ .content = .{ .text = "test" }, .final = true } }, - .{ .element_content = .{ .content = .{ .text = "Hello, " } } }, - .{ .element_content = .{ .content = .{ .text = "world!" } } }, - .{ .element_end = .{ .name = "p" } }, - .{ .element_content = .{ .content = .{ .text = "\n " } } }, - .{ .element_start = .{ .name = "line" } }, - .element_end_empty, - .{ .element_content = .{ .content = .{ .text = "\n " } } }, - .{ .pi_start = .{ .target = "another-pi" } }, - .{ .pi_content = .{ .content = "", .final = true } }, - .{ .element_content = .{ .content = .{ .text = "\n Text content goes here.\n " } } }, - .{ .element_start = .{ .name = "div" } }, - .{ .element_start = .{ .name = "p" } }, - .{ .element_content = .{ .content = .{ .entity = "amp" } } }, - .{ .element_end = .{ .name = "p" } }, - .{ .element_end = .{ .name = "div" } }, - .{ .element_content = .{ .content = .{ .text = "\n" } } }, - .{ .element_end = .{ .name = "root" } }, - .comment_start, - .{ .comment_content = .{ .content = " Comments are allowed after the end of the root element ", .final = true } }, - .{ .pi_start = .{ .target = "comment" } }, - .{ .pi_content = .{ .content = "So are PIs ", .final = true } }, - }); -} - -test "normalization" { - try testValid(.{}, "Line 1\rLine 2\r\nLine 3\nLine 4\n\rLine 6\r\n\rLine 8", &.{ - .{ .element_start = .{ .name = 
"root" } }, - .{ .element_content = .{ .content = .{ .text = "Line 1\nLine 2\nLine 3\nLine 4\n\nLine 6\n\nLine 8" } } }, - .{ .element_end = .{ .name = "root" } }, - }); - try testValid(.{}, "", &.{ - .{ .element_start = .{ .name = "root" } }, - .{ .attribute_start = .{ .name = "attr" } }, - .{ .attribute_content = .{ - .content = .{ .text = " Line 1 Line 2 Line 3 Line 4 More content Line 6 Line 8 " }, - .final = true, - } }, - .element_end_empty, - }); - try testValid(.{ .enable_normalization = false }, "Line 1\rLine 2\r\nLine 3\nLine 4\n\rLine 6\r\n\rLine 8", &.{ - .{ .element_start = .{ .name = "root" } }, - .{ .element_content = .{ .content = .{ .text = "Line 1\rLine 2\r\nLine 3\nLine 4\n\rLine 6\r\n\rLine 8" } } }, - .{ .element_end = .{ .name = "root" } }, - }); - try testValid(.{ .enable_normalization = false }, "", &.{ - .{ .element_start = .{ .name = "root" } }, - .{ .attribute_start = .{ .name = "attr" } }, - .{ .attribute_content = .{ - .content = .{ .text = " Line 1\rLine 2\r\nLine 3\nLine 4\t\tMore content\n\rLine 6\r\n\rLine 8 " }, - .final = true, - } }, - .element_end_empty, - }); -} - -test "PI target" { - try testValid(.{}, "", &.{ - .{ .xml_declaration = .{ .version = "1.0" } }, - .{ .element_start = .{ .name = "root" } }, - .{ .pi_start = .{ .target = "some-pi" } }, - .{ .pi_content = .{ .content = "", .final = true } }, - .{ .element_end = .{ .name = "root" } }, - }); - try testValid(.{}, "", &.{ - .{ .element_start = .{ .name = "root" } }, - .{ .pi_start = .{ .target = "x" } }, - .{ .pi_content = .{ .content = "2", .final = true } }, - .{ .element_end = .{ .name = "root" } }, - }); - try testValid(.{}, "", &.{ - .{ .element_start = .{ .name = "root" } }, - .{ .pi_start = .{ .target = "xm" } }, - .{ .pi_content = .{ .content = "2", .final = true } }, - .{ .element_end = .{ .name = "root" } }, - }); - try testValid(.{}, "", &.{ - .{ .element_start = .{ .name = "root" } }, - .{ .pi_start = .{ .target = "xml2" } }, - .{ .pi_content = .{ .content = 
"2", .final = true } }, - .{ .element_end = .{ .name = "root" } }, - }); - try testInvalid(.{}, "", error.InvalidPiTarget); - try testInvalid(.{}, "", error.InvalidPiTarget); - try testInvalid(.{}, "", error.InvalidPiTarget); - try testInvalid(.{}, "", error.InvalidPiTarget); -} - -fn testValid(comptime options: TokenReaderOptions, input: []const u8, expected_tokens: []const Token.Full) !void { - var input_stream = std.io.fixedBufferStream(input); - var input_reader = tokenReader(input_stream.reader(), options); - var i: usize = 0; - while (true) : (i += 1) { - const token = try input_reader.next(); - if (token == .eof) break; - if (i >= expected_tokens.len) { - std.debug.print("Unexpected token after end: {}\n", .{token}); - return error.TestFailed; - } - testing.expectEqualDeep(expected_tokens[i], input_reader.fullToken(token)) catch |e| { - std.debug.print("(at index {})\n", .{i}); - return e; - }; - } - if (i != expected_tokens.len) { - std.debug.print("Expected {} tokens, found {}\n", .{ expected_tokens.len, i }); - return error.TestFailed; - } -} - -fn testInvalid(comptime options: TokenReaderOptions, input: []const u8, expected_error: anyerror) !void { - var input_stream = std.io.fixedBufferStream(input); - var input_reader = tokenReader(input_stream.reader(), options); - while (input_reader.next()) |token| { - if (token == .eof) return error.TestExpectedError; - } else |err| { - try testing.expectEqual(expected_error, err); - } -} diff --git a/src/writer.zig b/src/writer.zig deleted file mode 100644 index 8871139..0000000 --- a/src/writer.zig +++ /dev/null @@ -1,264 +0,0 @@ -const std = @import("std"); -const fmt = std.fmt; -const testing = std.testing; -const ArrayListUnmanaged = std.ArrayListUnmanaged; -const Event = @import("reader.zig").Event; -const QName = @import("reader.zig").QName; - -/// Returns a `Writer` wrapping a `std.io.Writer`. 
-pub fn writer(w: anytype) Writer(@TypeOf(w)) { - return .{ .w = w }; -} - -/// A streaming XML writer wrapping a `std.io.Writer`. -/// -/// This writer exposes a selection of functions to write XML content with -/// proper escaping where possible. -/// -/// Some write functions come in sets to allow streaming longer contents rather -/// than writing them all in one go: for example, `writeAttribute` is useful for -/// writing an entire attribute name-value pair in one shot, but if the attribute -/// value is potentially quite long, the sequence of `writeAttributeStart`, -/// followed by an arbitrary (even zero) number of `writeAttributeContent`, -/// followed by `writeAttributeEnd`, can be used as a lower-level alternative. -/// -/// One interesting lower-level function is `writeElementStartEnd`, which is used -/// to tell the writer to finish the current element start tag (all attributes -/// have been written), in preparation for writing other content. The other -/// functions (such as `writeElementContent`) will call this themselves if the -/// writer is in the middle of a start tag, but calling this function directly -/// could be useful if the user plans to write directly to the underlying -/// writer. -/// -/// Additionally, this writer makes no attempt at being able to write XML in -/// arbitrary styles. For example, the quote character is not configurable, and -/// there is no function for writing CDATA sections. -/// -/// # Safety -/// -/// There are caveats to the well-formedness of the resulting output: -/// -/// 1. There is no protection against calling the various write functions out of -/// order. For example, calling `writeElementEnd` without a corresponding -/// `writeElementStart` will result in non-well-formed XML. -/// 2. 
Processing instructions (PIs) and comments do not support escaping their -/// content, so passing content to the corresponding write functions which -/// contains illegal sequences for those constructs will result in -/// unexpected outcomes. For example, calling `writeComment` with a value -/// containing `-->` will result in the writer happily writing out the raw -/// `-->` in the text of the comment, which will close the comment and write -/// the rest of the provided text as raw XML (followed by the writer's -/// inserted `-->`). -/// 3. There are no validations that the names of elements and attributes match -/// the allowed syntax for names. Likewise, there are no validations that the -/// `version` and `encoding` passed to `writeXmlDeclaration` match the -/// allowed syntax for those values. -/// -/// As such, it is not safe to use all functionality of this writer with -/// arbitrary user-provided data. What _is_ safe, however, is the more common -/// case of using this writer with only attribute values and element content -/// containing user-provided data, since those can always be escaped properly. 
-pub fn Writer(comptime WriterType: type) type { - return struct { - w: WriterType, - in_element_start: bool = false, - - const Self = @This(); - - pub const Error = WriterType.Error; - - pub fn writeXmlDeclaration(self: *Self, version: []const u8, encoding: ?[]const u8, standalone: ?bool) Error!void { - try self.w.print(""); - } - - pub fn writeElementStart(self: *Self, name: QName) Error!void { - if (self.in_element_start) { - try self.writeElementStartEnd(); - } - try self.w.print("<{}", .{fmtQName(name)}); - self.in_element_start = true; - } - - pub fn writeElementStartEnd(self: *Self) Error!void { - try self.w.writeByte('>'); - self.in_element_start = false; - } - - pub fn writeElementContent(self: *Self, content: []const u8) Error!void { - if (self.in_element_start) { - try self.writeElementStartEnd(); - } - try self.w.print("{}", .{fmtElementContent(content)}); - } - - pub fn writeElementEnd(self: *Self, name: QName) Error!void { - if (self.in_element_start) { - try self.w.writeAll(" />"); - self.in_element_start = false; - } else { - try self.w.print("", .{fmtQName(name)}); - } - } - - pub fn writeAttribute(self: *Self, name: QName, content: []const u8) Error!void { - try self.writeAttributeStart(name); - try self.writeAttributeContent(content); - try self.writeAttributeEnd(); - } - - pub fn writeAttributeStart(self: *Self, name: QName) Error!void { - try self.w.print(" {}=\"", .{fmtQName(name)}); - } - - pub fn writeAttributeContent(self: *Self, content: []const u8) Error!void { - try self.w.print("{}", .{fmtAttributeContent(content)}); - } - - pub fn writeAttributeEnd(self: *Self) Error!void { - try self.w.writeByte('"'); - } - - pub fn writeComment(self: *Self, content: []const u8) Error!void { - try self.writeCommentStart(); - try self.writeCommentContent(content); - try self.writeCommentEnd(); - } - - pub fn writeCommentStart(self: *Self) Error!void { - if (self.in_element_start) { - try self.writeElementStartEnd(); - } - try self.w.writeAll(""); - } - 
- pub fn writePi(self: *Self, target: []const u8, content: []const u8) Error!void { - try self.writePiStart(target); - try self.writePiContent(content); - try self.writePiEnd(); - } - - pub fn writePiStart(self: *Self, target: []const u8) Error!void { - if (self.in_element_start) { - try self.writeElementStartEnd(); - } - try self.w.print(""); - } - }; -} - -test Writer { - var output = ArrayListUnmanaged(u8){}; - defer output.deinit(testing.allocator); - var xml_writer = writer(output.writer(testing.allocator)); - - const xmlns_ns = "http://www.w3.org/2000/xmlns/"; - try xml_writer.writeXmlDeclaration("1.0", "UTF-8", true); - // The ns part of the QName is not used when writing, but may factor in to - // future (optional) safety checks - try xml_writer.writeElementStart(.{ .prefix = "test", .ns = "http://example.com/ns/test", .local = "root" }); - try xml_writer.writeAttribute(.{ .prefix = "xmlns", .ns = xmlns_ns, .local = "test" }, "http://example.com/ns/test"); - try xml_writer.writeComment(" Hello, world! "); - try xml_writer.writeElementContent("Some text & some other text. "); - try xml_writer.writeElementContent("Another ."); - try xml_writer.writeElementStart(.{ .local = "sub" }); - try xml_writer.writeAttribute(.{ .local = "escaped" }, "&<>\"'"); - try xml_writer.writeElementEnd(.{ .local = "sub" }); - try xml_writer.writeElementEnd(.{ .prefix = "test", .ns = "http://example.com/ns/test", .local = "root" }); - - try testing.expectEqualStrings( - \\ - ++ - \\ - ++ - \\ - ++ - \\Some text & some other text. Another <sentence>. - ++ - \\ - ++ - \\ - , output.items); -} - -/// Returns a `std.fmt.Formatter` for escaped attribute content. 
-pub fn fmtAttributeContent(data: []const u8) fmt.Formatter(formatAttributeContent) { - return .{ .data = data }; -} - -fn formatAttributeContent( - data: []const u8, - comptime _: []const u8, - _: fmt.FormatOptions, - w: anytype, -) !void { - for (data) |b| switch (b) { - '\t' => try w.writeAll(" "), - '\n' => try w.writeAll(" "), - '\r' => try w.writeAll(" "), - '"' => try w.writeAll("""), - '&' => try w.writeAll("&"), - '<' => try w.writeAll("<"), - else => try w.writeByte(b), - }; -} - -/// Returns a `std.fmt.Formatter` for escaped element content. -pub fn fmtElementContent(data: []const u8) fmt.Formatter(formatElementContent) { - return .{ .data = data }; -} - -fn formatElementContent( - data: []const u8, - comptime _: []const u8, - _: fmt.FormatOptions, - w: anytype, -) !void { - for (data) |b| switch (b) { - '\r' => try w.writeAll(" "), - '&' => try w.writeAll("&"), - '<' => try w.writeAll("<"), - else => try w.writeByte(b), - }; -} - -/// Returns a `std.fmt.Formatter` for a QName (formats as `prefix:local` or -/// just `local` if no prefix). -pub fn fmtQName(data: QName) fmt.Formatter(formatQName) { - return .{ .data = data }; -} - -fn formatQName( - data: QName, - comptime _: []const u8, - _: fmt.FormatOptions, - w: anytype, -) !void { - if (data.prefix) |prefix| { - try w.writeAll(prefix); - try w.writeByte(':'); - } - try w.writeAll(data.local); -} diff --git a/src/xml.zig b/src/xml.zig index 5856a7b..bfe4569 100644 --- a/src/xml.zig +++ b/src/xml.zig @@ -1,53 +1,476 @@ -//! An XML library, currently supporting reading XML. -//! -//! Most applications will want to start with `Reader` and investigate the -//! other parser options if they want to avoid dynamic memory allocation or -//! want better performance at the expense of ease of use. -//! -//! There are three parsers available, with increasing levels of abstraction, -//! ease of use, and standard conformance. The documentation for each parser -//! 
provides more detailed information on its functionality. -//! -//! 1. `Scanner` - the lowest-level parser. A state machine that accepts -//! Unicode codepoints one by one and returns "tokens" referencing ranges of -//! input data. -//! 2. `TokenReader` - a mid-level parser that improves on `Scanner` by -//! buffering input so that returned tokens can use UTF-8-encoded byte -//! slices rather than ranges. It also uses a `std.io.Reader` and a decoder -//! (see `encoding`) rather than forcing the user to pass codepoints -//! directly. -//! 3. `Reader` - a general-purpose streaming parser which can handle -//! namespaces. Helper functions are available to parse some or all of a -//! document into a `Node`, which acts as a minimal DOM abstraction. - const std = @import("std"); -const testing = std.testing; +const Allocator = std.mem.Allocator; +const assert = std.debug.assert; +const expectEqual = std.testing.expectEqual; +const expectEqualStrings = std.testing.expectEqualStrings; + +pub const Location = struct { + line: usize, + column: usize, + + pub const start: Location = .{ .line = 1, .column = 1 }; + + pub fn update(loc: *Location, s: []const u8) void { + var pos: usize = 0; + while (std.mem.indexOfAnyPos(u8, s, pos, "\r\n")) |nl_pos| { + loc.line += 1; + loc.column = 1; + if (s[nl_pos] == '\r' and nl_pos + 1 < s.len and s[nl_pos + 1] == '\n') { + pos = nl_pos + 2; + } else { + pos = nl_pos + 1; + } + } + loc.column += s.len - pos; + } +}; + +pub const QName = struct { + ns: []const u8, + local: []const u8, +}; + +pub const PrefixedQName = struct { + prefix: []const u8, + ns: []const u8, + local: []const u8, +}; + +pub const predefined_entities = std.StaticStringMap([]const u8).initComptime(.{ + .{ "lt", "<" }, + .{ "gt", ">" }, + .{ "amp", "&" }, + .{ "apos", "'" }, + .{ "quot", "\"" }, +}); + +pub const ns_xml = "http://www.w3.org/XML/1998/namespace"; +pub const ns_xmlns = "http://www.w3.org/2000/xmlns/"; +pub const predefined_namespace_uris = 
std.StaticStringMap([]const u8).initComptime(.{ + .{ "xml", ns_xml }, + .{ "xmlns", ns_xmlns }, +}); + +pub const Reader = @import("Reader.zig"); + +pub fn GenericReader(comptime SourceError: type) type { + return struct { + reader: Reader, + + /// See `Reader.deinit`. + pub inline fn deinit(reader: *@This()) void { + reader.reader.deinit(); + } + + pub const ReadError = Reader.ReadError || SourceError; + + /// See `Reader.read`. + pub inline fn read(reader: *@This()) ReadError!Reader.Node { + return @errorCast(reader.reader.read()); + } + + /// See `Reader.readElementText`. + pub inline fn readElementText(reader: *@This()) (ReadError || Allocator.Error)![]const u8 { + return @errorCast(reader.reader.readElementText()); + } + + pub inline fn readElementTextAlloc(reader: *@This(), gpa: Allocator) (ReadError || Allocator.Error)![]u8 { + return @errorCast(reader.reader.readElementTextAlloc(gpa)); + } + + /// See `Reader.readElementTextWrite`. + pub inline fn readElementTextWrite(reader: *@This(), writer: anytype) (ReadError || @TypeOf(writer).Error)!void { + return @errorCast(reader.reader.readElementTextWrite(writer.any())); + } + + /// See `Reader.skipProlog`. + pub inline fn skipProlog(reader: *@This()) ReadError!void { + return @errorCast(reader.reader.skipProlog()); + } + + /// See `Reader.skipElement`. + pub inline fn skipElement(reader: *@This()) ReadError!void { + return @errorCast(reader.reader.skipElement()); + } + + /// See `Reader.location`. + pub inline fn location(reader: @This()) Location { + return reader.reader.location(); + } + + /// See `Reader.errorCode`. + pub inline fn errorCode(reader: @This()) Reader.ErrorCode { + return reader.reader.errorCode(); + } + + /// See `Reader.errorLocation`. + pub inline fn errorLocation(reader: @This()) Location { + return reader.reader.errorLocation(); + } + + /// See `Reader.xmlDeclarationVersion`. 
+ pub inline fn xmlDeclarationVersion(reader: @This()) []const u8 { + return reader.reader.xmlDeclarationVersion(); + } + + /// See `Reader.xmlDeclarationEncoding`. + pub inline fn xmlDeclarationEncoding(reader: @This()) ?[]const u8 { + return reader.reader.xmlDeclarationEncoding(); + } + + /// See `Reader.xmlDeclarationStandalone`. + pub inline fn xmlDeclarationStandalone(reader: @This()) ?bool { + return reader.reader.xmlDeclarationStandalone(); + } + + /// See `Reader.elementName`. + pub inline fn elementName(reader: @This()) []const u8 { + return reader.reader.elementName(); + } + + /// See `Reader.elementNameNs`. + pub inline fn elementNameNs(reader: @This()) PrefixedQName { + return reader.reader.elementNameNs(); + } + + /// See `Reader.attributeCount`. + pub inline fn attributeCount(reader: @This()) usize { + return reader.reader.attributeCount(); + } + + /// See `Reader.attributeName`. + pub inline fn attributeName(reader: @This(), n: usize) []const u8 { + return reader.reader.attributeName(n); + } + + /// See `Reader.attributeNameNs`. + pub inline fn attributeNameNs(reader: @This(), n: usize) PrefixedQName { + return reader.reader.attributeNameNs(n); + } + + /// See `Reader.attributeValue`. + pub inline fn attributeValue(reader: *@This(), n: usize) Allocator.Error![]const u8 { + return reader.reader.attributeValue(n); + } + + /// See `Reader.attributeValueAlloc`. + pub inline fn attributeValueAlloc(reader: @This(), gpa: Allocator, n: usize) Allocator.Error![]u8 { + return reader.reader.attributeValueAlloc(gpa, n); + } + + /// See `Reader.attributeValueWrite`. + pub inline fn attributeValueWrite(reader: @This(), n: usize, writer: anytype) @TypeOf(writer).Error!void { + return @errorCast(reader.reader.attributeValueWrite(n, writer.any())); + } + + /// See `Reader.attributeValueRaw`. + pub inline fn attributeValueRaw(reader: @This(), n: usize) []const u8 { + return reader.reader.attributeValueRaw(n); + } + + /// See `Reader.attributeLocation`. 
+ pub inline fn attributeLocation(reader: @This(), n: usize) Location { + return reader.reader.attributeLocation(n); + } + + /// See `Reader.attributeIndex`. + pub inline fn attributeIndex(reader: @This(), name: []const u8) ?usize { + return reader.reader.attributeIndex(name); + } + + /// See `Reader.attributeIndexNs`. + pub inline fn attributeIndexNs(reader: @This(), ns: []const u8, local: []const u8) ?usize { + return reader.reader.attributeIndexNs(ns, local); + } + + /// See `Reader.comment`. + pub inline fn comment(reader: *@This()) Allocator.Error![]const u8 { + return reader.reader.comment(); + } + + /// See `Reader.commentWrite`. + pub inline fn commentWrite(reader: @This(), writer: anytype) @TypeOf(writer).Error!void { + return @errorCast(reader.reader.commentWrite(writer.any())); + } + + /// See `Reader.commentRaw`. + pub inline fn commentRaw(reader: @This()) []const u8 { + return reader.reader.commentRaw(); + } + + /// See `Reader.piTarget`. + pub inline fn piTarget(reader: @This()) []const u8 { + return reader.reader.piTarget(); + } + + /// See `Reader.piData`. + pub inline fn piData(reader: *@This()) Allocator.Error![]const u8 { + return reader.reader.piData(); + } + + /// See `Reader.piDataWrite`. + pub inline fn piDataWrite(reader: @This(), writer: anytype) @TypeOf(writer).Error!void { + return @errorCast(reader.reader.piDataWrite(writer.any())); + } + + /// See `Reader.piDataRaw`. + pub inline fn piDataRaw(reader: @This()) []const u8 { + return reader.reader.piDataRaw(); + } + + /// See `Reader.text`. + pub inline fn text(reader: *@This()) Allocator.Error![]const u8 { + return reader.reader.text(); + } + + /// See `Reader.textWrite`. + pub inline fn textWrite(reader: @This(), writer: anytype) @TypeOf(writer).Error!void { + return @errorCast(reader.reader.textWrite(writer.any())); + } + + /// See `Reader.textRaw`. + pub inline fn textRaw(reader: @This()) []const u8 { + return reader.reader.textRaw(); + } + + /// See `Reader.cdataWrite`. 
+ pub inline fn cdataWrite(reader: @This(), writer: anytype) @TypeOf(writer).Error!void { + return @errorCast(reader.reader.cdataWrite(writer.any())); + } + + /// See `Reader.cdata`. + pub inline fn cdata(reader: *@This()) Allocator.Error![]const u8 { + return reader.reader.cdata(); + } -pub const encoding = @import("encoding.zig"); + /// See `Reader.cdataRaw`. + pub inline fn cdataRaw(reader: @This()) []const u8 { + return reader.reader.cdataRaw(); + } -pub const Scanner = @import("Scanner.zig"); + /// See `Reader.entityReferenceName`. + pub inline fn entityReferenceName(reader: @This()) []const u8 { + return reader.reader.entityReferenceName(); + } -pub const tokenReader = @import("token_reader.zig").tokenReader; -pub const TokenReader = @import("token_reader.zig").TokenReader; -pub const TokenReaderOptions = @import("token_reader.zig").TokenReaderOptions; -pub const Token = @import("token_reader.zig").Token; + /// See `Reader.characterReferenceChar`. + pub inline fn characterReferenceChar(reader: @This()) u21 { + return reader.reader.characterReferenceChar(); + } -pub const reader = @import("reader.zig").reader; -pub const readDocument = @import("reader.zig").readDocument; -pub const Reader = @import("reader.zig").Reader; -pub const ReaderOptions = @import("reader.zig").ReaderOptions; -pub const QName = @import("reader.zig").QName; -pub const Event = @import("reader.zig").Event; + /// See `Reader.characterReferenceName`. + pub inline fn characterReferenceName(reader: @This()) []const u8 { + return reader.reader.characterReferenceName(); + } -pub const Node = @import("node.zig").Node; -pub const OwnedValue = @import("node.zig").OwnedValue; + /// See `Reader.namespaceUri`. 
+ pub inline fn namespaceUri(reader: @This(), prefix: []const u8) []const u8 { + return reader.reader.namespaceUri(prefix); + } -pub const writer = @import("writer.zig").writer; -pub const Writer = @import("writer.zig").Writer; -pub const fmtAttributeContent = @import("writer.zig").fmtAttributeContent; -pub const fmtElementContent = @import("writer.zig").fmtElementContent; -pub const fmtQName = @import("writer.zig").fmtQName; + /// Returns the underlying raw `Reader`. + pub inline fn raw(reader: *@This()) *Reader { + return &reader.reader; + } + }; +} + +pub const StaticDocument = struct { + data: []const u8, + pos: usize, + + pub const Error = error{}; + + pub fn init(data: []const u8) StaticDocument { + return .{ .data = data, .pos = 0 }; + } + + pub fn reader(doc: *StaticDocument, gpa: Allocator, options: Reader.Options) GenericReader(Error) { + return .{ .reader = Reader.init(gpa, doc.source(), options) }; + } + + pub fn source(doc: *StaticDocument) Reader.Source { + return .{ + .context = doc, + .moveFn = &move, + }; + } + + fn move(context: *const anyopaque, advance: usize, len: usize) anyerror![]const u8 { + const doc: *StaticDocument = @alignCast(@constCast(@ptrCast(context))); + doc.pos += advance; + const rest_doc = doc.data[doc.pos..]; + return rest_doc[0..@min(len, rest_doc.len)]; + } +}; + +pub fn StreamingDocument(comptime ReaderType: type) type { + return struct { + stream: ReaderType, + buf: []u8, + pos: usize, + avail: usize, + gpa: Allocator, + + pub const Error = ReaderType.Error || Allocator.Error; + + pub fn init(gpa: Allocator, stream: ReaderType) @This() { + return .{ + .stream = stream, + .buf = &.{}, + .pos = 0, + .avail = 0, + .gpa = gpa, + }; + } + + pub fn deinit(doc: *@This()) void { + doc.gpa.free(doc.buf); + doc.* = undefined; + } + + pub fn reader(doc: *@This(), gpa: Allocator, options: Reader.Options) GenericReader(Error) { + return .{ .reader = Reader.init(gpa, doc.source(), options) }; + } + + pub fn source(doc: *@This()) 
Reader.Source { + return .{ + .context = doc, + .moveFn = &move, + }; + } + + fn move(context: *const anyopaque, advance: usize, len: usize) anyerror![]const u8 { + const doc: *@This() = @alignCast(@constCast(@ptrCast(context))); + doc.pos += advance; + if (len <= doc.avail - doc.pos) return doc.buf[doc.pos..][0..len]; + doc.discardRead(); + try doc.fillBuffer(len); + return doc.buf[0..@min(len, doc.avail)]; + } + + fn discardRead(doc: *@This()) void { + doc.avail -= doc.pos; + std.mem.copyForwards(u8, doc.buf[0..doc.avail], doc.buf[doc.pos..][0..doc.avail]); + doc.pos = 0; + } + + const min_buf_len = 4096; + + fn fillBuffer(doc: *@This(), target_len: usize) !void { + if (target_len > doc.buf.len) { + const new_buf_len = @min(min_buf_len, std.math.ceilPowerOfTwoAssert(usize, target_len)); + doc.buf = try doc.gpa.realloc(doc.buf, new_buf_len); + } + doc.avail += try doc.stream.read(doc.buf[doc.avail..]); + } + }; +} + +pub fn streamingDocument(gpa: Allocator, reader: anytype) StreamingDocument(@TypeOf(reader)) { + return StreamingDocument(@TypeOf(reader)).init(gpa, reader); +} + +test streamingDocument { + var fbs = std.io.fixedBufferStream( + \\ + \\Hello, world! 
+ \\ + ); + var doc = streamingDocument(std.testing.allocator, fbs.reader()); + defer doc.deinit(); + var reader = doc.reader(std.testing.allocator, .{}); + defer reader.deinit(); + + try expectEqual(.xml_declaration, try reader.read()); + try expectEqualStrings("1.0", reader.xmlDeclarationVersion()); + + try expectEqual(.element_start, try reader.read()); + try expectEqualStrings("root", reader.elementName()); + + try expectEqual(.text, try reader.read()); + try expectEqualStrings("Hello, world!", reader.textRaw()); + + try expectEqual(.element_end, try reader.read()); + try expectEqualStrings("root", reader.elementName()); + + try expectEqual(.eof, try reader.read()); +} + +pub const Writer = @import("Writer.zig"); + +pub fn GenericWriter(comptime SinkError: type) type { + return struct { + writer: Writer, + + pub const WriteError = Writer.WriteError || SinkError; + + pub inline fn bom(writer: *@This()) WriteError!void { + return @errorCast(writer.writer.bom()); + } + + pub inline fn xmlDeclaration(writer: *@This(), encoding: ?[]const u8, standalone: ?bool) WriteError!void { + return @errorCast(writer.writer.xmlDeclaration(encoding, standalone)); + } + + pub inline fn elementStart(writer: *@This(), name: []const u8) WriteError!void { + return @errorCast(writer.writer.elementStart(name)); + } + + pub inline fn elementEnd(writer: *@This(), name: []const u8) WriteError!void { + return @errorCast(writer.writer.elementEnd(name)); + } + + pub inline fn elementEndEmpty(writer: *@This()) WriteError!void { + return @errorCast(writer.writer.elementEndEmpty()); + } + + pub inline fn attribute(writer: *@This(), name: []const u8, value: []const u8) WriteError!void { + return @errorCast(writer.writer.attribute(name, value)); + } + + pub inline fn pi(writer: *@This(), target: []const u8, data: []const u8) WriteError!void { + return @errorCast(writer.writer.pi(target, data)); + } + + pub inline fn text(writer: *@This(), s: []const u8) WriteError!void { + return 
@errorCast(writer.writer.text(s)); + } + }; +} + +pub fn StreamingOutput(comptime WriterType: type) type { + return struct { + stream: WriterType, + + pub const Error = WriterType.Error; + + pub fn writer(out: *const @This(), options: Writer.Options) GenericWriter(Error) { + return .{ .writer = Writer.init(out.sink(), options) }; + } + + pub fn sink(out: *const @This()) Writer.Sink { + return .{ + .context = out, + .writeFn = &write, + }; + } + + fn write(context: *const anyopaque, data: []const u8) anyerror!void { + const out: *const @This() = @alignCast(@ptrCast(context)); + var pos: usize = 0; + while (pos < data.len) { + pos += try out.stream.write(data[pos..]); + } + } + }; +} + +pub fn streamingOutput(writer: anytype) StreamingOutput(@TypeOf(writer)) { + return .{ .stream = writer }; +} test { - testing.refAllDecls(@This()); + _ = Reader; + _ = Writer; } diff --git a/test/xmlconf.zig b/test/xmlconf.zig deleted file mode 100644 index 4e17e69..0000000 --- a/test/xmlconf.zig +++ /dev/null @@ -1,471 +0,0 @@ -//! A test runner for the W3C XML conformance test suite: -//! https://www.w3.org/XML/Test/ - -const std = @import("std"); -const xml = @import("xml"); -const fs = std.fs; -const io = std.io; -const mem = std.mem; -const process = std.process; -const Allocator = mem.Allocator; -const ArrayListUnmanaged = std.ArrayListUnmanaged; - -const usage = - \\Usage: xmlconf [options] files... - \\ - \\The provided files are expected to be XML documents containing a root - \\TESTCASES element containing TESTs. 
- \\ - \\Options: - \\ -h, --help show help - \\ -v, --verbose enable verbose output - \\ -; - -const max_test_data_bytes = 2 * 1024 * 1024; // 2MB - -const Suite = struct { - profile: ?[]const u8, - tests: []const Test, -}; - -const Test = struct { - id: []const u8, - type: Type, - version: ?[]const u8, - edition: ?[]const u8, - entities: Entities, - namespace: bool, - sections: []const u8, - description: []const u8, - input: []const u8, - output: ?[]const u8, - - const Type = enum { - valid, - invalid, - @"not-wf", - @"error", - - fn parse(value: []const u8) !Type { - inline for (std.meta.fields(Type)) |field| { - if (mem.eql(u8, value, field.name)) { - return @enumFromInt(field.value); - } - } - return error.InvalidTest; - } - }; - - const Entities = enum { - both, - none, - parameter, - general, - - fn parse(value: []const u8) !Entities { - inline for (std.meta.fields(Entities)) |field| { - if (mem.eql(u8, value, field.name)) { - return @enumFromInt(field.value); - } - } - return error.InvalidTest; - } - }; -}; - -fn Context(comptime OutType: type) type { - return struct { - allocator: Allocator, - verbose: bool, - tty_config: io.tty.Config, - out: OutType, - passed: ArrayListUnmanaged(Test) = .{}, - failed: ArrayListUnmanaged(Test) = .{}, - skipped: ArrayListUnmanaged(Test) = .{}, - - const Self = @This(); - - fn msg(self: Self, comptime format: []const u8, args: anytype) !void { - try self.out.print(format ++ "\n", args); - } - - fn pass(self: *Self, @"test": Test) !void { - try self.passed.append(self.allocator, @"test"); - if (self.verbose) { - try self.tty_config.setColor(self.out, .green); - try self.out.print("PASS: {s} ({s})\n", .{ @"test".id, @"test".sections }); - try self.tty_config.setColor(self.out, .reset); - } - } - - fn fail(self: *Self, @"test": Test, reason: []const u8) !void { - try self.failed.append(self.allocator, @"test"); - try self.tty_config.setColor(self.out, .red); - try self.out.print("FAIL: {s} ({s}): {s}\n", .{ @"test".id, 
@"test".sections, reason }); - try self.tty_config.setColor(self.out, .reset); - } - - fn skip(self: *Self, @"test": Test, reason: []const u8) !void { - try self.skipped.append(self.allocator, @"test"); - if (self.verbose) { - try self.tty_config.setColor(self.out, .yellow); - try self.out.print("SKIP: {s} ({s}): {s}\n", .{ @"test".id, @"test".sections, reason }); - try self.tty_config.setColor(self.out, .reset); - } - } - }; -} - -fn context(allocator: Allocator, verbose: bool, tty_config: io.tty.Config, out: anytype) Context(@TypeOf(out)) { - return .{ .allocator = allocator, .verbose = verbose, .tty_config = tty_config, .out = out }; -} - -pub fn main() !void { - var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator); - defer arena.deinit(); - const allocator = arena.allocator(); - - var args_iter = try process.argsWithAllocator(allocator); - defer args_iter.deinit(); - _ = args_iter.skip(); - - const stderr = io.getStdErr().writer(); - - var allow_options = true; - var verbose = false; - var suites = ArrayListUnmanaged(Suite){}; - while (args_iter.next()) |arg| { - if (allow_options and mem.startsWith(u8, arg, "-")) { - if (std.mem.eql(u8, arg, "-h") or std.mem.eql(u8, arg, "--help")) { - try stderr.writeAll(usage); - process.exit(0); - } else if (std.mem.eql(u8, arg, "-v") or std.mem.eql(u8, arg, "--verbose")) { - verbose = true; - } else if (std.mem.eql(u8, arg, "--")) { - allow_options = false; - } else { - try stderr.print("unrecognized option: {s}", .{arg}); - process.exit(1); - } - } else { - var suite_dir = try fs.cwd().openDir(fs.path.dirname(arg) orelse ".", .{}); - defer suite_dir.close(); - var suite_file = try suite_dir.openFile(fs.path.basename(arg), .{}); - defer suite_file.close(); - - var buf_reader = io.bufferedReader(suite_file.reader()); - var suite_reader = xml.reader(allocator, buf_reader.reader(), xml.encoding.DefaultDecoder{}, .{}); - defer suite_reader.deinit(); - try suites.append(allocator, try readSuite(allocator, 
suite_dir, &suite_reader)); - } - } - - if (suites.items.len == 0) { - try stderr.writeAll("expected at least one test suite file"); - process.exit(1); - } - - const stdout = io.getStdOut(); - const tty_config = io.tty.detectConfig(stdout); - var stdout_buf = io.bufferedWriter(stdout.writer()); - var ctx = context(allocator, verbose, tty_config, stdout_buf.writer()); - - for (suites.items) |suite| { - try runSuite(suite, &ctx); - } - - try ctx.msg("DONE: {} passed, {} failed, {} skipped", .{ - ctx.passed.items.len, - ctx.failed.items.len, - ctx.skipped.items.len, - }); - try stdout_buf.flush(); -} - -fn readSuite(allocator: Allocator, suite_dir: fs.Dir, suite_reader: anytype) !Suite { - var profile: ?[]const u8 = null; - var tests = ArrayListUnmanaged(Test){}; - - while (try suite_reader.next()) |event| { - switch (event) { - .element_start => |element_start| if (element_start.name.is(null, "TESTCASES")) { - for (element_start.attributes) |attr| { - if (attr.name.is(null, "PROFILE")) { - profile = try allocator.dupe(u8, attr.value); - } - } - } else if (element_start.name.is(null, "TEST")) { - try tests.append(allocator, try readTest(allocator, suite_dir, element_start, suite_reader.children())); - } else { - try suite_reader.children().skip(); - }, - else => {}, - } - } - - return .{ - .profile = profile, - .tests = tests.items, - }; -} - -fn readTest(allocator: Allocator, suite_dir: fs.Dir, test_start: xml.Event.ElementStart, test_reader: anytype) !Test { - var id: ?[]const u8 = null; - var @"type": ?Test.Type = null; - var version: ?[]const u8 = null; - var edition: ?[]const u8 = null; - var entities = Test.Entities.none; - var namespace = true; - var sections: ?[]const u8 = null; - var description = ArrayListUnmanaged(u8){}; - var input: ?[]const u8 = null; - var output: ?[]const u8 = null; - - for (test_start.attributes) |attr| { - if (attr.name.is(null, "ID")) { - id = try allocator.dupe(u8, attr.value); - } else if (attr.name.is(null, "TYPE")) { - @"type" = 
try Test.Type.parse(attr.value); - } else if (attr.name.is(null, "VERSION")) { - version = try allocator.dupe(u8, attr.value); - } else if (attr.name.is(null, "EDITION")) { - edition = try allocator.dupe(u8, attr.value); - } else if (attr.name.is(null, "ENTITIES")) { - entities = try Test.Entities.parse(attr.value); - } else if (attr.name.is(null, "NAMESPACE")) { - namespace = mem.eql(u8, attr.value, "yes"); - } else if (attr.name.is(null, "SECTIONS")) { - sections = try allocator.dupe(u8, attr.value); - } else if (attr.name.is(null, "URI")) { - input = try suite_dir.readFileAlloc(allocator, attr.value, max_test_data_bytes); - } else if (attr.name.is(null, "OUTPUT")) { - output = try suite_dir.readFileAlloc(allocator, attr.value, max_test_data_bytes); - } - } - - while (try test_reader.next()) |event| { - switch (event) { - .element_content => |element_content| try description.appendSlice(allocator, element_content.content), - else => {}, - } - } - - return .{ - .id = id orelse return error.InvalidTest, - .type = @"type" orelse return error.InvalidTest, - .version = version, - .edition = edition, - .entities = entities, - .namespace = namespace, - .sections = sections orelse return error.InvalidTest, - .description = description.items, - .input = input orelse return error.InvalidTest, - .output = output, - }; -} - -fn runSuite(suite: Suite, ctx: anytype) !void { - try ctx.msg("START: {s}", .{suite.profile orelse "untitled"}); - var suite_ctx = context(ctx.allocator, ctx.verbose, ctx.tty_config, ctx.out); - for (suite.tests) |@"test"| { - try runTest(@"test", &suite_ctx); - } - try ctx.msg("DONE: {s}: passed={} failed={} skipped={}", .{ - suite.profile orelse "untitled", - suite_ctx.passed.items.len, - suite_ctx.failed.items.len, - suite_ctx.skipped.items.len, - }); - try ctx.passed.appendSlice(ctx.allocator, suite_ctx.passed.items); - try ctx.failed.appendSlice(ctx.allocator, suite_ctx.failed.items); - try ctx.skipped.appendSlice(ctx.allocator, 
suite_ctx.skipped.items); -} - -fn runTest(@"test": Test, ctx: anytype) !void { - if (@"test".version) |version| { - if (!mem.eql(u8, version, "1.0")) { - return try ctx.skip(@"test", "only XML 1.0 is supported"); - } - } - if (@"test".edition) |edition| { - // This check will technically be incorrect if a 15th edition is - // released at some point, which seems highly unlikely - if (mem.indexOfScalar(u8, edition, '5') == null) { - return try ctx.skip(@"test", "only the fifth edition of XML 1.0 is supported"); - } - } - - switch (@"test".type) { - .valid, .invalid => { - var input_stream = io.fixedBufferStream(@"test".input); - // TODO: making namespace_aware a comptime option makes this possibly more difficult than it should be - if (@"test".namespace) { - var input_reader = xml.reader(ctx.allocator, input_stream.reader(), xml.encoding.DefaultDecoder{}, .{}); - defer input_reader.deinit(); - try runTestValid(@"test", &input_reader, ctx); - } else { - var input_reader = xml.reader(ctx.allocator, input_stream.reader(), xml.encoding.DefaultDecoder{}, .{ - .namespace_aware = false, - }); - defer input_reader.deinit(); - try runTestValid(@"test", &input_reader, ctx); - } - }, - .@"not-wf" => { - var input_stream = io.fixedBufferStream(@"test".input); - if (@"test".namespace) { - var input_reader = xml.reader(ctx.allocator, input_stream.reader(), xml.encoding.DefaultDecoder{}, .{}); - defer input_reader.deinit(); - try runTestNonWf(@"test", &input_reader, ctx); - } else { - var input_reader = xml.reader(ctx.allocator, input_stream.reader(), xml.encoding.DefaultDecoder{}, .{ - .namespace_aware = false, - }); - defer input_reader.deinit(); - try runTestNonWf(@"test", &input_reader, ctx); - } - }, - .@"error" => return try ctx.skip(@"test", "TODO: not sure how to run error tests"), - } -} - -fn runTestValid(@"test": Test, input_reader: anytype, ctx: anytype) !void { - var buf = ArrayListUnmanaged(u8){}; - defer buf.deinit(ctx.allocator); - while (input_reader.next()) 
|maybe_event| { - if (maybe_event) |event| { - try writeCanonical(ctx.allocator, &buf, event); - } else { - if (@"test".output) |output| { - if (!mem.eql(u8, buf.items, output)) { - return try ctx.fail(@"test", "expected output does not match"); - } - } - return try ctx.pass(@"test"); - } - } else |e| switch (e) { - error.DoctypeNotSupported => return try ctx.skip(@"test", "doctype not supported"), - error.CannotUndeclareNsPrefix, - error.DuplicateAttribute, - error.InvalidCharacterReference, - error.InvalidEncoding, - error.InvalidNsBinding, - error.InvalidPiTarget, - error.InvalidQName, - error.InvalidUtf8, - error.InvalidUtf16, - error.MismatchedEndTag, - error.SyntaxError, - error.UndeclaredEntityReference, - error.UndeclaredNsPrefix, - error.UnexpectedEndOfInput, - error.QNameNotAllowed, - => return try ctx.fail(@"test", @errorName(e)), - else => |other_e| return other_e, - } -} - -fn runTestNonWf(@"test": Test, input_reader: anytype, ctx: anytype) !void { - while (input_reader.next()) |event| { - if (event == null) { - return try ctx.fail(@"test", "expected error, found none"); - } - } else |e| switch (e) { - error.DoctypeNotSupported => return try ctx.skip(@"test", "doctype not supported"), - error.CannotUndeclareNsPrefix, - error.DuplicateAttribute, - error.InvalidCharacterReference, - error.InvalidEncoding, - error.InvalidNsBinding, - error.InvalidPiTarget, - error.InvalidQName, - error.InvalidUtf8, - error.InvalidUtf16, - error.MismatchedEndTag, - error.SyntaxError, - error.UndeclaredEntityReference, - error.UndeclaredNsPrefix, - error.UnexpectedEndOfInput, - error.QNameNotAllowed, - => return try ctx.pass(@"test"), - else => |other_e| return other_e, - } -} - -fn writeCanonical(allocator: Allocator, buf: *ArrayListUnmanaged(u8), event: xml.Event) !void { - switch (event) { - .xml_declaration, .comment => {}, - .element_start => |element_start| { - try buf.append(allocator, '<'); - try writeQName(allocator, buf, element_start.name); - const attrs = try 
allocator.dupe(xml.Event.Attribute, element_start.attributes); - defer allocator.free(attrs); - std.sort.heap(xml.Event.Attribute, attrs, {}, attrLessThan); - for (attrs) |attr| { - try buf.append(allocator, ' '); - try writeQName(allocator, buf, attr.name); - try buf.appendSlice(allocator, "=\""); - try writeContent(allocator, buf, attr.value); - try buf.append(allocator, '"'); - } - try buf.append(allocator, '>'); - }, - .element_content => |element_content| { - try writeContent(allocator, buf, element_content.content); - }, - .element_end => |element_end| { - try buf.appendSlice(allocator, "'); - }, - .pi => |pi| { - try buf.appendSlice(allocator, ""); - }, - } -} - -fn writeQName(allocator: Allocator, buf: *ArrayListUnmanaged(u8), qname: xml.QName) !void { - if (qname.prefix) |prefix| { - try buf.appendSlice(allocator, prefix); - try buf.append(allocator, ':'); - } - try buf.appendSlice(allocator, qname.local); -} - -fn writeContent(allocator: Allocator, buf: *ArrayListUnmanaged(u8), content: []const u8) !void { - for (content) |c| { - switch (c) { - '&' => try buf.appendSlice(allocator, "&"), - '<' => try buf.appendSlice(allocator, "<"), - '>' => try buf.appendSlice(allocator, ">"), - '"' => try buf.appendSlice(allocator, """), - '\t' => try buf.appendSlice(allocator, " "), - '\n' => try buf.appendSlice(allocator, " "), - '\r' => try buf.appendSlice(allocator, " "), - else => try buf.append(allocator, c), - } - } -} - -fn attrLessThan(_: void, lhs: xml.Event.Attribute, rhs: xml.Event.Attribute) bool { - // This is a pretty stupid implementation, but it should work for all - // reasonable test cases - var lhs_buf: [1024]u8 = undefined; - const lhs_name = if (lhs.name.ns) |ns| - std.fmt.bufPrint(&lhs_buf, "{s}:{s}", .{ ns, lhs.name.local }) catch @panic("attribute name too long") - else - lhs.name.local; - - var rhs_buf: [1024]u8 = undefined; - const rhs_name = if (rhs.name.ns) |ns| - std.fmt.bufPrint(&rhs_buf, "{s}:{s}", .{ ns, rhs.name.local }) catch 
@panic("attribute name too long") - else - rhs.name.local; - - return mem.lessThan(u8, lhs_name, rhs_name); -} diff --git a/xmlconf/build.zig b/xmlconf/build.zig new file mode 100644 index 0000000..cc991f4 --- /dev/null +++ b/xmlconf/build.zig @@ -0,0 +1,44 @@ +const std = @import("std"); + +pub fn build(b: *std.Build) void { + const target = b.standardTargetOptions(.{}); + const optimize = b.standardOptimizeOption(.{}); + + const xml = b.dependency("xml", .{ + .target = target, + .optimize = optimize, + }); + + const xmlconf_exe = b.addExecutable(.{ + .name = "xmlconf", + .root_source_file = b.path("src/xmlconf.zig"), + .target = target, + .optimize = optimize, + }); + xmlconf_exe.root_module.addImport("xml", xml.module("xml")); + b.installArtifact(xmlconf_exe); + + const xmlts = b.dependency("xmlts", .{}); + const xmlts_run = b.addRunArtifact(xmlconf_exe); + // Since we can't process DTDs yet, we need to manually specify the test + // suite root files individually. + const suite_paths: []const []const u8 = &.{ + "eduni/errata-2e/errata2e.xml", + "eduni/errata-3e/errata3e.xml", + "eduni/errata-4e/errata4e.xml", + "ibm/ibm_oasis_invalid.xml", + "ibm/ibm_oasis_not-wf.xml", + "ibm/ibm_oasis_valid.xml", + "japanese/japanese.xml", + "oasis/oasis.xml", + // The sun test suite files are not structured in a way we can handle + // without DTD support. 
+ "xmltest/xmltest.xml", + }; + for (suite_paths) |path| { + xmlts_run.addFileArg(xmlts.path(path)); + } + + const test_step = b.step("test", "Run the tests"); + test_step.dependOn(&xmlts_run.step); +} diff --git a/xmlconf/build.zig.zon b/xmlconf/build.zig.zon new file mode 100644 index 0000000..02735b6 --- /dev/null +++ b/xmlconf/build.zig.zon @@ -0,0 +1,18 @@ +.{ + .name = "xmlconf", + .version = "0.0.0", + .paths = .{ + "build.zig", + "build.zig.zon", + "src", + }, + .dependencies = .{ + .xml = .{ + .path = "..", + }, + .xmlts = .{ + .url = "https://www.w3.org/XML/Test/xmlts20130923.tar.gz", + .hash = "1220322f729089d5371fce0b0777edb9946cc54a389aa372c879d9c0843d862c4bbe", + }, + }, +} diff --git a/xmlconf/src/xmlconf.zig b/xmlconf/src/xmlconf.zig new file mode 100644 index 0000000..c9a4faa --- /dev/null +++ b/xmlconf/src/xmlconf.zig @@ -0,0 +1,456 @@ +const std = @import("std"); +const Allocator = std.mem.Allocator; +const log = std.log; +const xml = @import("xml"); + +const usage = + \\Usage: xmlconf [options] files... + \\ + \\Runs the provided xmlconf test suites. 
+ \\ + \\Options: + \\ -h, --help show help + \\ -v, --verbose increase verbosity + \\ +; + +var log_tty_config: std.io.tty.Config = undefined; // Will be initialized immediately in main +var log_level: std.log.Level = .warn; + +pub const std_options: std.Options = .{ + .logFn = logImpl, +}; + +pub fn logImpl( + comptime level: std.log.Level, + comptime scope: @Type(.enum_literal), + comptime format: []const u8, + args: anytype, +) void { + if (@intFromEnum(level) > @intFromEnum(log_level)) return; + + const prefix = if (scope == .default) + comptime level.asText() ++ ": " + else + comptime level.asText() ++ "(" ++ @tagName(scope) ++ "): "; + std.debug.lockStdErr(); + defer std.debug.unlockStdErr(); + const stderr = std.io.getStdErr().writer(); + log_tty_config.setColor(stderr, switch (level) { + .err => .bright_red, + .warn => .bright_yellow, + .info => .bright_blue, + .debug => .bright_magenta, + }) catch return; + stderr.writeAll(prefix) catch return; + log_tty_config.setColor(stderr, .reset) catch return; + stderr.print(format ++ "\n", args) catch return; +} + +pub fn main() !void { + log_tty_config = std.io.tty.detectConfig(std.io.getStdErr()); + + var arena_state = std.heap.ArenaAllocator.init(std.heap.page_allocator); + defer arena_state.deinit(); + const arena = arena_state.allocator(); + + var suite_paths = std.ArrayList([]const u8).init(arena); + + var args: ArgIterator = .{ .args = try std.process.argsWithAllocator(arena) }; + _ = args.next(); + while (args.next()) |arg| { + switch (arg) { + .option => |option| if (option.is('h', "help")) { + try std.io.getStdOut().writeAll(usage); + std.process.exit(0); + } else if (option.is('v', "verbose")) { + log_level = switch (log_level) { + .err => .warn, + .warn => .info, + .info => .debug, + .debug => .debug, + }; + } else { + fatal("unrecognized option: {}", .{option}); + }, + .param => |param| { + try suite_paths.append(try arena.dupe(u8, param)); + }, + .unexpected_value => |unexpected_value| 
fatal("unexpected value to --{s}: {s}", .{ + unexpected_value.option, + unexpected_value.value, + }), + } + } + + var gpa_state: std.heap.GeneralPurposeAllocator(.{}) = .{}; + defer _ = gpa_state.deinit(); + const gpa = gpa_state.allocator(); + + var results: Results = .{}; + for (suite_paths.items) |suite_path| { + runFile(gpa, suite_path, &results) catch |err| + results.err("running suite {s}: {}", .{ suite_path, err }); + } + std.debug.print("{} passed, {} failed, {} skipped\n", .{ results.passed, results.failed, results.skipped }); + std.process.exit(if (results.ok()) 0 else 1); +} + +fn fatal(comptime format: []const u8, args: anytype) noreturn { + log.err(format, args); + std.process.exit(1); +} + +const Results = struct { + passed: usize = 0, + failed: usize = 0, + skipped: usize = 0, + run_error: bool = false, + + fn ok(results: Results) bool { + return results.failed == 0 and !results.run_error; + } + + fn pass(results: *Results, id: []const u8) void { + log.debug("pass: {s}", .{id}); + results.passed += 1; + } + + fn fail(results: *Results, id: []const u8, comptime fmt: []const u8, args: anytype) void { + log.err("fail: {s}: " ++ fmt, .{id} ++ args); + results.failed += 1; + } + + fn skip(results: *Results, id: []const u8, comptime fmt: []const u8, args: anytype) void { + log.info("skip: {s}: " ++ fmt, .{id} ++ args); + results.skipped += 1; + } + + fn err(results: *Results, comptime fmt: []const u8, args: anytype) void { + log.err(fmt, args); + results.run_error = true; + } +}; + +const max_file_size = 2 * 1024 * 1024; + +fn runFile(gpa: Allocator, path: []const u8, results: *Results) !void { + var dir = try std.fs.cwd().openDir(std.fs.path.dirname(path) orelse ".", .{}); + defer dir.close(); + const data = try dir.readFileAlloc(gpa, std.fs.path.basename(path), max_file_size); + defer gpa.free(data); + var doc = xml.StaticDocument.init(data); + var reader = doc.reader(gpa, .{}); + defer reader.deinit(); + + try reader.skipProlog(); + if (!std.mem.eql(u8, 
"TESTCASES", reader.elementName())) return error.InvalidTest; + try runSuite(gpa, dir, reader.raw(), results); +} + +fn runSuite(gpa: Allocator, dir: std.fs.Dir, reader: *xml.Reader, results: *Results) !void { + if (reader.attributeIndex("PROFILE")) |profile_attr| { + log.info("suite: {s}", .{try reader.attributeValue(profile_attr)}); + } + + while (true) { + switch (try reader.read()) { + .element_start => if (std.mem.eql(u8, reader.elementName(), "TESTCASES")) { + try runSuite(gpa, dir, reader, results); + } else if (std.mem.eql(u8, reader.elementName(), "TEST")) { + try runTest(gpa, dir, reader, results); + } else { + return error.InvalidTest; + }, + .element_end => break, + else => {}, + } + } +} + +fn runTest(gpa: Allocator, dir: std.fs.Dir, reader: *xml.Reader, results: *Results) !void { + const @"type" = type: { + const index = reader.attributeIndex("TYPE") orelse return error.InvalidTest; + break :type std.meta.stringToEnum(TestType, try reader.attributeValue(index)) orelse return error.InvalidTest; + }; + const id = id: { + const index = reader.attributeIndex("ID") orelse return error.InvalidTest; + break :id try reader.attributeValueAlloc(gpa, index); + }; + defer gpa.free(id); + if (reader.attributeIndex("VERSION")) |index| check_version: { + const versions = try reader.attributeValue(index); + var iter = std.mem.splitScalar(u8, versions, ' '); + while (iter.next()) |version| { + if (std.mem.eql(u8, version, "1.0")) break :check_version; + } + return results.skip(id, "only XML 1.0 is supported", .{}); + } + if (reader.attributeIndex("EDITION")) |index| check_edition: { + const editions = try reader.attributeValue(index); + var iter = std.mem.splitScalar(u8, editions, ' '); + while (iter.next()) |edition| { + if (std.mem.eql(u8, edition, "5")) break :check_edition; + } + return results.skip(id, "only the fifth edition of XML 1.0 is supported", .{}); + } + const namespace = namespace: { + const index = reader.attributeIndex("NAMESPACE") orelse break 
:namespace .yes; + break :namespace std.meta.stringToEnum(enum { yes, no }, try reader.attributeValue(index)) orelse return error.InvalidTest; + }; + const input = input: { + const index = reader.attributeIndex("URI") orelse return error.InvalidTest; + const path = try reader.attributeValue(index); + break :input dir.readFileAlloc(gpa, path, max_file_size) catch |err| + return results.err("{s}: reading input file: {s}: {}", .{ id, path, err }); + }; + defer gpa.free(input); + const output = output: { + const index = reader.attributeIndex("OUTPUT") orelse break :output null; + const path = try reader.attributeValue(index); + break :output dir.readFileAlloc(gpa, path, max_file_size) catch |err| + return results.err("{s}: reading output file: {s}: {}", .{ id, path, err }); + }; + defer if (output) |o| gpa.free(o); + try reader.skipElement(); + + if (std.mem.startsWith(u8, input, "\xFE\xFF") or + std.mem.startsWith(u8, input, "\xFF\xFE")) + { + return results.skip(id, "UTF-16 unsupported", .{}); + } + + const options: TestOptions = .{ + .namespace = namespace == .yes, + }; + switch (@"type") { + .valid, .invalid => try runTestParseable(gpa, id, input, output, options, results), + .@"not-wf" => try runTestUnparseable(gpa, id, input, options, results), + .@"error" => results.skip(id, "not sure how to run error tests", .{}), + } +} + +const TestOptions = struct { + namespace: bool, +}; + +fn runTestParseable( + gpa: Allocator, + id: []const u8, + input: []const u8, + output: ?[]const u8, + options: TestOptions, + results: *Results, +) !void { + var doc = xml.StaticDocument.init(input); + var reader = doc.reader(gpa, .{ + .namespace_aware = options.namespace, + }); + defer reader.deinit(); + + var canonical_buf = std.ArrayList(u8).init(gpa); + defer canonical_buf.deinit(); + var canonical_output = xml.streamingOutput(canonical_buf.writer()); + var canonical = canonical_output.writer(.{}); + + while (true) { + const node = reader.read() catch |err| switch (err) { + 
error.MalformedXml => { + switch (reader.errorCode()) { + .doctype_unsupported => return results.skip(id, "doctype unsupported", .{}), + .xml_declaration_encoding_unsupported => return results.skip(id, "encoding unsupported", .{}), + else => |code| { + const loc = reader.errorLocation(); + return results.fail(id, "malformed: {}:{}: {}", .{ loc.line, loc.column, code }); + }, + } + }, + error.OutOfMemory => return error.OutOfMemory, + }; + switch (node) { + .eof => break, + .xml_declaration, .comment => {}, // ignored in canonical form + .element_start => { + try canonical.elementStart(reader.elementName()); + + const sorted_attrs = try gpa.alloc(usize, reader.attributeCount()); + defer gpa.free(sorted_attrs); + for (0..reader.attributeCount()) |i| sorted_attrs[i] = i; + std.sort.pdq(usize, sorted_attrs, reader, struct { + fn lessThan(r: @TypeOf(reader), lhs: usize, rhs: usize) bool { + return std.mem.lessThan(u8, r.attributeName(lhs), r.attributeName(rhs)); + } + }.lessThan); + for (sorted_attrs) |i| { + try canonical.attribute(reader.attributeName(i), try reader.attributeValue(i)); + } + }, + .element_end => { + try canonical.elementEnd(reader.elementName()); + }, + .pi => { + try canonical.pi(reader.piTarget(), try reader.piData()); + }, + .text => { + try canonical.text(try reader.text()); + }, + .cdata => { + try canonical.text(try reader.cdata()); + }, + .character_reference => { + var buf: [4]u8 = undefined; + const len = std.unicode.utf8Encode(reader.characterReferenceChar(), &buf) catch unreachable; + try canonical.text(buf[0..len]); + }, + .entity_reference => { + const value = xml.predefined_entities.get(reader.entityReferenceName()) orelse unreachable; + try canonical.text(value); + }, + } + } + + if (output) |expected_canonical| { + if (!std.mem.eql(u8, canonical_buf.items, expected_canonical)) { + return results.fail( + id, + "canonical output does not match\n\nexpected:\n{s}\n\nactual:{s}", + .{ expected_canonical, canonical_buf.items }, + ); + } + } 
+ return results.pass(id); +} + +fn runTestUnparseable( + gpa: Allocator, + id: []const u8, + input: []const u8, + options: TestOptions, + results: *Results, +) !void { + var doc = xml.StaticDocument.init(input); + var reader = doc.reader(gpa, .{ + .namespace_aware = options.namespace, + }); + defer reader.deinit(); + + while (true) { + const node = reader.read() catch |err| switch (err) { + error.MalformedXml => switch (reader.errorCode()) { + .doctype_unsupported => return results.skip(id, "doctype unsupported", .{}), + .xml_declaration_encoding_unsupported => return results.skip(id, "encoding unsupported", .{}), + else => return results.pass(id), + }, + error.OutOfMemory => return error.OutOfMemory, + }; + if (node == .eof) return results.fail(id, "expected to fail to parse", .{}); + } +} + +const TestType = enum { + valid, + invalid, + @"not-wf", + @"error", +}; + +// Inspired by https://github.com/judofyr/parg +const ArgIterator = struct { + args: std.process.ArgIterator, + state: union(enum) { + normal, + short: []const u8, + long: struct { + option: []const u8, + value: []const u8, + }, + params_only, + } = .normal, + + const Arg = union(enum) { + option: union(enum) { + short: u8, + long: []const u8, + + fn is(option: @This(), short: ?u8, long: ?[]const u8) bool { + return switch (option) { + .short => |c| short == c, + .long => |s| std.mem.eql(u8, long orelse return false, s), + }; + } + + pub fn format(option: @This(), comptime _: []const u8, _: std.fmt.FormatOptions, writer: anytype) !void { + switch (option) { + .short => |c| try writer.print("-{c}", .{c}), + .long => |s| try writer.print("--{s}", .{s}), + } + } + }, + param: []const u8, + unexpected_value: struct { + option: []const u8, + value: []const u8, + }, + }; + + fn deinit(iter: *ArgIterator) void { + iter.args.deinit(); + iter.* = undefined; + } + + fn next(iter: *ArgIterator) ?Arg { + switch (iter.state) { + .normal => { + const arg = iter.args.next() orelse return null; + if (std.mem.eql(u8, 
arg, "--")) { + iter.state = .params_only; + return .{ .param = iter.args.next() orelse return null }; + } else if (std.mem.startsWith(u8, arg, "--")) { + if (std.mem.indexOfScalar(u8, arg, '=')) |equals_index| { + const option = arg["--".len..equals_index]; + iter.state = .{ .long = .{ + .option = option, + .value = arg[equals_index + 1 ..], + } }; + return .{ .option = .{ .long = option } }; + } else { + return .{ .option = .{ .long = arg["--".len..] } }; + } + } else if (std.mem.startsWith(u8, arg, "-") and arg.len > 1) { + if (arg.len > 2) { + iter.state = .{ .short = arg["-".len + 1 ..] }; + } + return .{ .option = .{ .short = arg["-".len] } }; + } else { + return .{ .param = arg }; + } + }, + .short => |rest| { + if (rest.len > 1) { + iter.state = .{ .short = rest[1..] }; + } + return .{ .option = .{ .short = rest[0] } }; + }, + .long => |long| return .{ .unexpected_value = .{ + .option = long.option, + .value = long.value, + } }, + .params_only => return .{ .param = iter.args.next() orelse return null }, + } + } + + fn optionValue(iter: *ArgIterator) ?[]const u8 { + switch (iter.state) { + .normal => return iter.args.next(), + .short => |rest| { + iter.state = .normal; + return rest; + }, + .long => |long| { + iter.state = .normal; + return long.value; + }, + .params_only => unreachable, + } + } +};