diff --git a/bench/src/scanner.zig b/bench/src/scanner.zig
index 4820412..933f168 100644
--- a/bench/src/scanner.zig
+++ b/bench/src/scanner.zig
@@ -1,13 +1,16 @@
+const std = @import("std");
 const xml = @import("xml");
 
 pub const main = @import("common.zig").main;
 
 pub fn runBench(data: []const u8) !void {
     var scanner = xml.Scanner{};
+    var data_stream = std.io.fixedBufferStream(data);
     var decoder = xml.encoding.Utf8Decoder{};
-    for (data) |b| {
-        if (try decoder.next(b)) |c| {
-            _ = try scanner.next(c, 1);
-        }
+    var buf: [4]u8 = undefined;
+    while (true) {
+        const c = try decoder.readCodepoint(data_stream.reader(), &buf);
+        if (!c.present) break;
+        _ = try scanner.next(c.codepoint, c.byte_length);
     }
 }
diff --git a/examples/scan.zig b/examples/scan.zig
index 98e90a8..faccaa0 100644
--- a/examples/scan.zig
+++ b/examples/scan.zig
@@ -27,19 +27,11 @@ pub fn main() !void {
 
     var line: usize = 1;
     var column: usize = 1;
-    read: while (true) {
-        var codepoint_bytes: usize = 0;
-        const c = while (true) {
-            const b = input_reader.readByte() catch |e| switch (e) {
-                error.EndOfStream => break :read,
-                else => |other| return other,
-            };
-            codepoint_bytes += 1;
-            if (try decoder.next(b)) |codepoint| {
-                break codepoint;
-            }
-        };
-        const token = scanner.next(c, codepoint_bytes) catch |e| {
+    while (true) {
+        var buf: [4]u8 = undefined;
+        const c = try decoder.readCodepoint(input_reader, &buf);
+        if (!c.present) break;
+        const token = scanner.next(c.codepoint, c.byte_length) catch |e| {
             try stdout_buffered_writer.flush();
             try stderr.print("error: {} ({}:{}): {}\n", .{ scanner.pos, line, column, e });
             return;
@@ -47,7 +39,7 @@ pub fn main() !void {
         if (token != .ok) {
             try stdout.print("{} ({}:{}): {}\n", .{ scanner.pos, line, column, token });
         }
-        if (c == '\n') {
+        if (c.codepoint == '\n') {
             line += 1;
             column = 1;
         } else {
diff --git a/src/encoding.zig b/src/encoding.zig
index d08153c..936230f 100644
--- a/src/encoding.zig
+++ b/src/encoding.zig
@@ -6,18 +6,16 @@
 //!
 //! - `const max_encoded_codepoint_len` - the maximum number of bytes a
 //!    single Unicode codepoint may occupy in encoded form.
-//! - `fn next(self: *Decoder, b: u8) Error!?u21` - accepts a single byte of
-//!   input, returning an error if the byte is invalid in the current state of
-//!   the decoder, a valid Unicode codepoint, or `null` if the byte is valid
-//!   but there is not yet a full codepoint to return.
+//! - `fn readCodepoint(self: *Decoder, reader: anytype, buf: []u8) (Error || @TypeOf(reader).Error)!ReadResult` -
+//!   reads a single codepoint from a `std.io.GenericReader` and writes its UTF-8
+//!   encoding to `buf`. Should return `error.UnexpectedEndOfInput` if a full
+//!   codepoint cannot be read, `error.Overflow` if the UTF-8-encoded form cannot
+//!   be written to `buf`; other decoder-specific errors can also be used.
 //! - `fn adaptTo(self: *Decoder, encoding: []const u8) error{InvalidEncoding}!void` -
 //!   accepts a UTF-8-encoded encoding name and returns an error if the desired
 //!   encoding cannot be handled by the decoder. This is intended to support
 //!   `Decoder` implementations which adapt to the encoding declared by an XML
 //!   document.
-//! - `fn isUtf8Compatible(self: Decoder) bool` - returns whether this decoder
-//!   decodes a subset of UTF-8. It is always safe to return false if this is
-//!   not known.
 
 const std = @import("std");
 const ascii = std.ascii;
@@ -27,6 +25,26 @@ const Allocator = std.mem.Allocator;
 const ArrayListUnmanaged = std.ArrayListUnmanaged;
 const BoundedArray = std.BoundedArray;
 
+/// The result of a read that did not fail: one codepoint, or `none` at the end.
+pub const ReadResult = packed struct(u32) {
+    /// The codepoint read.
+    codepoint: u21,
+    /// The length in bytes of the codepoint encoded in UTF-8.
+    byte_length: u10,
+    /// Whether `codepoint` and `byte_length` are present. This is a flag so the
+    /// result fits in a `u32` instead of the 8 bytes `?ReadResult` would take.
+    /// If https://github.com/ziglang/zig/issues/104 is implemented, a better
+    /// API would be a `packed struct(u31)` used as `?ReadResult` elsewhere.
+    present: bool = true,
+
+    /// The value returned when there is no more input: no codepoint was read.
+    pub const none: ReadResult = .{
+        .codepoint = 0,
+        .byte_length = 0,
+        .present = false,
+    };
+};
+
 /// A decoder which handles UTF-8 or UTF-16, using a BOM to detect UTF-16
 /// endianness.
 ///
@@ -35,123 +53,134 @@ const BoundedArray = std.BoundedArray;
 pub const DefaultDecoder = struct {
     state: union(enum) {
         start,
-        utf16_be_bom,
-        utf16_le_bom,
         utf8: Utf8Decoder,
-        utf16_le: Utf16Decoder(.little),
-        utf16_be: Utf16Decoder(.big),
+        utf16_le: Utf16Decoder(.Little),
+        utf16_be: Utf16Decoder(.Big),
     } = .start,
 
-    pub const Error = error{ InvalidUtf8, InvalidUtf16 };
+    pub const Error = Utf8Decoder.Error || Utf16Decoder(.Little).Error || Utf16Decoder(.Big).Error;
 
     pub const max_encoded_codepoint_len = 4;
+    const bom = 0xFEFF;
+    const bom_byte_length = unicode.utf8CodepointSequenceLength(bom) catch unreachable;
 
-    pub fn next(self: *DefaultDecoder, b: u8) Error!?u21 {
+    pub fn readCodepoint(self: *DefaultDecoder, reader: anytype, buf: []u8) (Error || @TypeOf(reader).Error)!ReadResult {
         switch (self.state) {
-            .start => if (b == 0xFE) {
-                self.state = .utf16_be_bom;
-                return null;
-            } else if (b == 0xFF) {
-                self.state = .utf16_le_bom;
-                return null;
-            } else {
-                self.state = .{ .utf8 = .{} };
-                return try self.state.utf8.next(b);
-            },
-            .utf16_be_bom => if (b == 0xFF) {
+            .start => {},
+            inline else => |*inner| return inner.readCodepoint(reader, buf),
+        }
+        // If attempting to match the UTF-16 BOM fails for whatever reason, we
+        // will assume we are reading UTF-8.
+        self.state = .{ .utf8 = .{} };
+        const b = reader.readByte() catch |e| switch (e) {
+            error.EndOfStream => return ReadResult.none, // empty input: clean end, like Utf8Decoder
+            else => |other| return other,
+        };
+        switch (b) {
+            0xFE => {
+                const b2 = reader.readByte() catch |e| switch (e) {
+                    error.EndOfStream => return error.InvalidUtf8,
+                    else => |other| return other,
+                };
+                if (b2 != 0xFF) return error.InvalidUtf8;
                 self.state = .{ .utf16_be = .{} };
-                return 0xFEFF;
-            } else {
-                self.state = .{ .utf8 = .{} };
-                return error.InvalidUtf8;
+                if (bom_byte_length > buf.len) return error.Overflow;
+                _ = unicode.utf8Encode(bom, buf) catch unreachable;
+                return .{ .codepoint = bom, .byte_length = bom_byte_length };
             },
-            .utf16_le_bom => if (b == 0xFE) {
+            0xFF => {
+                const b2 = reader.readByte() catch |e| switch (e) {
+                    error.EndOfStream => return error.InvalidUtf8,
+                    else => |other| return other,
+                };
+                if (b2 != 0xFE) return error.InvalidUtf8;
                 self.state = .{ .utf16_le = .{} };
-                return 0xFEFF;
-            } else {
-                self.state = .{ .utf8 = .{} };
-                return error.InvalidUtf8;
+                if (bom_byte_length > buf.len) return error.Overflow;
+                _ = unicode.utf8Encode(bom, buf) catch unreachable;
+                return .{ .codepoint = bom, .byte_length = bom_byte_length };
+            },
+            else => {
+                // The rest of this branch is copied from Utf8Decoder
+                const byte_length = unicode.utf8ByteSequenceLength(b) catch return error.InvalidUtf8;
+                if (byte_length > buf.len) return error.Overflow;
+                buf[0] = b;
+                if (byte_length == 1) return .{ .codepoint = b, .byte_length = 1 };
+                reader.readNoEof(buf[1..byte_length]) catch |e| switch (e) {
+                    error.EndOfStream => return error.UnexpectedEndOfInput,
+                    else => |other| return other,
+                };
+                const codepoint = switch (byte_length) {
+                    2 => unicode.utf8Decode2(buf[0..2]),
+                    3 => unicode.utf8Decode3(buf[0..3]),
+                    4 => unicode.utf8Decode4(buf[0..4]),
+                    else => unreachable,
+                } catch return error.InvalidUtf8;
+                return .{ .codepoint = codepoint, .byte_length = byte_length };
             },
-            inline else => |*decoder| return try decoder.next(b),
         }
     }
 
     pub fn adaptTo(self: *DefaultDecoder, encoding: []const u8) error{InvalidEncoding}!void {
         switch (self.state) {
-            .start, .utf16_be_bom, .utf16_le_bom => {},
+            .start => {},
             inline else => |*decoder| try decoder.adaptTo(encoding),
         }
     }
-
-    pub inline fn isUtf8Compatible(self: DefaultDecoder) bool {
-        return self.state == .utf8;
-    }
 };
 
 test DefaultDecoder {
     // UTF-8 no BOM
     {
-        var decoder = DefaultDecoder{};
-        try testing.expectEqual(@as(?u21, 'H'), try decoder.next('H'));
-        try testing.expectEqual(@as(?u21, null), try decoder.next(0xC3));
-        try testing.expectEqual(@as(?u21, 'ü'), try decoder.next(0xBC));
-        try testing.expectEqual(@as(?u21, null), try decoder.next(0xE6));
-        try testing.expectEqual(@as(?u21, null), try decoder.next(0x97));
-        try testing.expectEqual(@as(?u21, '日'), try decoder.next(0xA5));
-        try testing.expectEqual(@as(?u21, null), try decoder.next(0xF0));
-        try testing.expectEqual(@as(?u21, null), try decoder.next(0x9F));
-        try testing.expectEqual(@as(?u21, null), try decoder.next(0x98));
-        try testing.expectEqual(@as(?u21, '😀'), try decoder.next(0x80));
+        const input = "Hü日😀";
+        var decoder = try testDecode(DefaultDecoder, input, &.{
+            'H',
+            'ü',
+            '日',
+            '😀',
+        });
         try decoder.adaptTo("utf-8");
         try decoder.adaptTo("UTF-8");
     }
 
     // UTF-8 BOM
     {
-        var decoder = DefaultDecoder{};
-        try testing.expectEqual(@as(?u21, null), try decoder.next(0xEF));
-        try testing.expectEqual(@as(?u21, null), try decoder.next(0xBB));
-        try testing.expectEqual(@as(?u21, 0xFEFF), try decoder.next(0xBF));
-        try testing.expectEqual(@as(?u21, 'H'), try decoder.next('H'));
-        try testing.expectEqual(@as(?u21, null), try decoder.next(0xC3));
-        try testing.expectEqual(@as(?u21, 'ü'), try decoder.next(0xBC));
-        try testing.expectEqual(@as(?u21, null), try decoder.next(0xE6));
-        try testing.expectEqual(@as(?u21, null), try decoder.next(0x97));
-        try testing.expectEqual(@as(?u21, '日'), try decoder.next(0xA5));
-        try testing.expectEqual(@as(?u21, null), try decoder.next(0xF0));
-        try testing.expectEqual(@as(?u21, null), try decoder.next(0x9F));
-        try testing.expectEqual(@as(?u21, null), try decoder.next(0x98));
-        try testing.expectEqual(@as(?u21, '😀'), try decoder.next(0x80));
+        const input = "\u{FEFF}Hü日😀";
+        var decoder = try testDecode(DefaultDecoder, input, &.{
+            0xFEFF,
+            'H',
+            'ü',
+            '日',
+            '😀',
+        });
         try decoder.adaptTo("utf-8");
         try decoder.adaptTo("UTF-8");
     }
 
     // Invalid UTF-8 BOM
     {
-        var decoder = DefaultDecoder{};
-        try testing.expectEqual(@as(?u21, null), try decoder.next(0xEF));
-        try testing.expectEqual(@as(?u21, null), try decoder.next(0x00));
-        try testing.expectError(error.InvalidUtf8, decoder.next(0x00));
-        try testing.expectEqual(@as(?u21, 'H'), try decoder.next('H'));
+        const input = "\xEF\x00\x00H";
+        var decoder = try testDecode(DefaultDecoder, input, &.{
+            error.InvalidUtf8,
+            'H',
+        });
         try decoder.adaptTo("utf-8");
         try decoder.adaptTo("UTF-8");
     }
 
     // UTF-16BE BOM
     {
-        var decoder = DefaultDecoder{};
-        try testing.expectEqual(@as(?u21, null), try decoder.next(0xFE));
-        try testing.expectEqual(@as(?u21, 0xFEFF), try decoder.next(0xFF));
-        try testing.expectEqual(@as(?u21, null), try decoder.next(0x00));
-        try testing.expectEqual(@as(?u21, 'H'), try decoder.next('H'));
-        try testing.expectEqual(@as(?u21, null), try decoder.next(0x00));
-        try testing.expectEqual(@as(?u21, 'ü'), try decoder.next(0xFC));
-        try testing.expectEqual(@as(?u21, null), try decoder.next(0x65));
-        try testing.expectEqual(@as(?u21, '日'), try decoder.next(0xE5));
-        try testing.expectEqual(@as(?u21, null), try decoder.next(0xD8));
-        try testing.expectEqual(@as(?u21, null), try decoder.next(0x3D));
-        try testing.expectEqual(@as(?u21, null), try decoder.next(0xDE));
-        try testing.expectEqual(@as(?u21, '😀'), try decoder.next(0x00));
+        const input = "\xFE\xFF" ++ // U+FEFF
+            "\x00H" ++
+            "\x00\xFC" ++ // ü
+            "\x65\xE5" ++ // 日
+            "\xD8\x3D\xDE\x00"; // 😀
+        var decoder = try testDecode(DefaultDecoder, input, &.{
+            0xFEFF,
+            'H',
+            'ü',
+            '日',
+            '😀',
+        });
         try decoder.adaptTo("utf-16");
         try decoder.adaptTo("UTF-16");
         try decoder.adaptTo("utf-16be");
@@ -160,29 +189,29 @@ test DefaultDecoder {
 
     // Invalid UTF-16BE BOM
     {
-        var decoder = DefaultDecoder{};
-        try testing.expectEqual(@as(?u21, null), try decoder.next(0xFE));
-        try testing.expectError(error.InvalidUtf8, decoder.next(0x00));
-        try testing.expectEqual(@as(?u21, 'H'), try decoder.next('H'));
+        const input = "\xFE\x00H";
+        var decoder = try testDecode(DefaultDecoder, input, &.{
+            error.InvalidUtf8,
+            'H',
+        });
         try decoder.adaptTo("utf-8");
         try decoder.adaptTo("UTF-8");
     }
 
     // UTF-16LE BOM
     {
-        var decoder = DefaultDecoder{};
-        try testing.expectEqual(@as(?u21, null), try decoder.next(0xFF));
-        try testing.expectEqual(@as(?u21, 0xFEFF), try decoder.next(0xFE));
-        try testing.expectEqual(@as(?u21, null), try decoder.next('H'));
-        try testing.expectEqual(@as(?u21, 'H'), try decoder.next(0x00));
-        try testing.expectEqual(@as(?u21, null), try decoder.next(0xFC));
-        try testing.expectEqual(@as(?u21, 'ü'), try decoder.next(0x00));
-        try testing.expectEqual(@as(?u21, null), try decoder.next(0xE5));
-        try testing.expectEqual(@as(?u21, '日'), try decoder.next(0x65));
-        try testing.expectEqual(@as(?u21, null), try decoder.next(0x3D));
-        try testing.expectEqual(@as(?u21, null), try decoder.next(0xD8));
-        try testing.expectEqual(@as(?u21, null), try decoder.next(0x00));
-        try testing.expectEqual(@as(?u21, '😀'), try decoder.next(0xDE));
+        const input = "\xFF\xFE" ++ // U+FEFF
+            "H\x00" ++
+            "\xFC\x00" ++ // ü
+            "\xE5\x65" ++ // 日
+            "\x3D\xD8\x00\xDE"; // 😀
+        var decoder = try testDecode(DefaultDecoder, input, &.{
+            0xFEFF,
+            'H',
+            'ü',
+            '日',
+            '😀',
+        });
         try decoder.adaptTo("utf-16");
         try decoder.adaptTo("UTF-16");
         try decoder.adaptTo("utf-16le");
@@ -191,10 +220,11 @@ test DefaultDecoder {
 
     // Invalid UTF-16LE BOM
     {
-        var decoder = DefaultDecoder{};
-        try testing.expectEqual(@as(?u21, null), try decoder.next(0xFF));
-        try testing.expectError(error.InvalidUtf8, decoder.next(0xFF));
-        try testing.expectEqual(@as(?u21, 'H'), try decoder.next('H'));
+        const input = "\xFF\xFFH";
+        var decoder = try testDecode(DefaultDecoder, input, &.{
+            error.InvalidUtf8,
+            'H',
+        });
         try decoder.adaptTo("utf-8");
         try decoder.adaptTo("UTF-8");
     }
@@ -202,33 +232,30 @@ test DefaultDecoder {
 
 /// A decoder which handles only UTF-8.
 pub const Utf8Decoder = struct {
-    buffer: BoundedArray(u8, 4) = .{},
-    expecting: u3 = 0,
-
-    pub const Error = error{InvalidUtf8};
-
     pub const max_encoded_codepoint_len = 4;
 
-    pub fn next(self: *Utf8Decoder, b: u8) Error!?u21 {
-        if (self.expecting == 0) {
-            const len = unicode.utf8ByteSequenceLength(b) catch return error.InvalidUtf8;
-            if (len == 1) {
-                return b;
-            }
-            self.expecting = len;
-            self.buffer.appendAssumeCapacity(b);
-            return null;
-        } else {
-            self.buffer.appendAssumeCapacity(b);
-            if (self.buffer.len == self.expecting) {
-                const codepoint_or_error = unicode.utf8Decode(self.buffer.slice());
-                self.expecting = 0;
-                self.buffer.len = 0;
-                return codepoint_or_error catch error.InvalidUtf8;
-            } else {
-                return null;
-            }
-        }
+    pub const Error = error{ InvalidUtf8, Overflow, UnexpectedEndOfInput };
+
+    pub fn readCodepoint(_: *Utf8Decoder, reader: anytype, buf: []u8) (Error || @TypeOf(reader).Error)!ReadResult {
+        const b = reader.readByte() catch |e| switch (e) {
+            error.EndOfStream => return ReadResult.none,
+            else => |other| return other,
+        };
+        const byte_length = unicode.utf8ByteSequenceLength(b) catch return error.InvalidUtf8;
+        if (byte_length > buf.len) return error.Overflow;
+        buf[0] = b;
+        if (byte_length == 1) return .{ .codepoint = b, .byte_length = 1 };
+        reader.readNoEof(buf[1..byte_length]) catch |e| switch (e) {
+            error.EndOfStream => return error.UnexpectedEndOfInput,
+            else => |other| return other,
+        };
+        const codepoint = switch (byte_length) {
+            2 => unicode.utf8Decode2(buf[0..2]),
+            3 => unicode.utf8Decode3(buf[0..3]),
+            4 => unicode.utf8Decode4(buf[0..4]),
+            else => unreachable,
+        } catch return error.InvalidUtf8;
+        return .{ .codepoint = codepoint, .byte_length = byte_length };
     }
 
     pub fn adaptTo(_: *Utf8Decoder, encoding: []const u8) error{InvalidEncoding}!void {
@@ -236,10 +263,6 @@ pub const Utf8Decoder = struct {
             return error.InvalidEncoding;
         }
     }
-
-    pub inline fn isUtf8Compatible(_: Utf8Decoder) bool {
-        return true;
-    }
 };
 
 test Utf8Decoder {
@@ -258,7 +281,7 @@ test Utf8Decoder {
         "\xF7\xBF\xBF\xBF" ++
         // Surrogate halves
         "\xED\xA0\x80\xED\xBF\xBF";
-    const expected: []const (error{InvalidUtf8}!u21) = &.{
+    _ = try testDecode(Utf8Decoder, input, &.{
         '\x00',
         '\x01',
         ' ',
@@ -309,94 +332,53 @@ test Utf8Decoder {
         error.InvalidUtf8, // attempted U+1FFFFF
         error.InvalidUtf8, // U+D800
         error.InvalidUtf8, // U+DFFF
-    };
-
-    var decoded = ArrayListUnmanaged(error{InvalidUtf8}!u21){};
-    defer decoded.deinit(testing.allocator);
-    var decoder = Utf8Decoder{};
-    for (input) |b| {
-        if (decoder.next(b)) |maybe_c| {
-            if (maybe_c) |c| {
-                try decoded.append(testing.allocator, c);
-            }
-        } else |err| {
-            try decoded.append(testing.allocator, err);
-        }
-    }
-
-    try testing.expectEqualDeep(expected, decoded.items);
+    });
 }
 
-pub const Utf16Endianness = enum {
-    big,
-    little,
-};
-
 /// A decoder which handles only UTF-16 of a given endianness.
-pub fn Utf16Decoder(comptime endianness: Utf16Endianness) type {
+pub fn Utf16Decoder(comptime endian: std.builtin.Endian) type {
     return struct {
-        buffer: BoundedArray(u8, 2) = .{},
-        high_unit: ?u10 = null,
-
         const Self = @This();
 
-        pub const Error = error{InvalidUtf16};
+        pub const Error = error{ InvalidUtf16, Overflow, UnexpectedEndOfInput };
 
         pub const max_encoded_codepoint_len = 4;
 
-        pub fn next(self: *Self, b: u8) Error!?u21 {
-            self.buffer.appendAssumeCapacity(b);
-            if (self.buffer.len == 1) {
-                return null;
-            }
-            const u = self.takeCodeUnit();
-            if (self.high_unit) |high_unit| {
-                self.high_unit = null;
-                if (!isLowSurrogate(u)) {
-                    return error.InvalidUtf16;
-                }
-                return 0x10000 + ((@as(u21, high_unit) << 10) | surrogateValue(u));
-            } else if (isHighSurrogate(u)) {
-                self.high_unit = surrogateValue(u);
-                return null;
-            } else if (isLowSurrogate(u)) {
-                return error.InvalidUtf16;
-            } else {
-                return u;
+        pub fn readCodepoint(_: *Self, reader: anytype, buf: []u8) (Error || @TypeOf(reader).Error)!ReadResult {
+            var u_buf: [2]u8 = undefined;
+            const u_len = try reader.readAll(&u_buf);
+            switch (u_len) {
+                0 => return ReadResult.none,
+                1 => return error.UnexpectedEndOfInput,
+                else => {},
             }
-        }
-
-        inline fn takeCodeUnit(self: *Self) u16 {
-            const b1 = self.buffer.buffer[0];
-            const b2 = self.buffer.buffer[1];
-            self.buffer.len = 0;
-            return if (endianness == .big) (@as(u16, b1) << 8) + b2 else (@as(u16, b2) << 8) + b1;
-        }
-
-        inline fn isHighSurrogate(u: u16) bool {
-            return u & ~@as(u16, 0x3FF) == 0xD800;
-        }
-
-        inline fn isLowSurrogate(u: u16) bool {
-            return u & ~@as(u16, 0x3FF) == 0xDC00;
-        }
-
-        inline fn surrogateValue(u: u16) u10 {
-            return @intCast(u & 0x3FF);
+            const u = std.mem.readInt(u16, &u_buf, endian);
+            const code_unit_length = unicode.utf16CodeUnitSequenceLength(u) catch return error.InvalidUtf16;
+            const codepoint = switch (code_unit_length) {
+                1 => u,
+                2 => codepoint: {
+                    const low = reader.readInt(u16, endian) catch |e| switch (e) {
+                        error.EndOfStream => return error.UnexpectedEndOfInput,
+                        else => |other| return other,
+                    };
+                    break :codepoint unicode.utf16DecodeSurrogatePair(&.{ u, low }) catch return error.InvalidUtf16;
+                },
+                else => unreachable,
+            };
+            const byte_length = unicode.utf8CodepointSequenceLength(codepoint) catch unreachable;
+            if (byte_length > buf.len) return error.Overflow;
+            _ = unicode.utf8Encode(codepoint, buf) catch unreachable;
+            return .{ .codepoint = codepoint, .byte_length = byte_length };
         }
 
         pub fn adaptTo(_: *Self, encoding: []const u8) error{InvalidEncoding}!void {
             if (!(ascii.eqlIgnoreCase(encoding, "utf-16") or
-                (endianness == .big and ascii.eqlIgnoreCase(encoding, "utf-16be")) or
-                (endianness == .little and ascii.eqlIgnoreCase(encoding, "utf-16le"))))
+                (endian == .Big and ascii.eqlIgnoreCase(encoding, "utf-16be")) or
+                (endian == .Little and ascii.eqlIgnoreCase(encoding, "utf-16le"))))
             {
                 return error.InvalidEncoding;
             }
         }
-
-        pub inline fn isUtf8Compatible(_: Self) bool {
-            return false;
-        }
     };
 }
 
@@ -412,7 +394,7 @@ test Utf16Decoder {
             "\x00\xD8\x00\x00" ++ // unpaired high surrogate followed by U+0000
             "\xFF\xDF" // unpaired low surrogate
         ;
-        const expected: []const (error{InvalidUtf16}!u21) = &.{
+        _ = try testDecode(Utf16Decoder(.Little), input, &.{
             '\x00',
             'A',
             'b',
@@ -421,22 +403,7 @@ test Utf16Decoder {
             '😳',
             error.InvalidUtf16,
             error.InvalidUtf16,
-        };
-
-        var decoded = ArrayListUnmanaged(error{InvalidUtf16}!u21){};
-        defer decoded.deinit(testing.allocator);
-        var decoder = Utf16Decoder(.little){};
-        for (input) |b| {
-            if (decoder.next(b)) |maybe_c| {
-                if (maybe_c) |c| {
-                    try decoded.append(testing.allocator, c);
-                }
-            } else |err| {
-                try decoded.append(testing.allocator, err);
-            }
-        }
-
-        try testing.expectEqualDeep(expected, decoded.items);
+        });
     }
 
     // big-endian
@@ -450,7 +417,7 @@ test Utf16Decoder {
             "\xD8\x00\x00\x00" ++ // unpaired high surrogate followed by U+0000
             "\xDF\xFF" // unpaired low surrogate
         ;
-        const expected: []const (error{InvalidUtf16}!u21) = &.{
+        _ = try testDecode(Utf16Decoder(.Big), input, &.{
             '\x00',
             'A',
             'b',
@@ -459,21 +426,26 @@ test Utf16Decoder {
             '😳',
             error.InvalidUtf16,
             error.InvalidUtf16,
-        };
+        });
+    }
+}
 
-        var decoded = ArrayListUnmanaged(error{InvalidUtf16}!u21){};
-        defer decoded.deinit(testing.allocator);
-        var decoder = Utf16Decoder(.big){};
-        for (input) |b| {
-            if (decoder.next(b)) |maybe_c| {
-                if (maybe_c) |c| {
-                    try decoded.append(testing.allocator, c);
-                }
-            } else |err| {
-                try decoded.append(testing.allocator, err);
-            }
+fn testDecode(comptime Decoder: type, input: []const u8, expected: []const (Decoder.Error!u21)) !Decoder {
+    var decoder: Decoder = .{};
+    var decoded = ArrayListUnmanaged(Decoder.Error!u21){};
+    defer decoded.deinit(testing.allocator);
+    var input_stream = std.io.fixedBufferStream(input);
+    var buf: [4]u8 = undefined;
+    while (true) {
+        if (decoder.readCodepoint(input_stream.reader(), &buf)) |c| {
+            if (!c.present) break;
+            try decoded.append(testing.allocator, c.codepoint);
+        } else |err| {
+            try decoded.append(testing.allocator, err);
         }
-
-        try testing.expectEqualDeep(expected, decoded.items);
     }
+
+    try testing.expectEqualDeep(expected, decoded.items);
+
+    return decoder;
 }
diff --git a/src/token_reader.zig b/src/token_reader.zig
index 91e4bb3..d3601c6 100644
--- a/src/token_reader.zig
+++ b/src/token_reader.zig
@@ -224,9 +224,6 @@ pub fn TokenReader(
         ///
         /// This is relevant for line break normalization.
         after_cr: if (options.enable_normalization) bool else void = if (options.enable_normalization) false,
-        /// The length of the raw codepoint data currently stored in `buffer`
-        /// starting at `scanner.pos`.
-        cp_len: usize = 0,
 
         const Self = @This();
 
@@ -281,21 +278,12 @@ pub fn TokenReader(
                     }
                 }
 
-                const c = (try self.nextCodepoint()) orelse {
+                const c = try self.nextCodepoint();
+                if (!c.present) {
                     try self.scanner.endInput();
                     return null;
-                };
-                if (!self.decoder.isUtf8Compatible()) {
-                    // If the decoder is not compatible with UTF-8, we have to
-                    // reencode the codepoint we just read into UTF-8, since
-                    // `buffer` must always be valid UTF-8.
-                    self.cp_len = unicode.utf8CodepointSequenceLength(c) catch unreachable;
-                    if (self.scanner.pos + self.cp_len >= self.buffer.len) {
-                        return error.Overflow;
-                    }
-                    _ = unicode.utf8Encode(c, self.buffer[self.scanner.pos .. self.scanner.pos + self.cp_len]) catch unreachable;
                 }
-                const token = try self.scanner.next(c, self.cp_len);
+                const token = try self.scanner.next(c.codepoint, c.byte_length);
                 if (token != .ok) {
                     return try self.bufToken(token);
                 }
@@ -304,49 +292,36 @@ pub fn TokenReader(
 
         const nextCodepoint = if (options.enable_normalization) nextCodepointNormalized else nextCodepointRaw;
 
-        fn nextCodepointNormalized(self: *Self) !?u21 {
-            var b = (try self.nextCodepointRaw()) orelse return null;
+        fn nextCodepointNormalized(self: *Self) !encoding.ReadResult {
+            var c = try self.nextCodepointRaw();
+            if (!c.present) return c;
             if (self.after_cr) {
                 self.after_cr = false;
-                if (b == '\n') {
+                if (c.codepoint == '\n') {
                     // \n after \r is ignored because \r was already processed
                     // as a line ending
-                    b = (try self.nextCodepointRaw()) orelse return null;
+                    c = try self.nextCodepointRaw();
+                    if (!c.present) return c;
                 }
             }
-            if (b == '\r') {
+            if (c.codepoint == '\r') {
                 self.after_cr = true;
-                b = '\n';
+                c.codepoint = '\n';
                 self.buffer[self.scanner.pos] = '\n';
             }
-            if (self.scanner.state == .attribute_content and (b == '\t' or b == '\r' or b == '\n')) {
-                b = ' ';
+            if (self.scanner.state == .attribute_content and
+                (c.codepoint == '\t' or c.codepoint == '\r' or c.codepoint == '\n'))
+            {
+                c.codepoint = ' ';
                 self.buffer[self.scanner.pos] = ' ';
             }
-            return b;
+            return c;
         }
 
-        fn nextCodepointRaw(self: *Self) !?u21 {
-            self.cp_len = 0;
-            var b = self.reader.readByte() catch |e| switch (e) {
-                error.EndOfStream => return null,
-                else => |other| return other,
-            };
-            while (true) {
-                if (self.scanner.pos + self.cp_len == self.buffer.len) {
-                    return error.Overflow;
-                }
-                self.buffer[self.scanner.pos + self.cp_len] = b;
-                self.cp_len += 1;
-                if (try self.decoder.next(b)) |c| {
-                    self.location.advance(c);
-                    return c;
-                }
-                b = self.reader.readByte() catch |e| switch (e) {
-                    error.EndOfStream => return error.UnexpectedEndOfInput,
-                    else => |other| return other,
-                };
-            }
+        fn nextCodepointRaw(self: *Self) !encoding.ReadResult {
+            const c = try self.decoder.readCodepoint(self.reader, self.buffer[self.scanner.pos..]);
+            if (c.present) self.location.advance(c.codepoint);
+            return c;
         }
 
         fn bufToken(self: *Self, token: Scanner.Token) !Token {