Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion src/XmlTransformStream.ts
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ export default class XmlTransformStream extends TransformStream<Uint8Array, Uint
// Guard anyway in case someone uses this TransformStream with an unexpected stream type
throw new Error('Received non-Uint8Array chunk');
}
let chunkAsString = textDecoder.decode(chunk);
let chunkAsString = textDecoder.decode(chunk, { stream: true });

// Whenever a chunk is added, it is added to the currently processing chunk, and an attempt is made to
// parse it.
Expand All @@ -68,6 +68,9 @@ export default class XmlTransformStream extends TransformStream<Uint8Array, Uint
});
},
async flush(controller: TransformStreamDefaultController<Uint8Array>) {
// Flush and append any buffered bytes in the decoder (e.g. incomplete multibyte chars)
xmlStreamerContext.append(textDecoder.decode());

xmlStreamerContext.flush(true);

await dispatchCompleteTopLevelChildren(chunk => {
Expand Down
46 changes: 46 additions & 0 deletions test/unit/classes/XmlTransformStream.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -128,4 +128,50 @@ describe('XmlTransformStream', () => {
});
});

it("preserves UTF-8 multibyte characters split across chunks", async () => {
  // Identity transformer: every element node is handed back unchanged, so
  // the output depends only on how the stream decodes its input bytes.
  const passThrough: IXmlTransformer = {
    transformElementNode: async (node: XmlElementNode): Promise<XmlElementNode | null> => node,
  };
  const streamUnderTest = new XmlTransformStream(new XmlDocument(), passThrough);

  // "ö" is the two-byte UTF-8 sequence 0xC3 0xB6. The middle "ö" is cut in
  // half: its lead byte ends the first chunk and its continuation byte
  // starts the second one.
  const source = new ReadableStream<Uint8Array>({
    start(controller) {
      const utf8 = new TextEncoder();
      const firstChunk = new Uint8Array([...utf8.encode("ö"), 0xc3]);
      const secondChunk = new Uint8Array([0xb6, ...utf8.encode("ö")]);

      controller.enqueue(firstChunk);
      controller.enqueue(secondChunk);
      controller.close();
    },
  });

  const output = await readableStreamToString(source.pipeThrough(streamUnderTest));

  // All three characters must come out intact, including the split one.
  assert.strictEqual(output, "ööö");
});

it("flushes a dangling UTF-8 lead byte at EOF into U+FFFD", async () => {
  // Identity transformer: every element node is handed back unchanged, so
  // the output depends only on how the stream decodes its input bytes.
  const passThrough: IXmlTransformer = {
    transformElementNode: async (node: XmlElementNode): Promise<XmlElementNode | null> => node,
  };
  const streamUnderTest = new XmlTransformStream(new XmlDocument(), passThrough);

  // The input is a complete "ö" followed by a lone 0xC3 lead byte, after
  // which the stream is closed without ever supplying a continuation byte.
  const source = new ReadableStream<Uint8Array>({
    start(controller) {
      const utf8 = new TextEncoder();
      const truncatedChunk = new Uint8Array([...utf8.encode("ö"), 0xc3]);

      controller.enqueue(truncatedChunk);
      controller.close();
    },
  });

  const output = await readableStreamToString(source.pipeThrough(streamUnderTest));

  // The second character of the expected string is the Unicode replacement
  // character (U+FFFD), standing in for the incomplete sequence.
  assert.strictEqual(output, "ö�");
});
});