Skip to content

Commit 70839f9

Browse files
committed
Initial commit
0 parents  commit 70839f9

7 files changed

+196
-0
lines changed

.gitignore

+15
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
**/.DS_Store
2+
*-debug.log
3+
*-error.log
4+
/.idea
5+
/.tsimp
6+
/.nyc_output
7+
/dist
8+
/lib
9+
/package-lock.json
10+
/tmp
11+
/yarn.lock
12+
node_modules
13+
/*.env
14+
/input
15+
/output

README.md

+29
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
# MBox Streamer
2+
3+
A streaming, event-emitting mbox file parser. Give it a stream (or a path-like pointer to an mbox file) and it will read through the mailbox, firing a 'message' event with a ParsedMail payload for each message it encounters.
4+
5+
## Installation
6+
7+
`npm i eaton/mbox-streamer`
8+
9+
## Usage
10+
11+
```typescript
12+
import { MboxStreamer } from "@eatonfyi/mbox-streamer";
13+
14+
const mbs = new MboxStreamer();
15+
mbs.on('message', message => {
16+
console.log(`${message.date?.toISOString()}: ${message.subject}`);
17+
});
18+
19+
await mbs.parse("./my-email-backup.mbox");
20+
```
21+
22+
See the [mailparser](https://github.com/nodemailer/mailparser) project for details on the `ParsedMail` object that's generated.
23+
24+
The [mbox-to-json](https://github.com/d4data-official/mbox-to-json) project served as this one's original inspiration; rather than accumulating parsed messages and returning them at the end of the operation, however, we emit each one and let the listeners figure out what to keep or discard.
25+
26+
## TODOs
27+
28+
- [ ] Allow parsing options to be passed into the `parse()` function; the mailparser project has a few nice convenience flags that can speed up the work if (for example) you don't care about grabbing HTML versions of email bodies or extracting file attachments.
29+
- [ ] Add a pure Transformer version for folks comfortable piping streams rather than using a wrapper class.

package.json

+52
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
{
2+
"name": "@eatonfyi/mbox-streamer",
3+
"version": "1.0.0",
4+
"description": "Read and parse UNIX Mbox files via stream transforms.",
5+
"type": "module",
6+
"main": "./dist/index.cjs",
7+
"module": "./dist/index.mjs",
8+
"types": "./dist/index.d.cts",
9+
"exports": {
10+
"require": {
11+
"types": "./dist/index.d.cts",
12+
"default": "./dist/index.cjs"
13+
},
14+
"import": {
15+
"types": "./dist/index.d.mts",
16+
"default": "./dist/index.mjs"
17+
}
18+
},
19+
"files": [
20+
"/dist",
21+
"README.md"
22+
],
23+
"scripts": {
24+
"build": "pkgroll",
25+
"clean": "shx rm -rf dist",
26+
"prepare": "npm run clean && npm run build",
27+
"test": "ava"
28+
},
29+
"ava": {
30+
"extensions": {
31+
"ts": "module"
32+
},
33+
"nodeArguments": [
34+
"--import=tsimp"
35+
]
36+
},
37+
"author": "eaton",
38+
"license": "MIT",
39+
"devDependencies": {
40+
"@types/node": "^20.11.30",
41+
"ava": "^6.1.2",
42+
"pkgroll": "^2.0.2",
43+
"shx": "^0.3.4",
44+
"tsimp": "^2.0.11",
45+
"typescript": "^5.4.3"
46+
},
47+
"dependencies": {
48+
"@types/mailparser": "^3.4.4",
49+
"mailparser": "^3.6.9",
50+
"tiny-typed-emitter": "^2.1.0"
51+
}
52+
}

src/index.ts

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
export * from './streamer.js';
2+
export * from './transformer.js';

src/streamer.ts

+45
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
import fs from 'fs';
2+
import { Readable } from 'stream';
3+
import { ParsedMail, simpleParser } from 'mailparser';
4+
import { TypedEmitter } from 'tiny-typed-emitter';
5+
import { MboxTransformer } from './transformer.js';
6+
7+
/** Events emitted by `MboxStreamer` while an mbox source is being parsed. */
interface MboxStreamerEvents {
  /** Fired once for every message that `simpleParser` parsed successfully. */
  message: (message: ParsedMail) => void;
  /** Fired for parse or stream failures when `parse()` was called with `rejectOnError = false`. */
  error: (err: any) => void;
  /** Fired once per `parse()` call, after the source is exhausted and every pending parse has finished. */
  close: () => void;
}

/**
 * Event-emitting mbox reader: splits an mbox source into raw messages with
 * `MboxTransformer`, parses each one with mailparser's `simpleParser`, and
 * emits the result as a 'message' event.
 */
export class MboxStreamer extends TypedEmitter<MboxStreamerEvents> {
  /**
   * Reads `input` to the end, emitting a 'message' event per parsed mail.
   *
   * @param input Path of an mbox file, or an already-open readable stream of its bytes.
   * @param rejectOnError When true, the first parse or stream error rejects the
   *   returned promise; when false (default) errors are emitted as 'error' events.
   * @returns A promise that resolves once the source has been fully consumed
   *   and every parse callback has run (i.e. right after 'close' is emitted).
   */
  async parse(input: fs.PathLike | Readable, rejectOnError = false) {
    const stream = input instanceof Readable ? input : fs.createReadStream(input);

    return new Promise<undefined>((resolve, reject) => {
      const transform = new MboxTransformer();

      // simpleParser is asynchronous, so the splitter can finish while parses
      // are still in flight; completion requires both conditions below.
      let pending = 0;
      let drained = false;
      let closed = false;

      const closeIfDone = () => {
        if (closed || !drained || pending > 0) {
          return;
        }
        closed = true; // guards against the splitter reporting 'close' twice
        this.emit('close');
        resolve(undefined); // no-op if the promise was already rejected
      };

      const report = (err: unknown) => {
        if (rejectOnError) {
          reject(err);
        } else {
          this.emit('error', err);
        }
      };

      transform.on('data', (message) => {
        pending += 1;
        simpleParser(message, { keepCidLinks: true }, (err, mail) => {
          try {
            if (err) {
              report(err);
            } else {
              this.emit('message', mail);
            }
          } finally {
            pending -= 1;
            closeIfDone();
          }
        });
      });

      transform.on('error', report);

      transform.on('close', () => {
        drained = true;
        closeIfDone();
      });

      // pipe() does not forward source errors (e.g. a missing file). Tear the
      // splitter down so its 'close' still fires and the promise can settle.
      stream.once('error', (err) => {
        transform.destroy();
        report(err);
      });

      stream.pipe(transform);
    });
  }
}

src/transformer.ts

+38
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
import { Transform, TransformCallback } from "stream";
import { StringDecoder } from "string_decoder";
2+
3+
/**
 * Transform stream that splits raw mbox bytes into individual messages.
 *
 * The writable side accepts binary chunks; the readable side is in object
 * mode and yields one string per message, each beginning with its "From "
 * separator line. A message boundary is any line that starts with "From ".
 */
export class MboxTransformer extends Transform {
  /** Decoded text received so far that does not yet form a complete message. */
  private remaining: string;

  /** Holds back incomplete multi-byte UTF-8 sequences until the next chunk arrives. */
  private readonly decoder: StringDecoder;

  constructor() {
    super({
      readableObjectMode: true,
      writableObjectMode: false,
    });
    this.remaining = '';
    this.decoder = new StringDecoder('utf8');
  }

  /**
   * Appends the chunk to the buffered text and pushes every message that is
   * now known to be complete; the last (possibly partial) one stays buffered.
   */
  _transform(chunk: Buffer, encoding: string, callback: TransformCallback) {
    const data = this.remaining + this.decoder.write(chunk);
    const pieces = MboxTransformer.split(data);
    // The final piece may continue in the next chunk. It is kept verbatim
    // (not normalised) so a separator cut in half by the chunk boundary is
    // still recognised once the rest of it arrives.
    this.remaining = pieces.pop() ?? '';
    for (const piece of pieces) {
      this.push(MboxTransformer.ensureFromLine(piece));
    }
    callback();
  }

  /**
   * Pushes whatever text is still buffered as the final message. The callback
   * is always invoked so that an empty input still ends the stream; 'close'
   * is left to the stream's own lifecycle rather than emitted by hand.
   */
  _flush(callback: TransformCallback) {
    const tail = this.remaining + this.decoder.end();
    this.remaining = '';
    for (const piece of MboxTransformer.split(tail)) {
      this.push(MboxTransformer.ensureFromLine(piece));
    }
    callback();
  }

  /**
   * Cuts text in front of every line-initial "From ". The lookahead keeps
   * the separator attached to its message, so a chunk that ends exactly on
   * "From " does not lose the boundary.
   */
  private static split(data: string): string[] {
    return data.split(/^(?=From )/m).filter((piece) => piece.length > 0);
  }

  /** Prefixes "From " to text that precedes the first separator, so every emitted message starts the same way. */
  private static ensureFromLine(piece: string): string {
    return piece.startsWith('From ') ? piece : `From ${piece}`;
  }
}

tsconfig.json

+15
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
{
2+
"compilerOptions": {
3+
"declaration": true,
4+
"module": "ESNext",
5+
"outDir": "dist",
6+
"rootDir": "src",
7+
"strict": true,
8+
"target": "ESNext",
9+
"lib": ["ESNext"],
10+
"moduleResolution": "Bundler",
11+
"skipLibCheck": true,
12+
"typeRoots": ["./types", "./node_modules/@types"]
13+
},
14+
"include": ["./src/**/*"]
15+
}

0 commit comments

Comments
 (0)