Skip to content

Commit 947ea29

Browse files
committed
fix: Validate the output of the feed xml parser. Fixes hoarder-app#1085
1 parent 2399dcb commit 947ea29

File tree

1 file changed

+19
-8
lines changed

1 file changed

+19
-8
lines changed

apps/workers/feedWorker.ts

+19-8
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ import { DequeuedJob, Runner } from "liteque";
33
import cron from "node-cron";
44
import Parser from "rss-parser";
55
import { buildImpersonatingTRPCClient } from "trpc";
6+
import { z } from "zod";
67

78
import type { ZFeedRequestSchema } from "@hoarder/shared/queues";
89
import { db } from "@hoarder/db";
@@ -123,19 +124,31 @@ async function run(req: DequeuedJob<ZFeedRequestSchema>) {
123124
item: ["id"],
124125
},
125126
});
126-
const feedData = await parser.parseString(xmlData);
127+
const unparseFeedData = await parser.parseString(xmlData);
128+
129+
// Apparently, we can't trust the output of the xml parser. So let's do our own type
130+
// validation.
131+
const feedItemsSchema = z.object({
132+
id: z.coerce.string(),
133+
link: z.string().optional(),
134+
guid: z.string().optional(),
135+
});
136+
137+
const feedItems = unparseFeedData.items
138+
.map((i) => feedItemsSchema.safeParse(i))
139+
.flatMap((i) => (i.success ? [i.data] : []));
127140

128141
logger.info(
129-
`[feed][${jobId}] Found ${feedData.items.length} entries in feed "${feed.name}" (${feed.id}) ...`,
142+
`[feed][${jobId}] Found ${feedItems.length} entries in feed "${feed.name}" (${feed.id}) ...`,
130143
);
131144

132-
if (feedData.items.length === 0) {
145+
if (feedItems.length === 0) {
133146
logger.info(`[feed][${jobId}] No entries found.`);
134147
return;
135148
}
136149

137150
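// For feeds that don't have guids, use the id as the guid. NOTE(review): `${item.id}` is always a string, so the `?? item.link` fallback below is never reached.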
138-
feedData.items.forEach((item) => {
151+
feedItems.forEach((item) => {
139152
item.guid = item.guid ?? `${item.id}` ?? item.link;
140153
});
141154

@@ -144,14 +157,12 @@ async function run(req: DequeuedJob<ZFeedRequestSchema>) {
144157
eq(rssFeedImportsTable.rssFeedId, feed.id),
145158
inArray(
146159
rssFeedImportsTable.entryId,
147-
feedData.items
148-
.map((item) => item.guid)
149-
.filter((id): id is string => !!id),
160+
feedItems.map((item) => item.guid).filter((id): id is string => !!id),
150161
),
151162
),
152163
});
153164

154-
const newEntries = feedData.items.filter(
165+
const newEntries = feedItems.filter(
155166
(item) =>
156167
!exitingEntries.some((entry) => entry.entryId === item.guid) &&
157168
item.link &&

0 commit comments

Comments
 (0)