From 6cd9c557eb8b8f6d51bb6d7ee899e3300895e6ce Mon Sep 17 00:00:00 2001 From: Oli Evans Date: Fri, 1 Sep 2023 14:07:25 +0100 Subject: [PATCH] fix: dedupe batch writes (#2) We have duplicate multihash+car entries. We have to dedupe these from batch write commands or DynamoDB rejects the command. Conceptually, it is redundant to track multiple instances of the same block appearing in the same CAR, so it is fine and good to dedupe here. License: MIT Signed-off-by: Oli Evans --- write-cli.js | 2 ++ write.js | 9 ++++++++- 2 files changed, 10 insertions(+), 1 deletion(-) mode change 100644 => 100755 write-cli.js diff --git a/write-cli.js b/write-cli.js old mode 100644 new mode 100755 index 71ac9f8..fb16516 --- a/write-cli.js +++ b/write-cli.js @@ -1,3 +1,5 @@ +#!/usr/bin/env node + import { DynamoDBClient } from '@aws-sdk/client-dynamodb' import { createDynamo, createDynamoTable } from './test/_helpers.js' import { write } from './write.js' diff --git a/write.js b/write.js index 0d6bc93..345f479 100644 --- a/write.js +++ b/write.js @@ -36,14 +36,21 @@ export async function write (srcStream, dst, segment, totalSegments, client = ne srcCount += batch.length spinner.suffixText = `src: ${srcCount} dst: ${dstCount}` + // remove duplicates + const itemMap = new Map() + for (const item of batch) { + itemMap.set(`${item.blockmultihash}#${item.carpath}`, item) + } + /** @type {Array { + const puts = Array.from(itemMap.values()).map(item => { return { PutRequest: { Item: marshall(item) } } }) + const cmd = new BatchWriteItemCommand({ RequestItems: { [dst]: puts