Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add import data from gql script #247

Merged
merged 17 commits into from
Dec 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@
/vendor
/wallets
test-results.xml
/scripts/import-data/bundles
/scripts/import-data/transactions
/scripts/import-data/parquet
/scripts/import-data/missing-root-tx-ids

# Generated docs
/docs/sqlite/bundles
Expand Down
1 change: 1 addition & 0 deletions docker-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ services:
- ENABLE_BACKGROUND_DATA_VERIFICATION=${ENABLE_BACKGROUND_DATA_VERIFICATION:-}
- BACKGROUND_DATA_VERIFICATION_INTERVAL_SECONDS=${BACKGROUND_DATA_VERIFICATION_INTERVAL_SECONDS:-}
- CLICKHOUSE_URL=${CLICKHOUSE_URL:-}
- BUNDLE_DATA_IMPORTER_QUEUE_SIZE=${BUNDLE_DATA_IMPORTER_QUEUE_SIZE:-}
networks:
- ar-io-network
depends_on:
Expand Down
134 changes: 134 additions & 0 deletions scripts/import-data/count-fetched-ids.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
/**
* AR.IO Gateway
* Copyright (C) 2022-2023 Permanent Data Solutions, Inc. All Rights Reserved.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
import * as fs from 'node:fs/promises';
import path from 'node:path';
import { fileURLToPath } from 'node:url';
import { getFilesInRange } from './utils.js';
const args = process.argv.slice(2);
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

// Defaults for every option. Each one can be overridden from the command line:
//   --transactionsDir <path>   --bundlesDir <path>
//   --minHeight <integer>      --maxHeight <integer>
let TRANSACTIONS_DIR = path.join(__dirname, 'transactions');
let BUNDLES_DIR = path.join(__dirname, 'bundles');
let MIN_BLOCK_HEIGHT = 0;
let MAX_BLOCK_HEIGHT = Infinity;

// Scan every token; a recognised flag consumes the token that follows it as
// its value. Tokens that are not recognised flags are ignored.
args.forEach((arg, index) => {
  // Returns the token following the current flag, or reports the problem and
  // terminates the process when there is none.
  const requireValue = () => {
    const value = args[index + 1];
    if (!value) {
      console.error(`Missing value for ${arg}`);
      process.exit(1);
    }
    return value;
  };

  // Same as requireValue(), but additionally insists on a plain non-negative
  // base-10 integer. parseInt() alone would turn "abc" into NaN and "12abc"
  // into 12, and either would silently end up in the height range.
  const requireHeight = () => {
    const value = requireValue();
    if (!/^\d+$/.test(value)) {
      console.error(
        `Invalid value for ${arg}: expected a non-negative integer, got "${value}"`,
      );
      process.exit(1);
    }
    return Number.parseInt(value, 10);
  };

  switch (arg) {
    case '--transactionsDir':
      TRANSACTIONS_DIR = requireValue();
      break;
    case '--bundlesDir':
      BUNDLES_DIR = requireValue();
      break;
    case '--minHeight':
      MIN_BLOCK_HEIGHT = requireHeight();
      break;
    case '--maxHeight':
      MAX_BLOCK_HEIGHT = requireHeight();
      break;
    default:
      break;
  }
});
djwhitt marked this conversation as resolved.
Show resolved Hide resolved

/**
 * Sums the number of entries stored across a set of JSON files.
 *
 * Every file is read as UTF-8 and parsed as JSON; its top-level value must be
 * an array, whose length is added to the running total. Files are processed
 * one after another, in the order given.
 *
 * @param folder - Directory that contains the files.
 * @param files - File names, relative to `folder`.
 * @returns The combined length of all arrays (0 when `files` is empty).
 * @throws If a file cannot be read, is not valid JSON, or does not hold an
 *   array at its top level.
 */
const countIds = async ({
  folder,
  files,
}: {
  folder: string;
  files: string[];
}) => {
  let counter = 0;
  for (const file of files) {
    const filePath = path.join(folder, file);
    const parsed: unknown = JSON.parse(await fs.readFile(filePath, 'utf-8'));
    // Without this guard a non-array document would add `undefined` (making
    // the total NaN) or the length of a string, yielding a bogus count.
    if (!Array.isArray(parsed)) {
      throw new Error(`Expected a JSON array of ids in ${filePath}`);
    }
    counter += parsed.length;
  }
  return counter;
};
djwhitt marked this conversation as resolved.
Show resolved Hide resolved

/**
 * Script entry point.
 *
 * Lists the files of the transactions and bundles directories that fall
 * inside the configured height range, prints both lists, and then, for each
 * non-empty list, prints the total number of ids it holds together with the
 * numbers parsed from the first and last file names.
 */
(async () => {
  // The number in front of the first "." of a file name is used as its height.
  const heightOf = (fileName: string) =>
    Number.parseInt(fileName.split('.')[0], 10);

  const heightRange = { min: MIN_BLOCK_HEIGHT, max: MAX_BLOCK_HEIGHT };

  const transactionFiles = await getFilesInRange({
    folder: TRANSACTIONS_DIR,
    ...heightRange,
  });
  const bundleFiles = await getFilesInRange({
    folder: BUNDLES_DIR,
    ...heightRange,
  });

  console.log({ transactionFiles, bundleFiles });

  if (transactionFiles.length > 0) {
    const firstTransactionHeight = heightOf(transactionFiles[0]);
    const lastTransactionHeight = heightOf(
      transactionFiles[transactionFiles.length - 1],
    );
    const transactionCount = await countIds({
      files: transactionFiles,
      folder: TRANSACTIONS_DIR,
    });

    console.log(
      `Total transactions from ${firstTransactionHeight} to ${lastTransactionHeight}: ${transactionCount}`,
    );
  }

  if (bundleFiles.length > 0) {
    const firstBundleHeight = heightOf(bundleFiles[0]);
    const lastBundleHeight = heightOf(bundleFiles[bundleFiles.length - 1]);
    const bundleCount = await countIds({
      files: bundleFiles,
      folder: BUNDLES_DIR,
    });

    console.log(
      `Total bundles from ${firstBundleHeight} to ${lastBundleHeight}: ${bundleCount}`,
    );
  }
})();
138 changes: 138 additions & 0 deletions scripts/import-data/export-parquet.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
/**
* AR.IO Gateway
* Copyright (C) 2022-2023 Permanent Data Solutions, Inc. All Rights Reserved.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
import path from 'node:path';
import { fileURLToPath } from 'node:url';
import { fetchLatestBlockHeight, fetchWithRetry } from './utils.js';
const args = process.argv.slice(2);
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

// Defaults for every option. Each one can be overridden from the command line:
//   --arioNode <url>        --adminKey <key>       --outputDir <path>
//   --minHeight <integer>   --maxHeight <integer>  --maxFileRows <integer>
let ARIO_ENDPOINT = 'http://localhost:4000';
let ADMIN_KEY: string | undefined;
let OUTPUT_DIR = path.join(__dirname, 'parquet');
let MAX_FILE_ROWS = 1_000_000;
let MIN_BLOCK_HEIGHT = 0;
let MAX_BLOCK_HEIGHT: number | undefined;

// Scan every token; a recognised flag consumes the token that follows it as
// its value. Tokens that are not recognised flags are ignored.
args.forEach((arg, index) => {
  // Returns the token following the current flag, or reports the problem and
  // terminates the process when there is none.
  const requireValue = () => {
    const value = args[index + 1];
    if (!value) {
      console.error(`Missing value for ${arg}`);
      process.exit(1);
    }
    return value;
  };

  // Same as requireValue(), but additionally insists on a plain non-negative
  // base-10 integer. parseInt() alone would turn "abc" into NaN, which
  // JSON.stringify() serialises as null in the request body and which would
  // also defeat the `=== undefined` default for the maximum height.
  const requireInteger = () => {
    const value = requireValue();
    if (!/^\d+$/.test(value)) {
      console.error(
        `Invalid value for ${arg}: expected a non-negative integer, got "${value}"`,
      );
      process.exit(1);
    }
    return Number.parseInt(value, 10);
  };

  switch (arg) {
    case '--adminKey':
      ADMIN_KEY = requireValue();
      break;
    case '--arioNode':
      ARIO_ENDPOINT = requireValue();
      break;
    case '--outputDir':
      OUTPUT_DIR = requireValue();
      break;
    case '--minHeight':
      MIN_BLOCK_HEIGHT = requireInteger();
      break;
    case '--maxHeight':
      MAX_BLOCK_HEIGHT = requireInteger();
      break;
    case '--maxFileRows':
      MAX_FILE_ROWS = requireInteger();
      break;
    default:
      break;
  }
});
karlprieb marked this conversation as resolved.
Show resolved Hide resolved

/**
 * Script entry point.
 *
 * Requires an admin key, asks the gateway's admin API to start a Parquet
 * export for the configured height range, and then polls the export status
 * endpoint every few seconds until the export is reported as finished.
 *
 * @throws If no admin key was supplied, or if the export is reported as
 *   failed while polling.
 */
(async () => {
  // Delay between two status requests.
  const POLL_INTERVAL_MS = 5000;

  if (ADMIN_KEY === undefined) {
    throw new Error('Missing admin key');
  }

  // No explicit upper bound given: fall back to whatever
  // fetchLatestBlockHeight() reports.
  if (MAX_BLOCK_HEIGHT === undefined) {
    MAX_BLOCK_HEIGHT = await fetchLatestBlockHeight();
  }

  await fetchWithRetry(`${ARIO_ENDPOINT}/ar-io/admin/export-parquet`, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      Authorization: `Bearer ${ADMIN_KEY}`,
    },
    body: JSON.stringify({
      outputDir: OUTPUT_DIR,
      startHeight: MIN_BLOCK_HEIGHT,
      endHeight: MAX_BLOCK_HEIGHT,
      maxFileRows: MAX_FILE_ROWS,
    }),
  });

  console.log(
    `Parquet export started from block ${MIN_BLOCK_HEIGHT} to ${MAX_BLOCK_HEIGHT}`,
  );

  let isComplete = false;

  while (!isComplete) {
    const response = await fetchWithRetry(
      `${ARIO_ENDPOINT}/ar-io/admin/export-parquet/status`,
      {
        method: 'GET',
        headers: {
          Authorization: `Bearer ${ADMIN_KEY}`,
        },
      },
    );

    const data = await response.json();
    isComplete = data.status === 'completed';

    if (isComplete) {
      console.log('Parquet export finished!');
      console.log(data);
    } else if (data.status === 'errored') {
      // Stop instead of polling forever once the export has failed.
      // NOTE(review): 'errored' is presumed to be the gateway's failure
      // status; confirm against the export-parquet status implementation.
      console.error(data);
      throw new Error('Parquet export failed');
    } else {
      await new Promise((resolve) => setTimeout(resolve, POLL_INTERVAL_MS));
    }
  }
})();
Loading
Loading