diff --git a/.github/workflows/build-and-deploy.yml b/.github/workflows/build-and-deploy.yml
index f6f026161823..01026b784937 100644
--- a/.github/workflows/build-and-deploy.yml
+++ b/.github/workflows/build-and-deploy.yml
@@ -63,6 +63,7 @@ jobs:
           ALGOLIA_APP_ID: ${{ vars.ALGOLIA_APP_ID }}
           ALGOLIA_APP_SEARCH_KEY: ${{ vars.ALGOLIA_APP_SEARCH_KEY }}
           ALGOLIA_APP_ADMIN_KEY: ${{ steps.esc-secrets.outputs.ALGOLIA_APP_ADMIN_KEY }}
+          INDEXNOW_API_KEY: ${{ steps.esc-secrets.outputs.INDEXNOW_API_KEY }}
           NODE_OPTIONS: "--max_old_space_size=8192"
 
       - name: Archive test results
diff --git a/Makefile b/Makefile
index 23f8d5d3b93a..0d429187606d 100644
--- a/Makefile
+++ b/Makefile
@@ -99,6 +99,11 @@ ci_update_search_index:
 	echo "Updating search index: ${DEPLOYMENT_ENVIRONMENT}..."
 	./scripts/ci-update-search-index.sh "${DEPLOYMENT_ENVIRONMENT}"
 
+.PHONY: ci_submit_indexnow
+ci_submit_indexnow:
+	echo "Submitting URLs to IndexNow..."
+	./scripts/search/submit-indexnow.sh "${DEPLOYMENT_ENVIRONMENT}"
+
 .PHONY: serve-all
 serve-all:
 	./node_modules/.bin/concurrently --kill-others -r "./scripts/serve.sh" "yarn --cwd ./theme run start"
diff --git a/scripts/ci-push.sh b/scripts/ci-push.sh
index fcb2cecdd9fb..9c19fa49767b 100755
--- a/scripts/ci-push.sh
+++ b/scripts/ci-push.sh
@@ -8,6 +8,7 @@ source ./scripts/ci-login.sh
 ./scripts/sync-and-test-bucket.sh update
 
 ./scripts/generate-search-index.sh
+./scripts/search/submit-indexnow.sh
 
 node ./scripts/await-in-progress.js
 
diff --git a/scripts/search/indexnow.js b/scripts/search/indexnow.js
new file mode 100644
index 000000000000..11b94fe17b00
--- /dev/null
+++ b/scripts/search/indexnow.js
@@ -0,0 +1,251 @@
+/**
+ * IndexNow API implementation for Pulumi docs
+ *
+ * This script submits new or updated URLs to the IndexNow API
+ * to trigger immediate crawling by search engines.
+ *
+ * Environment variables read by this script:
+ *   INDEXNOW_API_KEY      - key sent with each request (a built-in fallback is used if unset)
+ *   INDEXNOW_TEST_MODE    - "true" logs each request instead of sending it
+ *   INDEXNOW_FORCE_SUBMIT - "true" submits every sitemap URL, ignoring the cache
+ */
+
+const fs = require('fs');
+const path = require('path');
+const https = require('https');
+const Sitemapper = require('sitemapper');
+const sitemap = new Sitemapper();
+
+// IndexNow API settings
+const INDEXNOW_ENDPOINT = 'https://www.bing.com/indexnow';
+const INDEXNOW_API_KEY = process.env.INDEXNOW_API_KEY || '33134703c43349ddb227d769745f33cc';
+const INDEXNOW_KEY_LOCATION = 'indexnow.txt';
+// Cache file to store previously submitted URLs
+const CACHE_FILE = path.join(__dirname, '../../public/indexnow-submitted-urls.json');
+const BATCH_SIZE = 10000; // Maximum number of URLs per batch (IndexNow allows up to 10,000)
+const SITE_URL = 'https://www.pulumi.com';
+
+// Get environment
+const isTestMode = process.env.INDEXNOW_TEST_MODE === 'true';
+const isForceSubmit = process.env.INDEXNOW_FORCE_SUBMIT === 'true';
+
+/**
+ * Get URLs from sitemap
+ *
+ * @returns {Promise<string[]>} Trimmed, sorted sitemap URLs, or an empty list if the fetch fails.
+ */
+async function getSitemapUrls() {
+    console.log('Fetching sitemap URLs...');
+    try {
+        const result = await sitemap.fetch(`${SITE_URL}/sitemap.xml`);
+        return result.sites
+            // Filter out any excluded patterns if needed
+            // For example: .filter(url => !url.match(/\/api-docs\//))
+            .map(url => url.trim())
+            .sort();
+    } catch (error) {
+        console.error('Error fetching sitemap:', error);
+        return [];
+    }
+}
+
+/**
+ * Load previously submitted URLs from cache file
+ *
+ * @returns {{lastSubmission: (string|null), urls: Object}} The parsed cache, or an empty
+ *     cache when the file is missing, unreadable, or does not have a `urls` object.
+ */
+function loadSubmittedUrls() {
+    try {
+        if (fs.existsSync(CACHE_FILE)) {
+            const cache = JSON.parse(fs.readFileSync(CACHE_FILE, 'utf8'));
+            // Valid JSON can still have the wrong shape (e.g. `null` or `[]`); main()
+            // indexes cache.urls, so reject anything without a plain `urls` object.
+            if (cache && cache.urls && typeof cache.urls === 'object' && !Array.isArray(cache.urls)) {
+                return cache;
+            }
+            console.warn('Submitted URLs cache has an unexpected format, starting fresh.');
+        }
+    } catch (error) {
+        console.warn('Error loading submitted URLs cache, starting fresh:', error.message);
+    }
+    return {
+        lastSubmission: null,
+        urls: {}
+    };
+}
+
+/**
+ * Save submitted URLs to cache file
+ *
+ * @param {Object} cache Cache object in the shape returned by loadSubmittedUrls().
+ */
+function saveSubmittedUrls(cache) {
+    try {
+        // Ensure the directory exists (a no-op when it already does)
+        fs.mkdirSync(path.dirname(CACHE_FILE), { recursive: true });
+        fs.writeFileSync(CACHE_FILE, JSON.stringify(cache, null, 2));
+    } catch (error) {
+        console.error('Error saving submitted URLs cache:', error);
+    }
+}
+
+/**
+ * POST one serialized request body to the IndexNow endpoint.
+ *
+ * @param {string} data JSON request body.
+ * @returns {Promise<void>} Resolves on a 2xx response; rejects on any other
+ *     status code or on a request error.
+ */
+function postToIndexNow(data) {
+    return new Promise((resolve, reject) => {
+        const options = {
+            method: 'POST',
+            headers: {
+                'Content-Type': 'application/json',
+                // Content-Length counts bytes. data.length counts UTF-16 code units,
+                // which is too small whenever a URL contains a non-ASCII character.
+                'Content-Length': Buffer.byteLength(data)
+            }
+        };
+
+        const req = https.request(INDEXNOW_ENDPOINT, options, (res) => {
+            let responseData = '';
+
+            // Decode as UTF-8 across chunk boundaries instead of per chunk
+            res.setEncoding('utf8');
+            res.on('data', (chunk) => {
+                responseData += chunk;
+            });
+
+            res.on('end', () => {
+                if (res.statusCode >= 200 && res.statusCode < 300) {
+                    resolve();
+                } else {
+                    reject(new Error(`IndexNow API returned status ${res.statusCode}: ${responseData}`));
+                }
+            });
+        });
+
+        req.on('error', reject);
+        req.write(data);
+        req.end();
+    });
+}
+
+/**
+ * Submit URLs to IndexNow API
+ *
+ * @param {string[]} urls URLs to submit.
+ * @returns {Promise<string[]>} The URLs from batches the API accepted. URLs from
+ *     failed batches are left out, and nothing is returned in test mode.
+ */
+async function submitToIndexNow(urls) {
+    if (urls.length === 0) {
+        console.log('No URLs to submit.');
+        return [];
+    }
+
+    const acceptedBatches = [];
+
+    // Split URLs into batches (IndexNow allows up to 10,000 URLs per submission)
+    for (let i = 0; i < urls.length; i += BATCH_SIZE) {
+        // slice() clamps the end index to the array length
+        const batch = urls.slice(i, i + BATCH_SIZE);
+
+        console.log(`Submitting batch of ${batch.length} URLs to IndexNow...`);
+
+        // Prepare data for IndexNow API
+        const data = JSON.stringify({
+            host: new URL(SITE_URL).hostname,
+            key: INDEXNOW_API_KEY,
+            keyLocation: `${SITE_URL}/${INDEXNOW_KEY_LOCATION}`,
+            urlList: batch
+        });
+
+        if (isTestMode) {
+            console.log('TEST MODE - would submit:');
+            console.log(`Endpoint: ${INDEXNOW_ENDPOINT}`);
+            console.log(`Data: ${data}`);
+            continue;
+        }
+
+        // Submit to IndexNow API
+        try {
+            await postToIndexNow(data);
+            console.log(`Successfully submitted ${batch.length} URLs to IndexNow.`);
+            acceptedBatches.push(batch);
+        } catch (error) {
+            console.error('Failed to submit batch to IndexNow:', error);
+            // Continue with the next batch even if this one failed
+        }
+    }
+
+    return acceptedBatches.flat();
+}
+
+/**
+ * Generate the IndexNow API key verification file
+ */
+function generateKeyFile() {
+    console.log('Generating IndexNow key verification file...');
+    const keyFilePath = path.join(__dirname, '../../public', INDEXNOW_KEY_LOCATION);
+
+    try {
+        fs.writeFileSync(keyFilePath, INDEXNOW_API_KEY);
+        console.log(`Key file generated at: ${keyFilePath}`);
+    } catch (error) {
+        console.error('Error generating key file:', error);
+    }
+}
+
+/**
+ * Main function
+ *
+ * Writes the key file, diffs the sitemap against the cache, submits the
+ * missing URLs, and records only the URLs that were actually accepted.
+ */
+async function main() {
+    console.log('Starting IndexNow URL submission...');
+
+    // Generate the key verification file
+    generateKeyFile();
+
+    // Get URLs from sitemap
+    const sitemapUrls = await getSitemapUrls();
+    console.log(`Found ${sitemapUrls.length} URLs in sitemap.`);
+
+    // Load previously submitted URLs
+    const cache = loadSubmittedUrls();
+
+    // Find URLs to submit: every sitemap URL when forced, otherwise only those not in
+    // the cache. The Set drops duplicate sitemap entries.
+    const urlsToSubmit = [...new Set(sitemapUrls)].filter(url => isForceSubmit || !cache.urls[url]);
+
+    console.log(`Found ${urlsToSubmit.length} new or updated URLs to submit.`);
+
+    // Submit URLs to IndexNow
+    const submittedUrls = await submitToIndexNow(urlsToSubmit);
+
+    // Record only accepted URLs so failed batches are retried on the next run
+    // and a test-mode run leaves the cache untouched.
+    if (submittedUrls.length > 0) {
+        cache.lastSubmission = new Date().toISOString();
+        for (const url of submittedUrls) {
+            cache.urls[url] = { lastSubmitted: cache.lastSubmission };
+        }
+        saveSubmittedUrls(cache);
+    }
+
+    if (!isTestMode && submittedUrls.length < urlsToSubmit.length) {
+        console.warn(`${urlsToSubmit.length - submittedUrls.length} URLs were not accepted and will be retried on the next run.`);
+    }
+
+    console.log('IndexNow URL submission completed.');
+}
+
+// Run the main function
+main().catch(error => {
+    console.error('IndexNow script failed:', error);
+    process.exit(1);
+});
diff --git a/scripts/search/submit-indexnow.sh b/scripts/search/submit-indexnow.sh
new file mode 100755
index 000000000000..0847f6cbb6da
--- /dev/null
+++ b/scripts/search/submit-indexnow.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+
+set -o errexit -o pipefail
+
+# This script submits URLs to IndexNow API to trigger immediate crawling by search engines
+# It uses the indexnow.js Node.js script to handle the submission process
+
+source ./scripts/common.sh
+
+echo "Submitting URLs to IndexNow..."
+node ./scripts/search/indexnow.js "$@"
diff --git a/scripts/search/test-indexnow-api.js b/scripts/search/test-indexnow-api.js
new file mode 100644
index 000000000000..10bf8f3dd477
--- /dev/null
+++ b/scripts/search/test-indexnow-api.js
@@ -0,0 +1,90 @@
+/**
+ * Test script for verifying IndexNow API functionality
+ *
+ * This script makes a real API call with a single test URL to validate
+ * that the IndexNow API works correctly with our implementation.
+ *
+ * Exits with a non-zero status when the call fails, so shell wrappers
+ * running under `set -o errexit` report the failure.
+ */
+
+const https = require('https');
+
+// IndexNow API settings
+const INDEXNOW_ENDPOINT = 'https://www.bing.com/indexnow';
+const INDEXNOW_API_KEY = process.env.INDEXNOW_API_KEY || '33134703c43349ddb227d769745f33cc';
+const TEST_URL = 'https://www.pulumi.com/';
+
+// Function to test IndexNow API
+async function testIndexNowApi() {
+    console.log('Testing IndexNow API with a single URL submission...');
+    console.log(`API Key: ${INDEXNOW_API_KEY}`);
+    console.log(`Test URL: ${TEST_URL}`);
+
+    // Prepare data for IndexNow API
+    const data = JSON.stringify({
+        host: new URL(TEST_URL).hostname,
+        key: INDEXNOW_API_KEY,
+        keyLocation: `${TEST_URL}indexnow.txt`,
+        urlList: [TEST_URL]
+    });
+
+    // Log the request
+    console.log('\nRequest data:');
+    console.log(data);
+
+    try {
+        const response = await new Promise((resolve, reject) => {
+            const options = {
+                method: 'POST',
+                headers: {
+                    'Content-Type': 'application/json',
+                    // Content-Length is a byte count, not a character count
+                    'Content-Length': Buffer.byteLength(data)
+                }
+            };
+
+            const req = https.request(INDEXNOW_ENDPOINT, options, (res) => {
+                let responseData = '';
+
+                res.setEncoding('utf8');
+                res.on('data', (chunk) => {
+                    responseData += chunk;
+                });
+
+                res.on('end', () => {
+                    resolve({
+                        statusCode: res.statusCode,
+                        headers: res.headers,
+                        body: responseData
+                    });
+                });
+            });
+
+            req.on('error', reject);
+
+            req.write(data);
+            req.end();
+        });
+
+        // Log the response
+        console.log('\nResponse:');
+        console.log(`Status code: ${response.statusCode}`);
+        console.log('Headers:', JSON.stringify(response.headers, null, 2));
+        console.log('Body:', response.body);
+
+        // Check if successful
+        if (response.statusCode >= 200 && response.statusCode < 300) {
+            console.log('\n✅ SUCCESS: IndexNow API test completed successfully!');
+        } else {
+            console.log('\n❌ ERROR: IndexNow API test failed with status code', response.statusCode);
+            process.exitCode = 1;
+        }
+    } catch (error) {
+        console.error('\n❌ ERROR: Failed to test IndexNow API:', error);
+        process.exitCode = 1;
+    }
+}
+
+// Run the test
+testIndexNowApi();
diff --git a/scripts/search/test-indexnow-api.sh b/scripts/search/test-indexnow-api.sh
new file mode 100755
index 000000000000..94bd8ba71caf
--- /dev/null
+++ b/scripts/search/test-indexnow-api.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+
+set -o errexit -o pipefail
+
+# This script tests the IndexNow API with a single URL submission
+# to verify that the API is working correctly
+
+echo "Testing IndexNow API with a single URL submission..."
+node ./scripts/search/test-indexnow-api.js
diff --git a/theme/src/ts/external-links.ts b/theme/src/ts/external-links.ts
new file mode 100644
index 000000000000..d7f139d788bb
--- /dev/null
+++ b/theme/src/ts/external-links.ts
@@ -0,0 +1,64 @@
+/**
+ * Adds target="_blank" to all external links to make them open in a new tab.
+ * Internal links (links to the same domain) remain unchanged.
+ */
+
+(function() {
+    // Strip a leading "www." so www.example.com and example.com compare as the same site
+    function normalizeHost(hostname: string): string {
+        return hostname.replace(/^www\./, '');
+    }
+
+    // Function to process all links on the page
+    function processExternalLinks() {
+        const currentDomain = normalizeHost(window.location.hostname);
+
+        // Process every link that has an href
+        document.querySelectorAll('a[href]').forEach(link => {
+            const href = link.getAttribute('href');
+            if (!href) {
+                return;
+            }
+
+            let url: URL;
+            try {
+                // Relative hrefs resolve against the current page, so they parse
+                // fine and come out with the current hostname (internal)
+                url = new URL(href, window.location.href);
+            } catch (e) {
+                // Malformed href: leave the link untouched
+                return;
+            }
+
+            // Only web links can be external pages. This skips mailto:, tel: and
+            // javascript: links, which have an empty hostname and would otherwise
+            // be mistaken for another domain.
+            if (url.protocol !== 'http:' && url.protocol !== 'https:') {
+                return;
+            }
+
+            if (normalizeHost(url.hostname) === currentDomain) {
+                return;
+            }
+
+            link.setAttribute('target', '_blank');
+
+            // Add rel="noopener" for security, keeping any existing rel tokens
+            const relTokens = (link.getAttribute('rel') || '').split(/\s+/).filter(Boolean);
+            if (relTokens.indexOf('noopener') === -1) {
+                relTokens.push('noopener');
+                link.setAttribute('rel', relTokens.join(' '));
+            }
+        });
+    }
+
+    // Execute when DOM is fully loaded
+    if (document.readyState === 'loading') {
+        document.addEventListener('DOMContentLoaded', processExternalLinks);
+    } else {
+        processExternalLinks();
+    }
+
+    // Run again once the page has fully loaded, to catch links added after the first pass
+    window.addEventListener('load', processExternalLinks);
+})();
diff --git a/theme/src/ts/main.ts b/theme/src/ts/main.ts
index 523b4b5820ad..c7d677db0b6c 100644
--- a/theme/src/ts/main.ts
+++ b/theme/src/ts/main.ts
@@ -24,6 +24,7 @@ import "./docs-main";
 import "./redirects";
 import "./algolia/autocomplete";
 import "./terraform-compare";
+import "./external-links";
 
 // Register all Stencil components.
 defineCustomElements();