Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docusaurus.config.js
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,7 @@ module.exports = {
allowedInDev: false,
},
],
resolve(__dirname, 'src/plugins/docusaurus-plugin-preview-meta'),
() => ({
name: 'webpack-loader-fix',
configureWebpack() {
Expand Down
6 changes: 6 additions & 0 deletions nginx.conf
Original file line number Diff line number Diff line change
Expand Up @@ -810,6 +810,12 @@ server {
resolver 172.20.0.10;
server_name ~^(?<subdomain>[^.]+)\.preview\.docs\.apify\.com$;

# Block search indexing on every response from preview hostnames, including the
# apify.github.io proxy_passes (SDK/Client/CLI) that have no preview bucket and
# would otherwise serve production content under the preview hostname.
# `always` keeps the header on 4xx/5xx so error pages aren't indexed either.
add_header X-Robots-Tag "noindex, nofollow, noarchive" always;

# add trailing slashes to the root of GH pages docs
rewrite ^/api/client/js$ /api/client/js/ redirect;
rewrite ^/api/client/python$ /api/client/python/ redirect;
Expand Down
73 changes: 73 additions & 0 deletions src/plugins/docusaurus-plugin-preview-meta/index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
const fs = require('node:fs/promises');
const path = require('node:path');

// Origin (scheme + host, no trailing slash) that every canonical URL built in this file is rooted at.
const CANONICAL_ORIGIN = 'https://docs.apify.com';

/**
 * Decides whether the current build targets a preview deployment.
 *
 * The decision is based solely on the hostname of the URL stored in the
 * `APIFY_DOCS_ABSOLUTE_URL` environment variable: the build counts as a preview
 * when that hostname contains `pr-` or `preview`.
 *
 * @returns {boolean} `true` for a preview hostname; `false` when the variable is
 *   unset, empty, not a parseable URL, or has a hostname without either marker.
 */
function isPreviewBuild() {
    const absoluteUrl = process.env.APIFY_DOCS_ABSOLUTE_URL;
    if (!absoluteUrl) return false;

    let host;
    try {
        host = new URL(absoluteUrl).hostname;
    } catch {
        // An unparseable value is treated the same as a missing one.
        return false;
    }

    return ['pr-', 'preview'].some((marker) => host.includes(marker));
}

/**
 * Recursively collects every `.html` file underneath a directory.
 *
 * Sub-directories are descended into concurrently; regular files are kept only
 * when their name ends with `.html` (case-sensitive). Anything else is ignored.
 *
 * @param {string} dir - Directory to scan.
 * @returns {Promise<string[]>} Paths of the HTML files found, each built by
 *   joining `dir` with the entry names leading to the file.
 */
async function walkHtmlFiles(dir) {
    const dirents = await fs.readdir(dir, { withFileTypes: true });

    // Holds either a pending recursive scan (directory) or a one-element list (HTML file).
    const pending = [];
    for (const dirent of dirents) {
        const target = path.join(dir, dirent.name);
        if (dirent.isDirectory()) {
            pending.push(walkHtmlFiles(target));
        } else if (dirent.isFile() && dirent.name.endsWith('.html')) {
            pending.push([target]);
        }
    }

    const nested = await Promise.all(pending);
    return nested.flat();
}

/**
 * Maps a built HTML file to the canonical URL it should advertise.
 *
 * The file's path relative to `outDir` becomes the URL path under
 * `CANONICAL_ORIGIN`: an `index.html` segment is dropped (so `a/index.html`
 * becomes `/a` and the root `index.html` becomes the bare origin), and a
 * trailing `.html` extension is removed from any other file.
 *
 * @param {string} outDir - Build output directory the file lives under.
 * @param {string} file - Path of the HTML file.
 * @returns {string} Canonical URL for the file, without a trailing slash.
 */
function canonicalUrlForFile(outDir, file) {
    // Normalise Windows separators so the relative path can be used as a URL path.
    const relative = path.relative(outDir, file).replace(/\\/g, '/');
    // Docusaurus serves 404.html at the literal /404.html path; preserve it for parity with production.
    if (relative === '404.html') return `${CANONICAL_ORIGIN}/404.html`;
    // Strip `index.html` only when it is a whole path segment. An optional slash here would also
    // truncate names that merely end in "index.html" (e.g. `reindex.html` -> `re`).
    const urlPath = relative.replace(/(^|\/)index\.html$/, '').replace(/\.html$/, '');
    return urlPath ? `${CANONICAL_ORIGIN}/${urlPath}` : CANONICAL_ORIGIN;
}

// Matches the first `<link ... rel=canonical ...>` tag in a page, case-insensitively.
// SWC's HTML minifier strips quotes from attributes and omits </head>, so the quotes around
// `canonical` are optional here, and code that inserts a missing tag needs a `<body` fallback
// for pages that have no `</head>` to anchor on.
const CANONICAL_TAG_REGEX = /<link\b[^>]*?\brel=["']?canonical["']?[^>]*>/i;

module.exports = function previewMetaPlugin() {
return {
name: 'docusaurus-plugin-preview-meta',
async postBuild({ outDir }) {
if (!isPreviewBuild()) return;

await fs.writeFile(path.join(outDir, 'robots.txt'), 'User-agent: *\nDisallow: /\n');

const htmlFiles = await walkHtmlFiles(outDir);
await Promise.all(
htmlFiles.map(async (file) => {
const content = await fs.readFile(file, 'utf8');
const canonicalUrl = canonicalUrlForFile(outDir, file);
const canonicalTag = `<link rel="canonical" href="${canonicalUrl}"/>`;

let next;
if (CANONICAL_TAG_REGEX.test(content)) {
next = content.replace(CANONICAL_TAG_REGEX, canonicalTag);
} else if (content.includes('</head>')) {
next = content.replace('</head>', `${canonicalTag}</head>`);
} else if (content.includes('<body')) {
next = content.replace('<body', `${canonicalTag}<body`);
} else {
return;
}

if (next !== content) await fs.writeFile(file, next);
}),
);
},
};
};
Loading