/**
 * Secure URL parsing utility for comments.
 * Handles URL detection, validation, and safe link generation with SEO protection.
 */
| 5 | + |
// URL schemes that may be turned into links, written the way `URL.protocol`
// reports them (lower-case, trailing colon included).
const ALLOWED_SCHEMES: readonly string[] = ['http:', 'https:', 'mailto:']
// The same schemes without the trailing colon.
// NOTE(review): nothing in the visible file reads this list — confirm it is
// still needed before relying on it or removing it.
const ALLOWED_PROTOCOLS: readonly string[] = ['http', 'https', 'mailto']

// Maximum URL length (in UTF-16 code units) to prevent abuse
const MAX_URL_LENGTH = 2048

// Regex patterns for URL and email detection in free text.
// Both carry the `g` flag, so they keep `lastIndex` state between
// `test()` / `exec()` calls; `String.prototype.replace` and `match` are safe.
const URL_REGEX = /(https?:\/\/[^\s<>"{}|\\^`[\]]+)/gi
const EMAIL_REGEX = /([a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,})/gi
| 16 | + |
// Substrings that disqualify a URL even when its scheme is allowed.
// None of these patterns is global, so `test()` is stateless.
// NOTE(review): the patterns are unanchored, so legitimate URLs such as
// `https://example.com/?online=1` (matches `on\w+\s*=`) or `.../profile:1`
// (matches `file:`) are rejected too — confirm this strictness is intended.
const SUSPICIOUS_URL_PATTERNS: readonly RegExp[] = [
  /javascript:/i,
  /data:/i,
  /vbscript:/i,
  /file:/i,
  /<script/i,
  /on\w+\s*=/i, // onclick=, onload=, etc.
]

/**
 * Validates if a URL is safe and allowed.
 *
 * A URL is accepted only when it is at most `MAX_URL_LENGTH` characters long,
 * parses with the `URL` constructor, uses one of `ALLOWED_SCHEMES`, and
 * contains none of the `SUSPICIOUS_URL_PATTERNS`.
 *
 * @param url - Candidate URL exactly as it appeared in the text.
 * @returns `true` when every check passes; `false` otherwise. Never throws.
 */
function isValidUrl(url: string): boolean {
  try {
    // Cheapest check first: do not parse or scan oversized input.
    if (url.length > MAX_URL_LENGTH) {
      return false
    }

    // `new URL` throws on anything that is not an absolute URL.
    const { protocol } = new URL(url)
    if (!ALLOWED_SCHEMES.includes(protocol)) {
      return false
    }

    // The substring scan also covers a leading `javascript:` / `data:` /
    // `vbscript:` / `file:`, so no separate prefix check is needed.
    return !SUSPICIOUS_URL_PATTERNS.some((pattern) => pattern.test(url))
  } catch {
    return false
  }
}
| 64 | + |
// Longest address accepted by isValidEmail, in characters.
const MAX_EMAIL_LENGTH = 254

/**
 * Validates if an email address is safe to linkify.
 *
 * This is a basic shape check (`local@domain.tld`, ASCII only, TLD of at
 * least two letters) plus a length cap; it is not a full RFC 5322 validator.
 *
 * @param email - Candidate address with no surrounding whitespace.
 * @returns `true` when the whole string matches the pattern and is at most
 *   `MAX_EMAIL_LENGTH` characters long.
 */
function isValidEmail(email: string): boolean {
  // Length gate first so the regex never runs over oversized input.
  if (email.length > MAX_EMAIL_LENGTH) {
    return false
  }

  return /^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$/.test(email)
}
| 73 | + |
// HTML-significant characters and the entity each one is replaced with.
const HTML_ENTITY_BY_CHAR: Readonly<Record<string, string>> = {
  '&': '&amp;',
  '<': '&lt;',
  '>': '&gt;',
  '"': '&quot;',
  "'": '&#x27;',
  '/': '&#x2F;',
}

/**
 * Sanitizes text to prevent XSS attacks by HTML-escaping it.
 *
 * The characters `& < > " ' /` are replaced with their entities, which makes
 * the result safe to place in element content or inside a quoted attribute
 * value. Every character is escaped exactly once, so input that already
 * contains an entity (for example `&lt;`) comes out as `&amp;lt;`.
 *
 * @param text - Raw, untrusted text.
 * @returns The escaped text.
 */
function sanitizeText(text: string): string {
  // Single pass: an `&` written by one replacement is never re-escaped.
  return text.replace(/[&<>"'/]/g, (ch) => HTML_ENTITY_BY_CHAR[ch] ?? ch)
}
| 86 | + |
/**
 * Creates a safe link element with SEO protection.
 *
 * Both the URL and the visible text are HTML-escaped with `sanitizeText`
 * before being interpolated. The link opens in a new tab and carries
 * `rel="noopener noreferrer nofollow"`.
 *
 * Attributes are joined with single spaces so that the emitted HTML does not
 * contain line breaks or indentation from this source file.
 *
 * @param url - Destination URL; callers are expected to have validated it.
 * @param text - Visible link text.
 * @returns The `<a>` element as an HTML string.
 */
function createSafeLink(url: string, text: string): string {
  const attributes = [
    `href="${sanitizeText(url)}"`,
    'target="_blank"',
    'rel="noopener noreferrer nofollow"',
    'class="text-blue-600 hover:text-blue-800 underline break-words"',
    'title="External link"',
  ].join(' ')

  return `<a ${attributes}>${sanitizeText(text)}</a>`
}
| 100 | + |
/**
 * Creates a safe mailto link.
 *
 * The address is HTML-escaped with `sanitizeText` for both the `href` and the
 * visible text. In the `href` a literal `%` is additionally percent-encoded
 * as `%25`, because inside a `mailto:` URI a bare `%` would be read as the
 * start of a percent-escape.
 *
 * Attributes are joined with single spaces so that the emitted HTML does not
 * contain line breaks or indentation from this source file.
 *
 * @param email - Email address; callers are expected to have validated it.
 * @returns The `<a>` element as an HTML string.
 */
function createSafeMailtoLink(email: string): string {
  const hrefAddress = sanitizeText(email.replace(/%/g, '%25'))
  const attributes = [
    `href="mailto:${hrefAddress}"`,
    'class="text-blue-600 hover:text-blue-800 underline break-words"',
    'title="Send email"',
  ].join(' ')

  return `<a ${attributes}>${sanitizeText(email)}</a>`
}
| 111 | + |
/**
 * Parses text and converts URLs and emails to safe clickable links.
 *
 * The input is treated as raw, untrusted text and the result is an HTML
 * string: every recognised `http(s)` URL or email address that passes
 * validation becomes an `<a>` element, and everything else — including
 * candidates that fail validation — is HTML-escaped with `sanitizeText`.
 *
 * The text is scanned once with a combined URL-or-email pattern, so an
 * address that is part of a URL (for example `https://host/?to=a@b.co`) stays
 * inside that URL's link, and markup generated for one link is never
 * re-scanned for further matches.
 *
 * NOTE(review): pass raw text here, not pre-escaped HTML; already-escaped
 * input would be escaped a second time.
 * NOTE(review): trailing sentence punctuation (`.`, `,`, …) directly after a
 * URL is still captured as part of the URL by `URL_REGEX`.
 *
 * @param text - Raw comment text.
 * @returns HTML that is safe to insert as element content; `''` for empty or
 *   non-string input.
 */
export function parseUrlsInText(text: string): string {
  if (!text || typeof text !== 'string') {
    return ''
  }

  // Capture group 1 is a URL, group 2 an email address. The regex is built
  // per call so its `lastIndex` state is never shared between invocations.
  const linkCandidate = new RegExp(`${URL_REGEX.source}|${EMAIL_REGEX.source}`, 'gi')

  let html = ''
  let cursor = 0
  let found: RegExpExecArray | null

  // Both alternatives need at least one character, so `exec` always advances.
  while ((found = linkCandidate.exec(text)) !== null) {
    const token = found[0]
    const isUrl = found[1] !== undefined

    // Plain text between the previous match and this one.
    html += sanitizeText(text.slice(cursor, found.index))

    if (isUrl) {
      html += isValidUrl(token) ? createSafeLink(token, token) : sanitizeText(token)
    } else {
      html += isValidEmail(token) ? createSafeMailtoLink(token) : sanitizeText(token)
    }

    cursor = found.index + token.length
  }

  // Plain text after the last match (or the whole input when nothing matched).
  return html + sanitizeText(text.slice(cursor))
}
| 140 | + |
/**
 * Hook for parsing URLs in text (for use in forms, etc.).
 *
 * It holds no state: every call returns a new object whose `parseUrls`
 * property is the module's `parseUrlsInText` function.
 *
 * @returns An object exposing `parseUrls`.
 */
export function useUrlParser(): { parseUrls: (text: string) => string } {
  return {
    parseUrls: parseUrlsInText,
  }
}
0 commit comments