From d1b4e627729209710946f74276bd5d80d8a17de2 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 19 Dec 2025 08:18:47 +0000 Subject: [PATCH 1/3] Initial plan From 3ae91a57352da924dc7767e348566208eba28105 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 19 Dec 2025 08:36:38 +0000 Subject: [PATCH 2/3] feat: add protocol-specific domain allowlisting (http/https) Co-authored-by: Mossaka <5447827+Mossaka@users.noreply.github.com> --- README.md | 32 +++++ src/cli.ts | 10 +- src/domain-patterns.test.ts | 223 +++++++++++++++++++++++++++++++---- src/domain-patterns.ts | 130 ++++++++++++++++++--- src/squid-config.test.ts | 145 +++++++++++++++++++++-- src/squid-config.ts | 225 +++++++++++++++++++++++++++++------- 6 files changed, 671 insertions(+), 94 deletions(-) diff --git a/README.md b/README.md index a6b244174..63dff26c2 100644 --- a/README.md +++ b/README.md @@ -130,6 +130,38 @@ You can use wildcard patterns with `*` to match multiple domains: | `api-*.example.com` | `api-v1.example.com`, `api-test.example.com` | `api.example.com` | | `github.com` | `github.com`, `api.github.com` | `notgithub.com` | +### Protocol-Specific Domains + +You can restrict domains to allow only HTTP or only HTTPS traffic by using protocol prefixes: + +```bash +# Allow only HTTPS traffic to secure.example.com +--allow-domains 'https://secure.example.com' + +# Allow only HTTP traffic to legacy-api.example.com +--allow-domains 'http://legacy-api.example.com' + +# Default: allow both HTTP and HTTPS +--allow-domains 'example.com' + +# Mix all three options +--allow-domains 'example.com,https://secure.example.com,http://legacy.example.com' +``` + +**Protocol rules:** +- `https://domain.com` - Allow only HTTPS (port 443) traffic +- `http://domain.com` - Allow only HTTP (port 80) traffic +- `domain.com` - Allow both HTTP and HTTPS (default, backward compatible) +- Protocol 
prefixes work with wildcards: `https://*.secure.example.com` + +**Examples:** +| Input | HTTP Traffic | HTTPS Traffic | +|-------|--------------|---------------| +| `github.com` | ✓ Allowed | ✓ Allowed | +| `https://api.github.com` | ✗ Blocked | ✓ Allowed | +| `http://legacy-api.example.com` | ✓ Allowed | ✗ Blocked | +| `https://*.secure.example.com` | ✗ Blocked | ✓ Allowed | + ### Using Command-Line Flag Common domain lists: diff --git a/src/cli.ts b/src/cli.ts index 0bf2e75c7..881bb2ead 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -300,10 +300,12 @@ program .version(version) .option( '--allow-domains ', - 'Comma-separated list of allowed domains. Supports wildcards:\n' + - ' github.com - exact domain + subdomains\n' + - ' *.github.com - any subdomain of github.com\n' + - ' api-*.example.com - api-* subdomains' + 'Comma-separated list of allowed domains. Supports wildcards and protocol prefixes:\n' + + ' github.com - exact domain + subdomains (HTTP & HTTPS)\n' + + ' *.github.com - any subdomain of github.com\n' + + ' api-*.example.com - api-* subdomains\n' + + ' https://secure.com - HTTPS only\n' + + ' http://legacy.com - HTTP only' ) .option( '--allow-domains-file ', diff --git a/src/domain-patterns.test.ts b/src/domain-patterns.test.ts index ac88c57e5..e1a61c82d 100644 --- a/src/domain-patterns.test.ts +++ b/src/domain-patterns.test.ts @@ -4,8 +4,80 @@ import { validateDomainOrPattern, parseDomainList, isDomainMatchedByPattern, + parseDomainWithProtocol, } from './domain-patterns'; +describe('parseDomainWithProtocol', () => { + it('should parse domain without protocol as "both"', () => { + expect(parseDomainWithProtocol('github.com')).toEqual({ + domain: 'github.com', + protocol: 'both', + }); + }); + + it('should parse http:// prefix as "http"', () => { + expect(parseDomainWithProtocol('http://github.com')).toEqual({ + domain: 'github.com', + protocol: 'http', + }); + }); + + it('should parse https:// prefix as "https"', () => { + 
expect(parseDomainWithProtocol('https://github.com')).toEqual({ + domain: 'github.com', + protocol: 'https', + }); + }); + + it('should strip trailing slash', () => { + expect(parseDomainWithProtocol('github.com/')).toEqual({ + domain: 'github.com', + protocol: 'both', + }); + expect(parseDomainWithProtocol('http://github.com/')).toEqual({ + domain: 'github.com', + protocol: 'http', + }); + expect(parseDomainWithProtocol('https://github.com/')).toEqual({ + domain: 'github.com', + protocol: 'https', + }); + }); + + it('should trim whitespace', () => { + expect(parseDomainWithProtocol(' github.com ')).toEqual({ + domain: 'github.com', + protocol: 'both', + }); + expect(parseDomainWithProtocol(' http://github.com ')).toEqual({ + domain: 'github.com', + protocol: 'http', + }); + }); + + it('should handle wildcard patterns with protocol', () => { + expect(parseDomainWithProtocol('http://*.example.com')).toEqual({ + domain: '*.example.com', + protocol: 'http', + }); + expect(parseDomainWithProtocol('https://*.secure.com')).toEqual({ + domain: '*.secure.com', + protocol: 'https', + }); + }); + + it('should handle subdomains with protocol', () => { + expect(parseDomainWithProtocol('http://api.github.com')).toEqual({ + domain: 'api.github.com', + protocol: 'http', + }); + expect(parseDomainWithProtocol('https://secure.api.github.com')).toEqual({ + domain: 'secure.api.github.com', + protocol: 'https', + }); + }); +}); + describe('isWildcardPattern', () => { it('should detect asterisk wildcard', () => { expect(isWildcardPattern('*.github.com')).toBe(true); @@ -156,14 +228,45 @@ describe('validateDomainOrPattern', () => { expect(() => validateDomainOrPattern('*.*.com')).toThrow("too many wildcard segments"); }); }); + + describe('protocol-prefixed domains', () => { + it('should accept valid http:// prefixed domains', () => { + expect(() => validateDomainOrPattern('http://github.com')).not.toThrow(); + expect(() => 
validateDomainOrPattern('http://api.github.com')).not.toThrow(); + }); + + it('should accept valid https:// prefixed domains', () => { + expect(() => validateDomainOrPattern('https://github.com')).not.toThrow(); + expect(() => validateDomainOrPattern('https://secure.example.com')).not.toThrow(); + }); + + it('should accept protocol-prefixed wildcard patterns', () => { + expect(() => validateDomainOrPattern('http://*.example.com')).not.toThrow(); + expect(() => validateDomainOrPattern('https://*.secure.com')).not.toThrow(); + }); + + it('should reject protocol prefix with empty domain', () => { + expect(() => validateDomainOrPattern('http://')).toThrow('cannot be empty'); + expect(() => validateDomainOrPattern('https://')).toThrow('cannot be empty'); + }); + + it('should reject overly broad patterns even with protocol prefix', () => { + expect(() => validateDomainOrPattern('http://*')).toThrow("matches all domains"); + expect(() => validateDomainOrPattern('https://*.*')).toThrow("too broad"); + }); + }); }); describe('parseDomainList', () => { it('should separate plain domains from patterns', () => { const result = parseDomainList(['github.com', '*.gitlab.com', 'example.com']); - expect(result.plainDomains).toEqual(['github.com', 'example.com']); + expect(result.plainDomains).toEqual([ + { domain: 'github.com', protocol: 'both' }, + { domain: 'example.com', protocol: 'both' }, + ]); expect(result.patterns).toHaveLength(1); expect(result.patterns[0].original).toBe('*.gitlab.com'); + expect(result.patterns[0].protocol).toBe('both'); }); it('should convert patterns to regex', () => { @@ -173,7 +276,11 @@ describe('parseDomainList', () => { it('should handle all plain domains', () => { const result = parseDomainList(['github.com', 'gitlab.com', 'example.com']); - expect(result.plainDomains).toEqual(['github.com', 'gitlab.com', 'example.com']); + expect(result.plainDomains).toEqual([ + { domain: 'github.com', protocol: 'both' }, + { domain: 'gitlab.com', protocol: 
'both' }, + { domain: 'example.com', protocol: 'both' }, + ]); expect(result.patterns).toHaveLength(0); }); @@ -193,46 +300,118 @@ describe('parseDomainList', () => { expect(result.plainDomains).toHaveLength(0); expect(result.patterns).toHaveLength(0); }); + + describe('protocol parsing', () => { + it('should parse http:// prefix as http protocol', () => { + const result = parseDomainList(['http://github.com']); + expect(result.plainDomains).toEqual([ + { domain: 'github.com', protocol: 'http' }, + ]); + }); + + it('should parse https:// prefix as https protocol', () => { + const result = parseDomainList(['https://github.com']); + expect(result.plainDomains).toEqual([ + { domain: 'github.com', protocol: 'https' }, + ]); + }); + + it('should handle mixed protocols', () => { + const result = parseDomainList(['http://api.example.com', 'https://secure.example.com', 'example.com']); + expect(result.plainDomains).toEqual([ + { domain: 'api.example.com', protocol: 'http' }, + { domain: 'secure.example.com', protocol: 'https' }, + { domain: 'example.com', protocol: 'both' }, + ]); + }); + + it('should handle protocol-prefixed wildcard patterns', () => { + const result = parseDomainList(['http://*.example.com', 'https://*.secure.com']); + expect(result.patterns).toEqual([ + { original: '*.example.com', regex: '^.*\\.example\\.com$', protocol: 'http' }, + { original: '*.secure.com', regex: '^.*\\.secure\\.com$', protocol: 'https' }, + ]); + }); + + it('should strip trailing slash after protocol', () => { + const result = parseDomainList(['http://github.com/', 'https://example.com/']); + expect(result.plainDomains).toEqual([ + { domain: 'github.com', protocol: 'http' }, + { domain: 'example.com', protocol: 'https' }, + ]); + }); + }); }); describe('isDomainMatchedByPattern', () => { it('should match domain against leading wildcard', () => { - const patterns = [{ original: '*.github.com', regex: '^.*\\.github\\.com$' }]; - expect(isDomainMatchedByPattern('api.github.com', 
patterns)).toBe(true); - expect(isDomainMatchedByPattern('raw.github.com', patterns)).toBe(true); + const patterns = [{ original: '*.github.com', regex: '^.*\\.github\\.com$', protocol: 'both' as const }]; + expect(isDomainMatchedByPattern({ domain: 'api.github.com', protocol: 'both' }, patterns)).toBe(true); + expect(isDomainMatchedByPattern({ domain: 'raw.github.com', protocol: 'both' }, patterns)).toBe(true); }); it('should not match domain that does not fit pattern', () => { - const patterns = [{ original: '*.github.com', regex: '^.*\\.github\\.com$' }]; - expect(isDomainMatchedByPattern('github.com', patterns)).toBe(false); - expect(isDomainMatchedByPattern('gitlab.com', patterns)).toBe(false); - expect(isDomainMatchedByPattern('notgithub.com', patterns)).toBe(false); + const patterns = [{ original: '*.github.com', regex: '^.*\\.github\\.com$', protocol: 'both' as const }]; + expect(isDomainMatchedByPattern({ domain: 'github.com', protocol: 'both' }, patterns)).toBe(false); + expect(isDomainMatchedByPattern({ domain: 'gitlab.com', protocol: 'both' }, patterns)).toBe(false); + expect(isDomainMatchedByPattern({ domain: 'notgithub.com', protocol: 'both' }, patterns)).toBe(false); }); it('should match against middle wildcard', () => { - const patterns = [{ original: 'api-*.example.com', regex: '^api-.*\\.example\\.com$' }]; - expect(isDomainMatchedByPattern('api-v1.example.com', patterns)).toBe(true); - expect(isDomainMatchedByPattern('api-test.example.com', patterns)).toBe(true); - expect(isDomainMatchedByPattern('api.example.com', patterns)).toBe(false); + const patterns = [{ original: 'api-*.example.com', regex: '^api-.*\\.example\\.com$', protocol: 'both' as const }]; + expect(isDomainMatchedByPattern({ domain: 'api-v1.example.com', protocol: 'both' }, patterns)).toBe(true); + expect(isDomainMatchedByPattern({ domain: 'api-test.example.com', protocol: 'both' }, patterns)).toBe(true); + expect(isDomainMatchedByPattern({ domain: 'api.example.com', protocol: 
'both' }, patterns)).toBe(false); }); it('should match against any pattern in list', () => { const patterns = [ - { original: '*.github.com', regex: '^.*\\.github\\.com$' }, - { original: '*.gitlab.com', regex: '^.*\\.gitlab\\.com$' }, + { original: '*.github.com', regex: '^.*\\.github\\.com$', protocol: 'both' as const }, + { original: '*.gitlab.com', regex: '^.*\\.gitlab\\.com$', protocol: 'both' as const }, ]; - expect(isDomainMatchedByPattern('api.github.com', patterns)).toBe(true); - expect(isDomainMatchedByPattern('api.gitlab.com', patterns)).toBe(true); - expect(isDomainMatchedByPattern('api.bitbucket.com', patterns)).toBe(false); + expect(isDomainMatchedByPattern({ domain: 'api.github.com', protocol: 'both' }, patterns)).toBe(true); + expect(isDomainMatchedByPattern({ domain: 'api.gitlab.com', protocol: 'both' }, patterns)).toBe(true); + expect(isDomainMatchedByPattern({ domain: 'api.bitbucket.com', protocol: 'both' }, patterns)).toBe(false); }); it('should be case-insensitive', () => { - const patterns = [{ original: '*.GitHub.com', regex: '^.*\\.GitHub\\.com$' }]; - expect(isDomainMatchedByPattern('API.GITHUB.COM', patterns)).toBe(true); - expect(isDomainMatchedByPattern('api.github.com', patterns)).toBe(true); + const patterns = [{ original: '*.GitHub.com', regex: '^.*\\.GitHub\\.com$', protocol: 'both' as const }]; + expect(isDomainMatchedByPattern({ domain: 'API.GITHUB.COM', protocol: 'both' }, patterns)).toBe(true); + expect(isDomainMatchedByPattern({ domain: 'api.github.com', protocol: 'both' }, patterns)).toBe(true); }); it('should return false for empty pattern list', () => { - expect(isDomainMatchedByPattern('api.github.com', [])).toBe(false); + expect(isDomainMatchedByPattern({ domain: 'api.github.com', protocol: 'both' }, [])).toBe(false); + }); + + describe('protocol compatibility', () => { + it('should match when pattern has "both" protocol', () => { + const patterns = [{ original: '*.github.com', regex: '^.*\\.github\\.com$', protocol: 'both' 
as const }]; + expect(isDomainMatchedByPattern({ domain: 'api.github.com', protocol: 'http' }, patterns)).toBe(true); + expect(isDomainMatchedByPattern({ domain: 'api.github.com', protocol: 'https' }, patterns)).toBe(true); + expect(isDomainMatchedByPattern({ domain: 'api.github.com', protocol: 'both' }, patterns)).toBe(true); + }); + + it('should not fully cover "both" domain with single protocol pattern', () => { + const httpPatterns = [{ original: '*.github.com', regex: '^.*\\.github\\.com$', protocol: 'http' as const }]; + const httpsPatterns = [{ original: '*.github.com', regex: '^.*\\.github\\.com$', protocol: 'https' as const }]; + // A domain that needs "both" cannot be fully covered by a single-protocol pattern + expect(isDomainMatchedByPattern({ domain: 'api.github.com', protocol: 'both' }, httpPatterns)).toBe(false); + expect(isDomainMatchedByPattern({ domain: 'api.github.com', protocol: 'both' }, httpsPatterns)).toBe(false); + }); + + it('should match when protocols match exactly', () => { + const httpPatterns = [{ original: '*.github.com', regex: '^.*\\.github\\.com$', protocol: 'http' as const }]; + const httpsPatterns = [{ original: '*.github.com', regex: '^.*\\.github\\.com$', protocol: 'https' as const }]; + expect(isDomainMatchedByPattern({ domain: 'api.github.com', protocol: 'http' }, httpPatterns)).toBe(true); + expect(isDomainMatchedByPattern({ domain: 'api.github.com', protocol: 'https' }, httpsPatterns)).toBe(true); + }); + + it('should not match when protocols do not match', () => { + const httpPatterns = [{ original: '*.github.com', regex: '^.*\\.github\\.com$', protocol: 'http' as const }]; + const httpsPatterns = [{ original: '*.github.com', regex: '^.*\\.github\\.com$', protocol: 'https' as const }]; + expect(isDomainMatchedByPattern({ domain: 'api.github.com', protocol: 'https' }, httpPatterns)).toBe(false); + expect(isDomainMatchedByPattern({ domain: 'api.github.com', protocol: 'http' }, httpsPatterns)).toBe(false); + }); }); }); diff 
--git a/src/domain-patterns.ts b/src/domain-patterns.ts index 41c32f620..5f5ae30b9 100644 --- a/src/domain-patterns.ts +++ b/src/domain-patterns.ts @@ -5,7 +5,62 @@ * Examples: * *.github.com -> matches api.github.com, raw.github.com, etc. * api-*.example.com -> matches api-v1.example.com, api-test.example.com, etc. + * + * Also supports protocol-specific domain allowlisting: + * http://github.com -> allow only HTTP traffic (port 80) + * https://github.com -> allow only HTTPS traffic (port 443) + * github.com -> allow both HTTP and HTTPS (default) + */ + +/** + * Protocol restriction for a domain */ +export type DomainProtocol = 'http' | 'https' | 'both'; + +/** + * Parsed domain with protocol information + */ +export interface ParsedDomain { + /** The domain name without protocol prefix */ + domain: string; + /** Which protocol(s) are allowed */ + protocol: DomainProtocol; +} + +/** + * Parse a domain string and extract protocol restriction if present + * + * @param input - Domain string, optionally prefixed with http:// or https:// + * @returns ParsedDomain with the domain and protocol restriction + * + * Examples: + * 'github.com' -> { domain: 'github.com', protocol: 'both' } + * 'http://github.com' -> { domain: 'github.com', protocol: 'http' } + * 'https://github.com' -> { domain: 'github.com', protocol: 'https' } + */ +export function parseDomainWithProtocol(input: string): ParsedDomain { + const trimmed = input.trim(); + + if (trimmed.startsWith('http://')) { + return { + domain: trimmed.slice(7).replace(/\/$/, ''), + protocol: 'http', + }; + } + + if (trimmed.startsWith('https://')) { + return { + domain: trimmed.slice(8).replace(/\/$/, ''), + protocol: 'https', + }; + } + + // No protocol prefix - allow both + return { + domain: trimmed.replace(/\/$/, ''), + protocol: 'both', + }; +} /** * Check if a domain string contains wildcard characters @@ -70,7 +125,7 @@ export function wildcardToRegex(pattern: string): string { /** * Validate a domain or wildcard 
pattern * - * @param input - Domain or pattern to validate + * @param input - Domain or pattern to validate (may include protocol prefix) * @throws Error if the input is invalid or too broad */ export function validateDomainOrPattern(input: string): void { @@ -79,7 +134,14 @@ export function validateDomainOrPattern(input: string): void { throw new Error('Domain cannot be empty'); } - const trimmed = input.trim(); + // Strip protocol prefix for validation + const parsed = parseDomainWithProtocol(input); + const trimmed = parsed.domain; + + // Check for empty domain after stripping protocol + if (!trimmed || trimmed === '') { + throw new Error('Domain cannot be empty'); + } // Check for overly broad patterns if (trimmed === '*') { @@ -130,35 +192,52 @@ export function validateDomainOrPattern(input: string): void { export interface DomainPattern { original: string; regex: string; + protocol: DomainProtocol; +} + +/** + * A plain domain entry with protocol restriction + */ +export interface PlainDomainEntry { + domain: string; + protocol: DomainProtocol; } export interface ParsedDomainList { - plainDomains: string[]; + /** Plain domains without wildcards */ + plainDomains: PlainDomainEntry[]; + /** Wildcard patterns with regex */ patterns: DomainPattern[]; } /** * Parse and categorize domains into plain domains and wildcard patterns * - * @param domains - Array of domain strings (may include wildcards) + * @param domains - Array of domain strings (may include wildcards and protocol prefixes) * @returns Object with plainDomains and patterns arrays * @throws Error if any domain/pattern is invalid */ export function parseDomainList(domains: string[]): ParsedDomainList { - const plainDomains: string[] = []; + const plainDomains: PlainDomainEntry[] = []; const patterns: DomainPattern[] = []; - for (const domain of domains) { + for (const domainInput of domains) { // Validate each domain/pattern - validateDomainOrPattern(domain); + validateDomainOrPattern(domainInput); + + 
// Parse protocol and domain + const parsed = parseDomainWithProtocol(domainInput); + const domain = parsed.domain; + const protocol = parsed.protocol; if (isWildcardPattern(domain)) { patterns.push({ original: domain, regex: wildcardToRegex(domain), + protocol, }); } else { - plainDomains.push(domain); + plainDomains.push({ domain, protocol }); } } @@ -167,23 +246,44 @@ export function parseDomainList(domains: string[]): ParsedDomainList { /** * Check if a plain domain would be matched by any of the wildcard patterns + * considering protocol restrictions. + * + * A domain is only considered "matched" if both: + * 1. The domain matches the pattern regex + * 2. The pattern's protocol restriction covers the domain's protocol * - * Used to remove redundant plain domains when a pattern already covers them. + * Protocol compatibility: + * - Pattern 'both' covers any domain protocol (http, https, both) + * - Pattern 'http' only covers domain with 'http' protocol + * - Pattern 'https' only covers domain with 'https' protocol * - * @param domain - Plain domain to check - * @param patterns - Array of wildcard patterns with their regex - * @returns true if the domain matches any pattern + * @param domainEntry - Plain domain entry with protocol to check + * @param patterns - Array of wildcard patterns with their regex and protocol + * @returns true if the domain is fully covered by a pattern */ export function isDomainMatchedByPattern( - domain: string, + domainEntry: PlainDomainEntry, patterns: DomainPattern[] ): boolean { for (const pattern of patterns) { try { // Use case-insensitive matching (DNS is case-insensitive) const regex = new RegExp(pattern.regex, 'i'); - if (regex.test(domain)) { - return true; + if (regex.test(domainEntry.domain)) { + // Check protocol compatibility + // Pattern 'both' covers any domain + if (pattern.protocol === 'both') { + return true; + } + // If domain is 'both', it needs a 'both' pattern to be fully covered + if (domainEntry.protocol === 
'both') { + // Pattern 'http' or 'https' cannot fully cover a 'both' domain + continue; + } + // Pattern matches specific protocol + if (pattern.protocol === domainEntry.protocol) { + return true; + } } } catch { // Invalid regex, skip this pattern diff --git a/src/squid-config.test.ts b/src/squid-config.test.ts index fb63f912d..a0ab7938a 100644 --- a/src/squid-config.test.ts +++ b/src/squid-config.test.ts @@ -4,27 +4,53 @@ import { SquidConfig } from './types'; describe('generateSquidConfig', () => { const defaultPort = 3128; - describe('Domain Normalization', () => { - it('should remove http:// protocol prefix', () => { + describe('Protocol-Specific Domain Handling', () => { + it('should treat http:// prefix as HTTP-only domain', () => { const config: SquidConfig = { domains: ['http://github.com'], port: defaultPort, }; const result = generateSquidConfig(config); - expect(result).toContain('acl allowed_domains dstdomain .github.com'); + expect(result).toContain('acl allowed_http_only dstdomain .github.com'); + expect(result).toContain('http_access allow !CONNECT allowed_http_only'); expect(result).not.toContain('http://'); }); - it('should remove https:// protocol prefix', () => { + it('should treat https:// prefix as HTTPS-only domain', () => { const config: SquidConfig = { domains: ['https://api.github.com'], port: defaultPort, }; const result = generateSquidConfig(config); - expect(result).toContain('acl allowed_domains dstdomain .api.github.com'); + expect(result).toContain('acl allowed_https_only dstdomain .api.github.com'); + expect(result).toContain('http_access allow CONNECT allowed_https_only'); expect(result).not.toContain('https://'); }); + it('should treat domain without prefix as allowing both protocols', () => { + const config: SquidConfig = { + domains: ['github.com'], + port: defaultPort, + }; + const result = generateSquidConfig(config); + expect(result).toContain('acl allowed_domains dstdomain .github.com'); + 
expect(result).toContain('http_access deny !allowed_domains'); + }); + + it('should handle mixed protocol domains', () => { + const config: SquidConfig = { + domains: ['http://api.httponly.com', 'https://secure.httpsonly.com', 'both.com'], + port: defaultPort, + }; + const result = generateSquidConfig(config); + // HTTP-only domain + expect(result).toContain('acl allowed_http_only dstdomain .api.httponly.com'); + // HTTPS-only domain + expect(result).toContain('acl allowed_https_only dstdomain .secure.httpsonly.com'); + // Both protocols domain + expect(result).toContain('acl allowed_domains dstdomain .both.com'); + }); + it('should remove trailing slash', () => { const config: SquidConfig = { domains: ['github.com/'], @@ -35,13 +61,13 @@ describe('generateSquidConfig', () => { expect(result).not.toMatch(/github\.com\//); }); - it('should remove both protocol and trailing slash', () => { + it('should remove trailing slash with protocol prefix', () => { const config: SquidConfig = { domains: ['https://example.com/'], port: defaultPort, }; const result = generateSquidConfig(config); - expect(result).toContain('acl allowed_domains dstdomain .example.com'); + expect(result).toContain('acl allowed_https_only dstdomain .example.com'); expect(result).not.toContain('https://'); expect(result).not.toMatch(/example\.com\//); }); @@ -62,8 +88,8 @@ describe('generateSquidConfig', () => { port: defaultPort, }; const result = generateSquidConfig(config); - // Path should be preserved (Squid handles domain matching) - expect(result).toContain('acl allowed_domains dstdomain .api.github.com/v3/users'); + // Path should be preserved (Squid handles domain matching), as HTTPS-only + expect(result).toContain('acl allowed_https_only dstdomain .api.github.com/v3/users'); }); }); @@ -692,4 +718,105 @@ describe('generateSquidConfig', () => { expect(result).toContain('# ACL definitions for allowed domain patterns'); }); }); + + describe('Protocol-Specific Wildcard Patterns', () => { + 
it('should handle HTTP-only wildcard patterns', () => { + const config: SquidConfig = { + domains: ['http://*.example.com'], + port: defaultPort, + }; + const result = generateSquidConfig(config); + expect(result).toContain('acl allowed_http_only_regex dstdom_regex -i'); + expect(result).toContain('^.*\\.example\\.com$'); + expect(result).toContain('http_access allow !CONNECT allowed_http_only_regex'); + }); + + it('should handle HTTPS-only wildcard patterns', () => { + const config: SquidConfig = { + domains: ['https://*.secure.com'], + port: defaultPort, + }; + const result = generateSquidConfig(config); + expect(result).toContain('acl allowed_https_only_regex dstdom_regex -i'); + expect(result).toContain('^.*\\.secure\\.com$'); + expect(result).toContain('http_access allow CONNECT allowed_https_only_regex'); + }); + + it('should handle mixed protocol wildcard patterns', () => { + const config: SquidConfig = { + domains: ['http://*.api.com', 'https://*.secure.com', '*.both.com'], + port: defaultPort, + }; + const result = generateSquidConfig(config); + // HTTP-only pattern + expect(result).toContain('acl allowed_http_only_regex dstdom_regex -i ^.*\\.api\\.com$'); + // HTTPS-only pattern + expect(result).toContain('acl allowed_https_only_regex dstdom_regex -i ^.*\\.secure\\.com$'); + // Both protocols pattern + expect(result).toContain('acl allowed_domains_regex dstdom_regex -i ^.*\\.both\\.com$'); + }); + }); + + describe('Protocol Access Rules Order', () => { + it('should put protocol-specific allow rules before deny rule', () => { + const config: SquidConfig = { + domains: ['http://api.example.com', 'both.com'], + port: defaultPort, + }; + const result = generateSquidConfig(config); + const allowIndex = result.indexOf('http_access allow !CONNECT allowed_http_only'); + const denyIndex = result.indexOf('http_access deny !allowed_domains'); + expect(allowIndex).toBeGreaterThan(-1); + expect(denyIndex).toBeGreaterThan(-1); + 
expect(allowIndex).toBeLessThan(denyIndex); + }); + + it('should deny all when only protocol-specific domains are configured', () => { + const config: SquidConfig = { + domains: ['http://api.example.com', 'https://secure.example.com'], + port: defaultPort, + }; + const result = generateSquidConfig(config); + // Should have deny all since no 'both' domains + expect(result).toContain('http_access deny all'); + // But should have allow rules for specific protocols + expect(result).toContain('http_access allow !CONNECT allowed_http_only'); + expect(result).toContain('http_access allow CONNECT allowed_https_only'); + }); + }); + + describe('Protocol-Specific Subdomain Handling', () => { + it('should not remove http-only subdomain when parent has https-only', () => { + const config: SquidConfig = { + domains: ['https://example.com', 'http://api.example.com'], + port: defaultPort, + }; + const result = generateSquidConfig(config); + // Both should be present since protocols are different + expect(result).toContain('acl allowed_https_only dstdomain .example.com'); + expect(result).toContain('acl allowed_http_only dstdomain .api.example.com'); + }); + + it('should remove subdomain when parent has "both" protocol', () => { + const config: SquidConfig = { + domains: ['example.com', 'http://api.example.com'], + port: defaultPort, + }; + const result = generateSquidConfig(config); + // api.example.com should be removed since example.com with 'both' covers it + expect(result).toContain('acl allowed_domains dstdomain .example.com'); + expect(result).not.toContain('api.example.com'); + }); + + it('should not remove "both" subdomain when parent has single protocol', () => { + const config: SquidConfig = { + domains: ['https://example.com', 'api.example.com'], + port: defaultPort, + }; + const result = generateSquidConfig(config); + // Both should be present since api.example.com needs both protocols + expect(result).toContain('acl allowed_https_only dstdomain .example.com'); + 
expect(result).toContain('acl allowed_domains dstdomain .api.example.com'); + }); + }); }); diff --git a/src/squid-config.ts b/src/squid-config.ts index 7e6e22e71..497587f2e 100644 --- a/src/squid-config.ts +++ b/src/squid-config.ts @@ -2,8 +2,57 @@ import { SquidConfig } from './types'; import { parseDomainList, isDomainMatchedByPattern, + PlainDomainEntry, + DomainPattern, } from './domain-patterns'; +/** + * Groups domains/patterns by their protocol restriction + */ +interface DomainsByProtocol { + http: string[]; + https: string[]; + both: string[]; +} + +/** + * Groups patterns by their protocol restriction + */ +interface PatternsByProtocol { + http: DomainPattern[]; + https: DomainPattern[]; + both: DomainPattern[]; +} + +/** + * Helper to add leading dot to domain for Squid subdomain matching + */ +function formatDomainForSquid(domain: string): string { + return domain.startsWith('.') ? domain : `.${domain}`; +} + +/** + * Group plain domains by protocol + */ +function groupDomainsByProtocol(domains: PlainDomainEntry[]): DomainsByProtocol { + const result: DomainsByProtocol = { http: [], https: [], both: [] }; + for (const entry of domains) { + result[entry.protocol].push(entry.domain); + } + return result; +} + +/** + * Group patterns by protocol + */ +function groupPatternsByProtocol(patterns: DomainPattern[]): PatternsByProtocol { + const result: PatternsByProtocol = { http: [], https: [], both: [] }; + for (const pattern of patterns) { + result[pattern.protocol].push(pattern); + } + return result; +} + /** * Generates Squid proxy configuration with domain whitelisting * @@ -11,74 +60,162 @@ import { * - Plain domains use dstdomain ACL (efficient, fast matching) * - Wildcard patterns use dstdom_regex ACL (regex matching) * + * Supports protocol-specific domain restrictions: + * - http://domain.com -> allow only HTTP traffic + * - https://domain.com -> allow only HTTPS traffic + * - domain.com -> allow both HTTP and HTTPS (default) + * * @example * // 
Plain domain: github.com -> acl allowed_domains dstdomain .github.com * // Wildcard: *.github.com -> acl allowed_domains_regex dstdom_regex -i ^.*\.github\.com$ + * // HTTP only: http://api.example.com -> separate ACL with !CONNECT rule */ export function generateSquidConfig(config: SquidConfig): string { const { domains, port } = config; - // Normalize domains - remove protocol if present - const normalizedDomains = domains.map(domain => { - return domain.replace(/^https?:\/\//, '').replace(/\/$/, ''); - }); - // Parse domains into plain domains and wildcard patterns + // Note: parseDomainList now preserves protocol info instead of stripping it // This also validates all inputs and throws on invalid patterns - const { plainDomains, patterns } = parseDomainList(normalizedDomains); + const { plainDomains, patterns } = parseDomainList(domains); - // Remove redundant plain subdomains (e.g., if github.com is present, api.github.com is redundant) - const uniquePlainDomains = plainDomains.filter((domain, index, arr) => { - // Check if this domain is a subdomain of another plain domain in the list - return !arr.some((otherDomain, otherIndex) => { + // Remove redundant plain subdomains within same protocol + // (e.g., if github.com with 'both' is present, api.github.com with 'both' is redundant) + const uniquePlainDomains = plainDomains.filter((entry, index, arr) => { + // Check if this domain is a subdomain of another plain domain with compatible protocol + return !arr.some((other, otherIndex) => { if (index === otherIndex) return false; - // Check if domain is a subdomain of otherDomain (but not an exact duplicate) - return domain !== otherDomain && domain.endsWith('.' + otherDomain); + // Check if this domain is a subdomain of other + if (entry.domain === other.domain || !entry.domain.endsWith('.' 
+ other.domain)) { + return false; + } + // Subdomain is only redundant if parent has same or broader protocol + return other.protocol === 'both' || other.protocol === entry.protocol; }); }); // Remove plain domains that are already covered by wildcard patterns - const filteredPlainDomains = uniquePlainDomains.filter(domain => { - return !isDomainMatchedByPattern(domain, patterns); + const filteredPlainDomains = uniquePlainDomains.filter(entry => { + return !isDomainMatchedByPattern(entry, patterns); }); - // Generate ACL entries for plain domains using dstdomain (fast matching) - const domainAcls = filteredPlainDomains - .map(domain => { - // Add leading dot for subdomain matching unless already present - const domainPattern = domain.startsWith('.') ? domain : `.${domain}`; - return `acl allowed_domains dstdomain ${domainPattern}`; - }) - .join('\n'); - - // Generate ACL entries for wildcard patterns using dstdom_regex - // Use -i flag for case-insensitive matching (DNS is case-insensitive) - const patternAcls = patterns - .map(p => `acl allowed_domains_regex dstdom_regex -i ${p.regex}`) - .join('\n'); - - // Determine the ACL section and deny rule based on what we have - let aclSection = ''; - let denyRule: string; + // Group domains and patterns by protocol + const domainsByProto = groupDomainsByProtocol(filteredPlainDomains); + const patternsByProto = groupPatternsByProtocol(patterns); + + // Generate ACL entries + const aclLines: string[] = []; + const accessRules: string[] = []; + + // === DOMAINS FOR BOTH PROTOCOLS (current behavior) === + if (domainsByProto.both.length > 0) { + aclLines.push('# ACL definitions for allowed domains (HTTP and HTTPS)'); + for (const domain of domainsByProto.both) { + aclLines.push(`acl allowed_domains dstdomain ${formatDomainForSquid(domain)}`); + } + } + + // === PATTERNS FOR BOTH PROTOCOLS === + if (patternsByProto.both.length > 0) { + aclLines.push(''); + aclLines.push('# ACL definitions for allowed domain patterns (HTTP and 
HTTPS)'); + for (const p of patternsByProto.both) { + aclLines.push(`acl allowed_domains_regex dstdom_regex -i ${p.regex}`); + } + } + + // === HTTP-ONLY DOMAINS === + if (domainsByProto.http.length > 0) { + aclLines.push(''); + aclLines.push('# ACL definitions for HTTP-only domains'); + for (const domain of domainsByProto.http) { + aclLines.push(`acl allowed_http_only dstdomain ${formatDomainForSquid(domain)}`); + } + } + + // === HTTP-ONLY PATTERNS === + if (patternsByProto.http.length > 0) { + aclLines.push(''); + aclLines.push('# ACL definitions for HTTP-only domain patterns'); + for (const p of patternsByProto.http) { + aclLines.push(`acl allowed_http_only_regex dstdom_regex -i ${p.regex}`); + } + } + + // === HTTPS-ONLY DOMAINS === + if (domainsByProto.https.length > 0) { + aclLines.push(''); + aclLines.push('# ACL definitions for HTTPS-only domains'); + for (const domain of domainsByProto.https) { + aclLines.push(`acl allowed_https_only dstdomain ${formatDomainForSquid(domain)}`); + } + } + + // === HTTPS-ONLY PATTERNS === + if (patternsByProto.https.length > 0) { + aclLines.push(''); + aclLines.push('# ACL definitions for HTTPS-only domain patterns'); + for (const p of patternsByProto.https) { + aclLines.push(`acl allowed_https_only_regex dstdom_regex -i ${p.regex}`); + } + } + + // Build access rules + // Order matters: allow rules come before deny rules + + // Allow HTTP-only domains for non-CONNECT requests + const hasHttpOnly = domainsByProto.http.length > 0 || patternsByProto.http.length > 0; + if (hasHttpOnly) { + if (domainsByProto.http.length > 0 && patternsByProto.http.length > 0) { + accessRules.push('http_access allow !CONNECT allowed_http_only'); + accessRules.push('http_access allow !CONNECT allowed_http_only_regex'); + } else if (domainsByProto.http.length > 0) { + accessRules.push('http_access allow !CONNECT allowed_http_only'); + } else { + accessRules.push('http_access allow !CONNECT allowed_http_only_regex'); + } + } + + // Allow 
HTTPS-only domains for CONNECT requests + const hasHttpsOnly = domainsByProto.https.length > 0 || patternsByProto.https.length > 0; + if (hasHttpsOnly) { + if (domainsByProto.https.length > 0 && patternsByProto.https.length > 0) { + accessRules.push('http_access allow CONNECT allowed_https_only'); + accessRules.push('http_access allow CONNECT allowed_https_only_regex'); + } else if (domainsByProto.https.length > 0) { + accessRules.push('http_access allow CONNECT allowed_https_only'); + } else { + accessRules.push('http_access allow CONNECT allowed_https_only_regex'); + } + } + + // Build the deny rule for domains that allow both protocols + const hasBothDomains = domainsByProto.both.length > 0; + const hasBothPatterns = patternsByProto.both.length > 0; - if (filteredPlainDomains.length > 0 && patterns.length > 0) { - // Both plain domains and patterns - aclSection = `# ACL definitions for allowed domains\n${domainAcls}\n\n# ACL definitions for allowed domain patterns (wildcard)\n${patternAcls}`; + let denyRule: string; + if (hasBothDomains && hasBothPatterns) { denyRule = 'http_access deny !allowed_domains !allowed_domains_regex'; - } else if (filteredPlainDomains.length > 0) { - // Only plain domains - aclSection = `# ACL definitions for allowed domains\n${domainAcls}`; + } else if (hasBothDomains) { denyRule = 'http_access deny !allowed_domains'; - } else if (patterns.length > 0) { - // Only patterns - aclSection = `# ACL definitions for allowed domain patterns (wildcard)\n${patternAcls}`; + } else if (hasBothPatterns) { denyRule = 'http_access deny !allowed_domains_regex'; + } else if (hasHttpOnly || hasHttpsOnly) { + // Only protocol-specific domains - deny all by default + // The allow rules above will permit the specific traffic + denyRule = 'http_access deny all'; } else { - // No domains - deny all (edge case, should not happen with validation) - aclSection = '# No domains configured'; + // No domains configured denyRule = 'http_access deny all'; } + // 
Combine ACL section + const aclSection = aclLines.length > 0 ? aclLines.join('\n') : '# No domains configured'; + + // Combine access rules section for protocol-specific domains + const accessRulesSection = accessRules.length > 0 + ? '# Protocol-specific domain access rules\n' + accessRules.join('\n') + '\n\n' + : ''; + return `# Squid configuration for egress traffic control # Generated by awf @@ -115,7 +252,7 @@ acl CONNECT method CONNECT http_access deny !Safe_ports http_access deny CONNECT !SSL_ports -# Deny requests to unknown domains (not in allow-list) +${accessRulesSection}# Deny requests to unknown domains (not in allow-list) # This applies to all sources including localnet ${denyRule} From 304fa5f42c4dfda0ecd1589b97a55a94b86431e8 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 19 Dec 2025 08:40:21 +0000 Subject: [PATCH 3/3] docs: improve code comments based on review feedback Co-authored-by: Mossaka <5447827+Mossaka@users.noreply.github.com> --- src/domain-patterns.ts | 3 +-- src/squid-config.ts | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/domain-patterns.ts b/src/domain-patterns.ts index 5f5ae30b9..d29c30160 100644 --- a/src/domain-patterns.ts +++ b/src/domain-patterns.ts @@ -275,9 +275,8 @@ export function isDomainMatchedByPattern( if (pattern.protocol === 'both') { return true; } - // If domain is 'both', it needs a 'both' pattern to be fully covered + // A domain that needs both protocols cannot be fully covered by a single-protocol pattern if (domainEntry.protocol === 'both') { - // Pattern 'http' or 'https' cannot fully cover a 'both' domain continue; } // Pattern matches specific protocol diff --git a/src/squid-config.ts b/src/squid-config.ts index 497587f2e..826a6d6fc 100644 --- a/src/squid-config.ts +++ b/src/squid-config.ts @@ -74,7 +74,7 @@ export function generateSquidConfig(config: SquidConfig): string { const { domains, port } = config; // Parse 
domains into plain domains and wildcard patterns - // Note: parseDomainList now preserves protocol info instead of stripping it + // Note: parseDomainList extracts and preserves protocol info from prefixes (http://, https://) // This also validates all inputs and throws on invalid patterns const { plainDomains, patterns } = parseDomainList(domains); @@ -188,7 +188,7 @@ export function generateSquidConfig(config: SquidConfig): string { } } - // Build the deny rule for domains that allow both protocols + // Build the deny rule based on configured domains and their protocols const hasBothDomains = domainsByProto.both.length > 0; const hasBothPatterns = patternsByProto.both.length > 0;