diff --git a/containers/agent/Dockerfile b/containers/agent/Dockerfile index 7762c614c..695a461a7 100644 --- a/containers/agent/Dockerfile +++ b/containers/agent/Dockerfile @@ -35,11 +35,12 @@ RUN groupadd -g ${USER_GID} awfuser && \ mkdir -p /home/awfuser/.copilot/logs && \ chown -R awfuser:awfuser /home/awfuser -# Copy iptables setup script and docker wrapper +# Copy iptables setup script, docker wrapper, and PID logger COPY setup-iptables.sh /usr/local/bin/setup-iptables.sh COPY entrypoint.sh /usr/local/bin/entrypoint.sh COPY docker-wrapper.sh /usr/local/bin/docker-wrapper.sh -RUN chmod +x /usr/local/bin/setup-iptables.sh /usr/local/bin/entrypoint.sh /usr/local/bin/docker-wrapper.sh +COPY pid-logger.sh /usr/local/bin/pid-logger.sh +RUN chmod +x /usr/local/bin/setup-iptables.sh /usr/local/bin/entrypoint.sh /usr/local/bin/docker-wrapper.sh /usr/local/bin/pid-logger.sh # Install docker wrapper to intercept docker commands # Rename real docker binary and replace with wrapper diff --git a/containers/agent/pid-logger.sh b/containers/agent/pid-logger.sh new file mode 100644 index 000000000..a3da578b5 --- /dev/null +++ b/containers/agent/pid-logger.sh @@ -0,0 +1,215 @@ +#!/bin/bash +# +# PID Logger - Shell-based process tracking for network connections +# +# This script tracks which process is using a specific source port by +# reading /proc/net/tcp and scanning /proc/[pid]/fd/ directories. +# +# Usage: +# ./pid-logger.sh +# ./pid-logger.sh 45678 +# +# Output (JSON): +# {"srcPort":45678,"pid":12345,"cmdline":"curl https://github.com","comm":"curl","inode":"123456"} +# +# Exit codes: +# 0 - Success, process found +# 1 - Error (invalid arguments, port not found, etc.) +# +# Note: This script requires read access to /proc filesystem and may need +# appropriate permissions to read other processes' fd directories. +# + +set -e + +# Function to convert hex port to decimal +hex_to_dec() { + printf "%d" "0x$1" +} + +# Function to convert little-endian hex IP to dotted decimal +hex_to_ip() { + local hex="$1" + # /proc/net/tcp stores IPs in little-endian format + local b1=$((16#${hex:6:2})) + local b2=$((16#${hex:4:2})) + local b3=$((16#${hex:2:2})) + local b4=$((16#${hex:0:2})) + echo "$b1.$b2.$b3.$b4" +} + +# Function to find inode for a given port +find_inode_for_port() { + local target_port="$1" + + # Skip header line and parse each connection + # Use awk to avoid subshell issues with while loops + awk -v target="$target_port" ' + NR > 1 { + # Parse local address (field 2, format: ADDR:PORT) + split($2, addr_parts, ":") + port_hex = addr_parts[2] + # Convert hex port to decimal + port_dec = 0 + for (i = 1; i <= length(port_hex); i++) { + c = substr(port_hex, i, 1) + if (c ~ /[0-9]/) { + port_dec = port_dec * 16 + (c - 0) + } else if (c ~ /[a-f]/) { + port_dec = port_dec * 16 + (10 + index("abcdef", c) - 1) + } else if (c ~ /[A-F]/) { + port_dec = port_dec * 16 + (10 + index("ABCDEF", c) - 1) + } + } + if (port_dec == target) { + # Print inode (field 10) + print $10 + exit 0 + } + } + ' /proc/net/tcp 2>/dev/null +} + +# Function to find process owning a socket inode +find_process_for_inode() { + local target_inode="$1" + + # Scan all numeric directories in /proc (these are PIDs) + for pid_dir in /proc/[0-9]*; do + local pid + pid=$(basename "$pid_dir") + + # Check if fd directory is readable + if [ -d "$pid_dir/fd" ] && [ -r "$pid_dir/fd" ]; then + # Check each file descriptor + for fd in "$pid_dir/fd"/*; do + if [ -L "$fd" ]; then + local link_target + link_target=$(readlink "$fd" 2>/dev/null || true) + if [ "$link_target" = "socket:[$target_inode]" ]; then + echo "$pid" + return 0 + fi + fi + done + fi + done 2>/dev/null + + return 1 +} + +# Function to get process command line +get_cmdline() { + local pid="$1" + if [ -r "/proc/$pid/cmdline" ]; then + # cmdline is null-separated, convert to spaces + tr '\0' ' ' < "/proc/$pid/cmdline" | sed 's/ $//' + else + echo "unknown" + fi +} + +# Function to get process short name +get_comm() { + local pid="$1" + if [ -r "/proc/$pid/comm" ]; then + cat "/proc/$pid/comm" | tr -d '\n' + else + echo "unknown" + fi +} + +# Function to escape JSON string +json_escape() { + local str="$1" + # Escape backslashes first, then quotes + str="${str//\\/\\\\}" + str="${str//\"/\\\"}" + # Escape control characters + str="${str//$'\n'/\\n}" + str="${str//$'\r'/\\r}" + str="${str//$'\t'/\\t}" + echo "$str" +} + +# Function to output JSON result +output_json() { + local src_port="$1" + local pid="$2" + local cmdline="$3" + local comm="$4" + local inode="$5" + local error="$6" + + cmdline=$(json_escape "$cmdline") + comm=$(json_escape "$comm") + + if [ -n "$error" ]; then + error=$(json_escape "$error") + echo "{\"srcPort\":$src_port,\"pid\":$pid,\"cmdline\":\"$cmdline\",\"comm\":\"$comm\",\"error\":\"$error\"}" + elif [ -n "$inode" ]; then + echo "{\"srcPort\":$src_port,\"pid\":$pid,\"cmdline\":\"$cmdline\",\"comm\":\"$comm\",\"inode\":\"$inode\"}" + else + echo "{\"srcPort\":$src_port,\"pid\":$pid,\"cmdline\":\"$cmdline\",\"comm\":\"$comm\"}" + fi +} + +# Main function +main() { + local src_port="$1" + + # Validate arguments + if [ -z "$src_port" ]; then + echo "Usage: $0 " >&2 + echo "Example: $0 45678" >&2 + exit 1 + fi + + # Validate port is numeric + if ! [[ "$src_port" =~ ^[0-9]+$ ]]; then + output_json "$src_port" -1 "unknown" "unknown" "" "Invalid port: must be numeric" + exit 1 + fi + + # Validate port range (1-65535) + if [ "$src_port" -lt 1 ] || [ "$src_port" -gt 65535 ]; then + output_json "$src_port" -1 "unknown" "unknown" "" "Invalid port: must be in range 1-65535" + exit 1 + fi + + # Check if /proc/net/tcp exists + if [ ! -r /proc/net/tcp ]; then + output_json "$src_port" -1 "unknown" "unknown" "" "Cannot read /proc/net/tcp" + exit 1 + fi + + # Find inode for the port + local inode + inode=$(find_inode_for_port "$src_port") + + if [ -z "$inode" ] || [ "$inode" = "0" ]; then + output_json "$src_port" -1 "unknown" "unknown" "" "No socket found for port $src_port" + exit 1 + fi + + # Find process owning the socket + local pid + pid=$(find_process_for_inode "$inode") + + if [ -z "$pid" ]; then + output_json "$src_port" -1 "unknown" "unknown" "$inode" "Socket inode $inode found but no process owns it" + exit 1 + fi + + # Get process information + local cmdline + cmdline=$(get_cmdline "$pid") + local comm + comm=$(get_comm "$pid") + + # Output result + output_json "$src_port" "$pid" "$cmdline" "$comm" "$inode" + exit 0 +} + +main "$@" diff --git a/src/cli.ts b/src/cli.ts index 0bf2e75c7..94b029246 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -610,6 +610,11 @@ program ) .option('--source ', 'Path to log directory or "running" for live container') .option('--list', 'List available log sources', false) + .option( + '--with-pid', + 'Enrich logs with PID/process info (real-time only, requires -f)', + false + ) .action(async (options) => { // Validate format option const validFormats: OutputFormat[] = ['raw', 'pretty', 'json']; @@ -618,6 +623,11 @@ program process.exit(1); } + // Warn if --with-pid is used without -f + if (options.withPid && !options.follow) { + logger.warn('--with-pid only works with real-time streaming (-f). PID tracking disabled.'); + } + // Dynamic import to avoid circular dependencies const { logsCommand } = await import('./commands/logs'); await logsCommand({ @@ -625,6 +635,7 @@ program format: options.format as OutputFormat, source: options.source, list: options.list, + withPid: options.withPid && options.follow, // Only enable if also following }); }); diff --git a/src/commands/logs.ts b/src/commands/logs.ts index 98eb0839c..4ed9c2c12 100644 --- a/src/commands/logs.ts +++ b/src/commands/logs.ts @@ -25,6 +25,8 @@ export interface LogsCommandOptions { source?: string; /** List available log sources without streaming */ list?: boolean; + /** Enrich logs with PID/process info (real-time only) */ + withPid?: boolean; } /** @@ -94,6 +96,7 @@ export async function logsCommand(options: LogsCommandOptions): Promise { source, formatter, parse, + withPid: options.withPid || false, }); } catch (error) { logger.error(`Failed to stream logs: ${error instanceof Error ? error.message : error}`); diff --git a/src/logs/log-formatter.test.ts b/src/logs/log-formatter.test.ts index c9ba36a81..3ca5d65aa 100644 --- a/src/logs/log-formatter.test.ts +++ b/src/logs/log-formatter.test.ts @@ -263,4 +263,58 @@ describe('LogFormatter', () => { expect(result).toContain('8080'); }); }); + + describe('PID enrichment', () => { + it('should display PID info in pretty format when available', () => { + const enhancedEntry = { + ...allowedEntry, + pid: 12345, + cmdline: 'curl https://api.github.com', + comm: 'curl', + inode: '123456', + }; + const formatter = new LogFormatter({ format: 'pretty', colorize: false }); + const result = formatter.formatEntry(enhancedEntry); + + expect(result).toContain(''); + }); + + it('should not display PID info when pid is -1', () => { + const enhancedEntry = { + ...allowedEntry, + pid: -1, + cmdline: 'unknown', + comm: 'unknown', + }; + const formatter = new LogFormatter({ format: 'pretty', colorize: false }); + const result = formatter.formatEntry(enhancedEntry); + + expect(result).not.toContain(' { + const formatter = new LogFormatter({ format: 'pretty', colorize: false }); + const result = formatter.formatEntry(allowedEntry); + + expect(result).not.toContain(' { + const enhancedEntry = { + ...allowedEntry, + pid: 12345, + cmdline: 'curl https://api.github.com', + comm: 'curl', + inode: '123456', + }; + const formatter = new LogFormatter({ format: 'json' }); + const result = formatter.formatEntry(enhancedEntry); + + const parsed = JSON.parse(result); + expect(parsed.pid).toBe(12345); + expect(parsed.cmdline).toBe('curl https://api.github.com'); + expect(parsed.comm).toBe('curl'); + expect(parsed.inode).toBe('123456'); + }); + }); }); diff --git a/src/logs/log-formatter.ts b/src/logs/log-formatter.ts index 8ac20c7be..43504a420 100644 --- a/src/logs/log-formatter.ts +++ b/src/logs/log-formatter.ts @@ -3,7 +3,7 @@ */ import chalk from 'chalk'; -import { ParsedLogEntry, OutputFormat } from '../types'; +import { ParsedLogEntry, OutputFormat, EnhancedLogEntry } from '../types'; /** * Options for log formatting @@ -28,12 +28,12 @@ export class LogFormatter { } /** - * Formats a parsed log entry + * Formats a parsed log entry (supports both ParsedLogEntry and EnhancedLogEntry) * - * @param entry - Parsed log entry + * @param entry - Parsed log entry (may include PID info) * @returns Formatted string with newline */ - formatEntry(entry: ParsedLogEntry): string { + formatEntry(entry: ParsedLogEntry | EnhancedLogEntry): string { switch (this.format) { case 'raw': throw new Error('Cannot format parsed entry as raw - use formatRaw for raw lines'); @@ -57,7 +57,7 @@ export class LogFormatter { /** * Formats an entry as pretty, human-readable output */ - private formatPretty(entry: ParsedLogEntry): string { + private formatPretty(entry: ParsedLogEntry | EnhancedLogEntry): string { // Format timestamp as readable date const date = new Date(entry.timestamp * 1000); const timeStr = date.toISOString().replace('T', ' ').substring(0, 23); @@ -73,8 +73,14 @@ export class LogFormatter { const userAgentPart = entry.userAgent && entry.userAgent !== '-' ? ` [${entry.userAgent}]` : ''; + // PID info (show if available) + const enhancedEntry = entry as EnhancedLogEntry; + const pidPart = enhancedEntry.pid !== undefined && enhancedEntry.pid !== -1 + ? ` ` + : ''; + // Build message - const message = `[${timeStr}] ${entry.method} ${target} → ${entry.statusCode} (${statusText})${userAgentPart}`; + const message = `[${timeStr}] ${entry.method} ${target} → ${entry.statusCode} (${statusText})${userAgentPart}${pidPart}`; // Colorize based on allowed/denied if (!this.colorize) { @@ -87,14 +93,14 @@ export class LogFormatter { /** * Formats an entry as JSON (newline-delimited) */ - private formatJson(entry: ParsedLogEntry): string { + private formatJson(entry: ParsedLogEntry | EnhancedLogEntry): string { return JSON.stringify(entry) + '\n'; } /** * Formats a batch of entries (primarily for JSON array output) */ - formatBatch(entries: ParsedLogEntry[]): string { + formatBatch(entries: (ParsedLogEntry | EnhancedLogEntry)[]): string { if (this.format === 'json') { return entries.map(e => this.formatJson(e)).join(''); } diff --git a/src/logs/log-streamer.ts b/src/logs/log-streamer.ts index d29a12234..48d23d7d6 100644 --- a/src/logs/log-streamer.ts +++ b/src/logs/log-streamer.ts @@ -6,10 +6,11 @@ import * as fs from 'fs'; import * as path from 'path'; import * as readline from 'readline'; import execa from 'execa'; -import { LogSource } from '../types'; +import { LogSource, EnhancedLogEntry } from '../types'; import { LogFormatter } from './log-formatter'; import { parseLogLine } from './log-parser'; import { logger } from '../logger'; +import { trackPidForPortSync, isPidTrackingAvailable } from '../pid-tracker'; /** * Options for streaming logs @@ -23,6 +24,8 @@ export interface StreamOptions { formatter: LogFormatter; /** Whether to parse logs (false for raw format) */ parse?: boolean; + /** Whether to enrich logs with PID/process info (real-time only) */ + withPid?: boolean; } /** @@ -31,12 +34,17 @@ export interface StreamOptions { * @param options - Streaming options */ export async function streamLogs(options: StreamOptions): Promise { - const { follow, source, formatter, parse = true } = options; + const { follow, source, formatter, parse = true, withPid = false } = options; + + // Check if PID tracking is available when requested + if (withPid && !isPidTrackingAvailable()) { + logger.warn('PID tracking not available on this system (requires /proc filesystem)'); + } if (source.type === 'running') { - await streamFromContainer(source.containerName!, follow, formatter, parse); + await streamFromContainer(source.containerName!, follow, formatter, parse, withPid); } else { - await streamFromFile(source.path!, follow, formatter, parse); + await streamFromFile(source.path!, follow, formatter, parse, withPid); } } @@ -47,7 +55,8 @@ async function streamFromContainer( containerName: string, follow: boolean, formatter: LogFormatter, - parse: boolean + parse: boolean, + withPid: boolean ): Promise { logger.debug(`Streaming logs from container: ${containerName}`); @@ -76,7 +85,7 @@ async function streamFromContainer( }); for await (const line of rl) { - processLine(line, formatter, parse); + processLine(line, formatter, parse, withPid); } } @@ -100,7 +109,8 @@ async function streamFromFile( logDir: string, follow: boolean, formatter: LogFormatter, - parse: boolean + parse: boolean, + withPid: boolean ): Promise { const filePath = path.join(logDir, 'access.log'); @@ -113,10 +123,10 @@ async function streamFromFile( if (follow) { // Use tail -f for live following - await tailFile(filePath, formatter, parse); + await tailFile(filePath, formatter, parse, withPid); } else { // Read entire file at once - await readFile(filePath, formatter, parse); + await readFile(filePath, formatter, parse, withPid); } } @@ -126,14 +136,15 @@ async function streamFromFile( async function readFile( filePath: string, formatter: LogFormatter, - parse: boolean + parse: boolean, + withPid: boolean ): Promise { const content = fs.readFileSync(filePath, 'utf-8'); const lines = content.split('\n'); for (const line of lines) { if (line.trim() === '') continue; - processLine(line, formatter, parse); + processLine(line, formatter, parse, withPid); } } @@ -143,7 +154,8 @@ async function readFile( async function tailFile( filePath: string, formatter: LogFormatter, - parse: boolean + parse: boolean, + withPid: boolean ): Promise { const proc = execa('tail', ['-f', filePath], { reject: false, @@ -163,7 +175,7 @@ async function tailFile( }); for await (const line of rl) { - processLine(line, formatter, parse); + processLine(line, formatter, parse, withPid); } } @@ -180,9 +192,35 @@ async function tailFile( } /** - * Processes a single log line - parses (if enabled) and outputs + * Enriches a parsed log entry with PID tracking information + * + * @param entry - Parsed log entry + * @returns Enhanced log entry with PID info (if available) + */ +function enrichWithPid(entry: EnhancedLogEntry): EnhancedLogEntry { + const port = parseInt(entry.clientPort, 10); + if (isNaN(port) || port <= 0 || port > 65535) { + return entry; + } + + const pidInfo = trackPidForPortSync(port); + if (pidInfo.pid !== -1) { + return { + ...entry, + pid: pidInfo.pid, + cmdline: pidInfo.cmdline, + comm: pidInfo.comm, + inode: pidInfo.inode, + }; + } + + return entry; +} + +/** + * Processes a single log line - parses (if enabled), enriches with PID (if enabled), and outputs */ -function processLine(line: string, formatter: LogFormatter, parse: boolean): void { +function processLine(line: string, formatter: LogFormatter, parse: boolean, withPid: boolean): void { if (!parse) { // Raw format - output as-is process.stdout.write(formatter.formatRaw(line)); @@ -192,7 +230,9 @@ function processLine(line: string, formatter: LogFormatter, parse: boolean): voi // Parse and format const entry = parseLogLine(line); if (entry) { - process.stdout.write(formatter.formatEntry(entry)); + // Enrich with PID info if enabled + const enhancedEntry = withPid ? enrichWithPid(entry) : entry; + process.stdout.write(formatter.formatEntry(enhancedEntry)); } else { // Failed to parse, output as raw with a warning indicator logger.debug(`Failed to parse log line: ${line}`); diff --git a/src/pid-tracker.test.ts b/src/pid-tracker.test.ts new file mode 100644 index 000000000..d4e656491 --- /dev/null +++ b/src/pid-tracker.test.ts @@ -0,0 +1,352 @@ +/** + * Unit tests for pid-tracker.ts + * + * These tests use mock /proc filesystem data to test the parsing + * and tracking logic without requiring actual system access. + */ + +import * as fs from 'fs'; +import * as path from 'path'; +import * as os from 'os'; +import { + parseHexIp, + parseHexPort, + parseNetTcp, + findInodeForPort, + isNumeric, + readCmdline, + readComm, + getProcessInfo, + trackPidForPort, + trackPidForPortSync, + isPidTrackingAvailable, +} from './pid-tracker'; + +describe('pid-tracker', () => { + describe('parseHexIp', () => { + it('should parse localhost (127.0.0.1) correctly', () => { + // 127.0.0.1 in little-endian hex is 0100007F + expect(parseHexIp('0100007F')).toBe('127.0.0.1'); + }); + + it('should parse 0.0.0.0 correctly', () => { + expect(parseHexIp('00000000')).toBe('0.0.0.0'); + }); + + it('should parse 192.168.1.1 correctly', () => { + // 192.168.1.1 in little-endian hex: 01 01 A8 C0 + expect(parseHexIp('0101A8C0')).toBe('192.168.1.1'); + }); + + it('should parse 10.0.0.1 correctly', () => { + // 10.0.0.1 in little-endian hex: 01 00 00 0A + expect(parseHexIp('0100000A')).toBe('10.0.0.1'); + }); + + it('should parse 172.30.0.20 correctly', () => { + // 172.30.0.20 in little-endian hex: 14 00 1E AC + expect(parseHexIp('14001EAC')).toBe('172.30.0.20'); + }); + }); + + describe('parseHexPort', () => { + it('should parse port 443 correctly', () => { + expect(parseHexPort('01BB')).toBe(443); + }); + + it('should parse port 80 correctly', () => { + expect(parseHexPort('0050')).toBe(80); + }); + + it('should parse port 3128 correctly', () => { + expect(parseHexPort('0C38')).toBe(3128); + }); + + it('should parse high port correctly', () => { + expect(parseHexPort('C000')).toBe(49152); + }); + + it('should parse port 0 correctly', () => { + expect(parseHexPort('0000')).toBe(0); + }); + }); + + describe('parseNetTcp', () => { + const sampleNetTcp = ` sl local_address rem_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode + 0: 0100007F:0CEA 00000000:0000 0A 00000000:00000000 00:00000000 00000000 1000 0 123456 1 0000000000000000 100 0 0 10 0 + 1: 0100007F:01BB 00000000:0000 0A 00000000:00000000 00:00000000 00000000 1000 0 789012 1 0000000000000000 100 0 0 10 0 + 2: 14001EAC:B278 8C728E58:01BB 01 00000000:00000000 02:000A8D98 00000000 1000 0 345678 1 0000000000000000 100 0 0 10 0`; + + it('should parse /proc/net/tcp content correctly', () => { + const entries = parseNetTcp(sampleNetTcp); + expect(entries).toHaveLength(3); + }); + + it('should parse local port correctly', () => { + const entries = parseNetTcp(sampleNetTcp); + expect(entries[0].localPort).toBe(3306); // 0CEA in hex + expect(entries[1].localPort).toBe(443); // 01BB in hex + expect(entries[2].localPort).toBe(45688); // B278 in hex + }); + + it('should parse remote port correctly', () => { + const entries = parseNetTcp(sampleNetTcp); + expect(entries[0].remotePort).toBe(0); + expect(entries[1].remotePort).toBe(0); + expect(entries[2].remotePort).toBe(443); + }); + + it('should parse inode correctly', () => { + const entries = parseNetTcp(sampleNetTcp); + expect(entries[0].inode).toBe('123456'); + expect(entries[1].inode).toBe('789012'); + expect(entries[2].inode).toBe('345678'); + }); + + it('should parse connection state correctly', () => { + const entries = parseNetTcp(sampleNetTcp); + expect(entries[0].state).toBe('0A'); // LISTEN + expect(entries[1].state).toBe('0A'); // LISTEN + expect(entries[2].state).toBe('01'); // ESTABLISHED + }); + + it('should parse UID correctly', () => { + const entries = parseNetTcp(sampleNetTcp); + expect(entries[0].uid).toBe(1000); + }); + + it('should handle empty content', () => { + const entries = parseNetTcp(''); + expect(entries).toHaveLength(0); + }); + + it('should handle header only', () => { + const entries = parseNetTcp( + ' sl local_address rem_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode' + ); + expect(entries).toHaveLength(0); + }); + }); + + describe('findInodeForPort', () => { + const entries = [ + { + localAddressHex: '0100007F', + localPort: 3306, + remoteAddressHex: '00000000', + remotePort: 0, + state: '0A', + inode: '123456', + uid: 1000, + }, + { + localAddressHex: '0100007F', + localPort: 443, + remoteAddressHex: '00000000', + remotePort: 0, + state: '0A', + inode: '789012', + uid: 1000, + }, + ]; + + it('should find inode for existing port', () => { + expect(findInodeForPort(entries, 3306)).toBe('123456'); + expect(findInodeForPort(entries, 443)).toBe('789012'); + }); + + it('should return undefined for non-existent port', () => { + expect(findInodeForPort(entries, 8080)).toBeUndefined(); + }); + + it('should return undefined for empty entries', () => { + expect(findInodeForPort([], 3306)).toBeUndefined(); + }); + }); + + describe('isNumeric', () => { + it('should return true for numeric strings', () => { + expect(isNumeric('123')).toBe(true); + expect(isNumeric('1')).toBe(true); + expect(isNumeric('0')).toBe(true); + expect(isNumeric('999999')).toBe(true); + }); + + it('should return false for non-numeric strings', () => { + expect(isNumeric('')).toBe(false); + expect(isNumeric('abc')).toBe(false); + expect(isNumeric('12a')).toBe(false); + expect(isNumeric('-1')).toBe(false); + expect(isNumeric('1.5')).toBe(false); + expect(isNumeric(' 123')).toBe(false); + }); + }); + + describe('Mock /proc filesystem tests', () => { + let mockProcPath: string; + + beforeEach(() => { + // Create a temporary mock /proc directory + mockProcPath = fs.mkdtempSync(path.join(os.tmpdir(), 'mock-proc-')); + }); + + afterEach(() => { + // Clean up + fs.rmSync(mockProcPath, { recursive: true, force: true }); + }); + + const createMockProc = ( + pid: number, + cmdline: string, + comm: string, + socketInodes: string[] + ) => { + const pidDir = path.join(mockProcPath, pid.toString()); + fs.mkdirSync(pidDir, { recursive: true }); + + // Write cmdline (null-separated) + fs.writeFileSync(path.join(pidDir, 'cmdline'), cmdline.replace(/ /g, '\0')); + + // Write comm + fs.writeFileSync(path.join(pidDir, 'comm'), comm); + + // Create fd directory and socket links + const fdDir = path.join(pidDir, 'fd'); + fs.mkdirSync(fdDir, { recursive: true }); + + socketInodes.forEach((inode, index) => { + const fdPath = path.join(fdDir, (index + 3).toString()); + // We can't create actual socket symlinks, so we'll mock readlinkSync in tests + fs.writeFileSync(fdPath, `socket:[${inode}]`); + }); + }; + + const createMockNetTcp = (entries: string) => { + const netDir = path.join(mockProcPath, 'net'); + fs.mkdirSync(netDir, { recursive: true }); + fs.writeFileSync(path.join(netDir, 'tcp'), entries); + }; + + describe('readCmdline', () => { + it('should read command line from mock proc', () => { + createMockProc(1234, 'curl https://github.com', 'curl', []); + const result = readCmdline(1234, mockProcPath); + expect(result).toBe('curl https://github.com'); + }); + + it('should return null for non-existent process', () => { + const result = readCmdline(99999, mockProcPath); + expect(result).toBeNull(); + }); + }); + + describe('readComm', () => { + it('should read comm from mock proc', () => { + createMockProc(1234, 'curl', 'curl', []); + const result = readComm(1234, mockProcPath); + expect(result).toBe('curl'); + }); + + it('should return null for non-existent process', () => { + const result = readComm(99999, mockProcPath); + expect(result).toBeNull(); + }); + }); + + describe('getProcessInfo', () => { + it('should get process info from mock proc', () => { + createMockProc(1234, 'node server.js', 'node', []); + const result = getProcessInfo(1234, mockProcPath); + expect(result).not.toBeNull(); + expect(result!.cmdline).toBe('node server.js'); + expect(result!.comm).toBe('node'); + }); + + it('should return null for non-existent process', () => { + const result = getProcessInfo(99999, mockProcPath); + expect(result).toBeNull(); + }); + }); + + describe('isPidTrackingAvailable', () => { + it('should return true when /proc/net/tcp exists', () => { + createMockNetTcp('header\n'); + expect(isPidTrackingAvailable(mockProcPath)).toBe(true); + }); + + it('should return false when /proc/net/tcp does not exist', () => { + expect(isPidTrackingAvailable(mockProcPath)).toBe(false); + }); + }); + + describe('trackPidForPort', () => { + it('should return error when /proc/net/tcp does not exist', async () => { + const result = await trackPidForPort(45678, mockProcPath); + expect(result.pid).toBe(-1); + expect(result.error).toContain('Failed to read'); + }); + + it('should return error when port not found in tcp table', async () => { + const netTcpContent = ` sl local_address rem_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode + 0: 0100007F:0CEA 00000000:0000 0A 00000000:00000000 00:00000000 00000000 1000 0 123456 1 0000000000000000 100 0 0 10 0`; + createMockNetTcp(netTcpContent); + + const result = await trackPidForPort(99999, mockProcPath); + expect(result.pid).toBe(-1); + expect(result.error).toContain('No socket found'); + }); + }); + + describe('trackPidForPortSync', () => { + it('should return error when /proc/net/tcp does not exist', () => { + const result = trackPidForPortSync(45678, mockProcPath); + expect(result.pid).toBe(-1); + expect(result.error).toContain('Failed to read'); + }); + + it('should return error when port not found in tcp table', () => { + const netTcpContent = ` sl local_address rem_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode + 0: 0100007F:0CEA 00000000:0000 0A 00000000:00000000 00:00000000 00000000 1000 0 123456 1 0000000000000000 100 0 0 10 0`; + createMockNetTcp(netTcpContent); + + const result = trackPidForPortSync(99999, mockProcPath); + expect(result.pid).toBe(-1); + expect(result.error).toContain('No socket found'); + }); + }); + }); + + describe('Real /proc filesystem (integration)', () => { + // These tests only run if /proc is available (Linux only) + const isLinux = process.platform === 'linux'; + + it('should check if PID tracking is available', () => { + const result = isPidTrackingAvailable(); + // On Linux, this should be true; on other platforms, false + if (isLinux) { + expect(result).toBe(true); + } else { + expect(result).toBe(false); + } + }); + + if (isLinux) { + it('should be able to parse real /proc/net/tcp', () => { + const tcpPath = '/proc/net/tcp'; + if (fs.existsSync(tcpPath)) { + const content = fs.readFileSync(tcpPath, 'utf-8'); + const entries = parseNetTcp(content); + // Should be able to parse without errors + expect(Array.isArray(entries)).toBe(true); + } + }); + + it('should get info for current process', () => { + const pid = process.pid; + const info = getProcessInfo(pid); + expect(info).not.toBeNull(); + expect(info!.comm).toContain('node'); + }); + } + }); +}); diff --git a/src/pid-tracker.ts b/src/pid-tracker.ts new file mode 100644 index 000000000..59ec80c14 --- /dev/null +++ b/src/pid-tracker.ts @@ -0,0 +1,444 @@ +/** + * PID Tracker - Correlates network requests to processes using /proc filesystem + * + * This module provides functionality to trace network connections back to their + * originating processes by reading /proc/net/tcp and scanning /proc/[pid]/fd. + * + * The tracking flow: + * 1. Parse /proc/net/tcp to find the socket inode for a given local port + * 2. Scan /proc/[pid]/fd/ directories to find which process owns that socket + * 3. Read /proc/[pid]/cmdline to get the full command line + * + * @example + * ```typescript + * import { trackPidForPort, getProcessInfo, parseNetTcp } from './pid-tracker'; + * + * // Track a process by its source port + * const result = await trackPidForPort(45678); + * console.log(result); + * // { pid: 12345, cmdline: 'curl https://github.com', comm: 'curl', inode: '123456' } + * ``` + */ + +import * as fs from 'fs'; +import * as fsPromises from 'fs/promises'; +import * as path from 'path'; +import { PidTrackResult } from './types'; + +// Re-export PidTrackResult for convenience +export { PidTrackResult } from './types'; + +/** + * Parsed entry from /proc/net/tcp + */ +export interface NetTcpEntry { + /** Local IP address in hex format */ + localAddressHex: string; + /** Local port number */ + localPort: number; + /** Remote IP address in hex format */ + remoteAddressHex: string; + /** Remote port number */ + remotePort: number; + /** Connection state (e.g., 01 = ESTABLISHED, 06 = TIME_WAIT) */ + state: string; + /** Socket inode number */ + inode: string; + /** UID of the process owning the socket */ + uid: number; +} + +/** + * Parses a hex IP address from /proc/net/tcp format to dotted decimal + * Note: /proc/net/tcp stores IP addresses in little-endian hex format + * + * @param hexIp - Hex IP address (e.g., "0100007F" for 127.0.0.1) + * @returns Dotted decimal IP address (e.g., "127.0.0.1") + */ +export function parseHexIp(hexIp: string): string { + // /proc/net/tcp stores IPs in little-endian format + // So "0100007F" means 127.0.0.1 + const bytes = []; + for (let i = 6; i >= 0; i -= 2) { + bytes.push(parseInt(hexIp.substring(i, i + 2), 16)); + } + return bytes.join('.'); +} + +/** + * Converts a hex port number to decimal + * + * @param hexPort - Hex port number (e.g., "01BB" for 443) + * @returns Decimal port number + */ +export function parseHexPort(hexPort: string): number { + return parseInt(hexPort, 16); +} + +/** + * Parses /proc/net/tcp content and returns structured entries + * + * The format of /proc/net/tcp is: + * sl local_address rem_address st tx_queue:rx_queue tr:tm->when retrnsmt uid timeout inode + * + * @param content - Raw content of /proc/net/tcp + * @returns Array of parsed TCP connection entries + */ +export function parseNetTcp(content: string): NetTcpEntry[] { + const lines = content.trim().split('\n'); + const entries: NetTcpEntry[] = []; + + // Skip header line + for (let i = 1; i < lines.length; i++) { + const line = lines[i].trim(); + if (!line) continue; + + // Split by whitespace + const fields = line.split(/\s+/); + if (fields.length < 10) continue; + + // Fields: sl, local_address, rem_address, st, tx:rx, tr:tm, retrnsmt, uid, timeout, inode + const localAddress = fields[1]; // e.g., "0100007F:01BB" + const remoteAddress = fields[2]; + const state = fields[3]; + const uid = parseInt(fields[7], 10); + const inode = fields[9]; + + // Parse local address + const [localAddrHex, localPortHex] = localAddress.split(':'); + const localPort = parseHexPort(localPortHex); + + // Parse remote address + const [remoteAddrHex, remotePortHex] = remoteAddress.split(':'); + const remotePort = parseHexPort(remotePortHex); + + entries.push({ + localAddressHex: localAddrHex, + localPort, + remoteAddressHex: remoteAddrHex, + remotePort, + state, + inode, + uid, + }); + } + + return entries; +} + +/** + * Finds the socket inode for a given local port + * + * @param entries - Parsed /proc/net/tcp entries + * @param srcPort - Source port to find + * @returns Socket inode string or undefined if not found + */ +export function findInodeForPort(entries: NetTcpEntry[], srcPort: number): string | undefined { + const entry = entries.find((e) => e.localPort === srcPort); + return entry?.inode; +} + +/** + * Checks if a string is numeric (for filtering /proc entries) + * + * @param str - String to check + * @returns true if the string represents a positive integer + */ +export function isNumeric(str: string): boolean { + return /^\d+$/.test(str); +} + +/** + * Reads the command line for a process from /proc/[pid]/cmdline + * The cmdline file contains null-separated arguments + * + * @param pid - Process ID + * @param procPath - Base path to /proc (default: '/proc') + * @returns Command line string with arguments separated by spaces, or null if not readable + */ +export function readCmdline(pid: number, procPath = '/proc'): string | null { + try { + const cmdlinePath = path.join(procPath, pid.toString(), 'cmdline'); + const content = fs.readFileSync(cmdlinePath, 'utf-8'); + // cmdline contains null-separated arguments, replace with spaces + return content.replace(/\0/g, ' ').trim(); + } catch { + return null; + } +} + +/** + * Reads the short command name from /proc/[pid]/comm + * + * @param pid - Process ID + * @param procPath - Base path to /proc (default: '/proc') + * @returns Short command name, or null if not readable + */ +export function readComm(pid: number, procPath = '/proc'): string | null { + try { + const commPath = path.join(procPath, pid.toString(), 'comm'); + return fs.readFileSync(commPath, 'utf-8').trim(); + } catch { + return null; + } +} + +/** + * Gets the symlink target for a file descriptor + * + * @param fdPath - Full path to the fd symlink + * @returns Symlink target (e.g., 'socket:[123456]'), or null if not readable + */ +export function readFdLink(fdPath: string): string | null { + try { + return fs.readlinkSync(fdPath); + } catch { + return null; + } +} + +/** + * Scans a process's file descriptors to find one that matches the given socket inode + * + * @param pid - Process ID to scan + * @param inode - Socket inode to look for + * @param procPath - Base path to /proc (default: '/proc') + * @returns true if the process owns the socket, false otherwise + */ +export function processOwnsSocket(pid: number, inode: string, procPath = '/proc'): boolean { + const fdDir = path.join(procPath, pid.toString(), 'fd'); + + try { + const fds = fs.readdirSync(fdDir); + for (const fd of fds) { + const fdPath = path.join(fdDir, fd); + const link = readFdLink(fdPath); + if (link && link === `socket:[${inode}]`) { + return true; + } + } + } catch { + // Process may have exited or we don't have permission + return false; + } + + return false; +} + +/** + * Finds the process that owns a socket with the given inode + * + * @param inode - Socket inode to find + * @param procPath - Base path to /proc (default: '/proc') + * @returns Object with pid, cmdline, and comm, or null if not found + */ +export function findProcessByInode( + inode: string, + procPath = '/proc' +): { pid: number; cmdline: string; comm: string } | null { + try { + const entries = fs.readdirSync(procPath); + const pids = entries.filter(isNumeric).map((s) => parseInt(s, 10)); + + for (const pid of pids) { + if (processOwnsSocket(pid, inode, procPath)) { + const cmdline = readCmdline(pid, procPath) || 'unknown'; + const comm = readComm(pid, procPath) || 'unknown'; + return { pid, cmdline, comm }; + } + } + } catch { + // Could not read /proc + return null; + } + + return null; +} + +/** + * Gets detailed information about a process + * + * @param pid - Process ID + * @param procPath - Base path to /proc (default: '/proc') + * @returns Object with cmdline and comm, or null if not found + */ +export function getProcessInfo( + pid: number, + procPath = '/proc' +): { cmdline: string; comm: string } | null { + const cmdline = readCmdline(pid, procPath); + const comm = readComm(pid, procPath); + + if (cmdline === null && comm === null) { + return null; + } + + return { + cmdline: cmdline || 'unknown', + comm: comm || 'unknown', + }; +} + +/** + * Main function to track a process by its source port + * + * This reads /proc/net/tcp to find the socket inode, then scans + * all process file descriptors to find the owning process. + * + * @param srcPort - Source port number from the network connection + * @param procPath - Base path to /proc (default: '/proc', useful for testing) + * @returns PidTrackResult with process information + * + * @example + * ```typescript + * const result = await trackPidForPort(45678); + * if (result.pid !== -1) { + * console.log(`Port 45678 is owned by PID ${result.pid}: ${result.cmdline}`); + * } + * ``` + */ +export async function trackPidForPort( + srcPort: number, + procPath = '/proc' +): Promise { + try { + // Read /proc/net/tcp using async operations + const tcpPath = path.join(procPath, 'net', 'tcp'); + let tcpContent: string; + + try { + tcpContent = await fsPromises.readFile(tcpPath, 'utf-8'); + } catch (err) { + return { + pid: -1, + cmdline: 'unknown', + comm: 'unknown', + error: `Failed to read ${tcpPath}: ${err}`, + }; + } + + // Parse TCP connections and find the inode for our port + const entries = parseNetTcp(tcpContent); + const inode = findInodeForPort(entries, srcPort); + + if (!inode || inode === '0') { + return { + pid: -1, + cmdline: 'unknown', + comm: 'unknown', + error: `No socket found for port ${srcPort}`, + }; + } + + // Find the process that owns this socket (uses sync operations for fd scanning) + // This is intentional as the /proc filesystem is very fast and sync is simpler + const processInfo = findProcessByInode(inode, procPath); + + if (!processInfo) { + return { + pid: -1, + cmdline: 'unknown', + comm: 'unknown', + inode, + error: `Socket inode ${inode} found but no process owns it`, + }; + } + + return { + pid: processInfo.pid, + cmdline: processInfo.cmdline, + comm: processInfo.comm, + inode, + }; + } catch (err) { + return { + pid: -1, + cmdline: 'unknown', + comm: 'unknown', + error: `Unexpected error: ${err}`, + }; + } +} + +/** + * Synchronous version of trackPidForPort for use in contexts where async is not available + * + * @param srcPort - Source port number from the network connection + * @param procPath - Base path to /proc (default: '/proc') + * @returns PidTrackResult with process information + */ +export function trackPidForPortSync(srcPort: number, procPath = '/proc'): PidTrackResult { + try { + // Read /proc/net/tcp + const tcpPath = path.join(procPath, 'net', 'tcp'); + let tcpContent: string; + + try { + tcpContent = fs.readFileSync(tcpPath, 'utf-8'); + } catch (err) { + return { + pid: -1, + cmdline: 'unknown', + comm: 'unknown', + error: `Failed to read ${tcpPath}: ${err}`, + }; + } + + // Parse TCP connections and find the inode for our port + const entries = parseNetTcp(tcpContent); + const inode = findInodeForPort(entries, srcPort); + + if (!inode || inode === '0') { + return { + pid: -1, + cmdline: 'unknown', + comm: 'unknown', + error: `No socket found for port ${srcPort}`, + }; + } + + // Find the process that owns this socket + const processInfo = findProcessByInode(inode, procPath); + + if (!processInfo) { + return { + pid: -1, + cmdline: 'unknown', + comm: 'unknown', + inode, + error: `Socket inode ${inode} found but no process owns it`, + }; + } + + return { + pid: processInfo.pid, + cmdline: processInfo.cmdline, + comm: processInfo.comm, + inode, + }; + } catch (err) { + return { + pid: -1, + cmdline: 'unknown', + comm: 'unknown', + error: `Unexpected error: ${err}`, + }; + } +} + +/** + * Checks if PID tracking is available on the current system + * (requires /proc filesystem to be mounted and readable) + * + * @param procPath - Base path to /proc (default: '/proc') + * @returns true if PID tracking is available + */ +export function isPidTrackingAvailable(procPath = '/proc'): boolean { + try { + const tcpPath = path.join(procPath, 'net', 'tcp'); + fs.accessSync(tcpPath, fs.constants.R_OK); + return true; + } catch { + return false; + } +} diff --git a/src/types.ts b/src/types.ts index 5cc392dfc..34c718494 100644 --- a/src/types.ts +++ b/src/types.ts @@ -732,3 +732,39 @@ export interface LogSource { /** Human-readable date string (for preserved type) */ dateStr?: string; } + +/** + * Result of PID tracking operation + * + * Contains information about the process that made a network request, + * identified by correlating the source port with /proc filesystem data. + */ +export interface PidTrackResult { + /** Process ID that owns the socket, or -1 if not found */ + pid: number; + /** Full command line of the process, or 'unknown' if not found */ + cmdline: string; + /** Short command name (from /proc/[pid]/comm), or 'unknown' if not found */ + comm: string; + /** Socket inode number, or undefined if not found */ + inode?: string; + /** Error message if tracking failed, or undefined on success */ + error?: string; +} + +/** + * Extended log entry with PID tracking information + * + * Combines the standard parsed log entry with process attribution + * for complete request tracking. + */ +export interface EnhancedLogEntry extends ParsedLogEntry { + /** Process ID that made the request, or -1 if unknown */ + pid?: number; + /** Full command line of the process that made the request */ + cmdline?: string; + /** Short command name (from /proc/[pid]/comm) */ + comm?: string; + /** Socket inode associated with the connection */ + inode?: string; +}